Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow user to configure whether synonyms should or should not be included when determining exact matches #58

Merged
merged 2 commits into from
Mar 19, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .golangci.yaml
Original file line number Diff line number Diff line change
@@ -96,7 +96,7 @@ linters:
- rowserrcheck # checks whether Err of rows is checked successfully
- sqlclosecheck # checks that sql.Rows and sql.Stmt are closed
- sloglint # A Go linter that ensures consistent code style when using log/slog
- tenv # detects using os.Setenv instead of t.Setenv since Go1.17
- usetesting # reports uses of functions that have a replacement in the testing package (e.g. os.Setenv -> t.Setenv, os.MkdirTemp -> t.TempDir)
- testableexamples # checks if examples are testable (have an expected output)
- tparallel # detects inappropriate usage of t.Parallel() method in your Go test codes
- unconvert # removes unnecessary type conversions
9 changes: 9 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
@@ -52,6 +52,7 @@ const (
primarySuggestMultiplier = "primary-suggest-multiplier"
rankThreshold = "rank-threshold"
preRankLimitMultiplier = "pre-rank-limit-multiplier"
synonymsExactMatch = "synonyms-exact-match"
)

var (
@@ -238,6 +239,13 @@ func main() {
Required: false,
Value: 10,
},
&cli.BoolFlag{
Name: synonymsExactMatch,
EnvVars: []string{strcase.ToScreamingSnake(synonymsExactMatch)},
Usage: "When true synonyms are taken into account during exact match calculation",
Required: false,
Value: false,
},
},
Action: func(c *cli.Context) error {
log.Println(c.Command.Usage)
@@ -271,6 +279,7 @@ func main() {
c.Float64(primarySuggestMultiplier),
c.Int(rankThreshold),
c.Int(preRankLimitMultiplier),
c.Bool(synonymsExactMatch),
)
if err != nil {
return err
9 changes: 7 additions & 2 deletions internal/search/datasources/postgres/postgres.go
Original file line number Diff line number Diff line change
@@ -29,9 +29,13 @@ type Postgres struct {
primarySuggestMultiplier float64
rankThreshold int
preRankLimitMultiplier int
synonymsExactMatch bool
}

func NewPostgres(dbConn string, queryTimeout time.Duration, searchIndex string, searchIndexSrid d.SRID, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int, preRankLimitMultiplier int) (*Postgres, error) {
func NewPostgres(dbConn string, queryTimeout time.Duration, searchIndex string, searchIndexSrid d.SRID,
rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int,
preRankLimitMultiplier int, synonymsExactMatch bool) (*Postgres, error) {

ctx := context.Background()
config, err := pgxpool.ParseConfig(dbConn)
if err != nil {
@@ -57,6 +61,7 @@ func NewPostgres(dbConn string, queryTimeout time.Duration, searchIndex string,
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
}, nil
}

@@ -76,7 +81,7 @@ func (p *Postgres) SearchFeaturesAcrossCollections(ctx context.Context, searchQu
}
sql := makeSQL(p.searchIndex, srid, bboxFilter)
wildcardQuery := searchQuery.ToWildcardQuery()
exactMatchQuery := searchQuery.ToExactMatchQuery()
exactMatchQuery := searchQuery.ToExactMatchQuery(p.synonymsExactMatch)
names, versions, relevance := collections.NamesAndVersionsAndRelevance()
log.Printf("\nSEARCH QUERY (wildcard): %s\n", wildcardQuery)

32 changes: 16 additions & 16 deletions internal/search/domain/search.go
Original file line number Diff line number Diff line change
@@ -10,7 +10,6 @@ const (
VersionParam = "version"
RelevanceParam = "relevance"
DefaultRelevance = 0.5
Wildcard = ":*"
)

// GeoJSON properties in search response
@@ -36,36 +35,37 @@ func NewSearchQuery(words []string, withoutSynonyms map[string]struct{}, withSyn
}

func (q *SearchQuery) ToWildcardQuery() string {
return q.toString(true)
return q.toString(true, true)
}

func (q *SearchQuery) ToExactMatchQuery() string {
return q.toString(false)
func (q *SearchQuery) ToExactMatchQuery(useSynonyms bool) string {
return q.toString(false, useSynonyms)
}

func (q *SearchQuery) toString(wildcard bool) string {
func (q *SearchQuery) toString(useWildcard bool, useSynonyms bool) string {
wildcard := ""
if useWildcard {
wildcard = ":*"
}

sb := &strings.Builder{}
for i, word := range q.words {
if i > 0 {
sb.WriteString(" & ")
}
if _, ok := q.withoutSynonyms[word]; ok {
sb.WriteString(word)
if wildcard {
sb.WriteString(Wildcard)
}
sb.WriteString(wildcard)
} else if synonyms, ok := q.withSynonyms[word]; ok {
slices.Sort(synonyms)
sb.WriteByte('(')
sb.WriteString(word)
if wildcard {
sb.WriteString(Wildcard)
}
for _, synonym := range synonyms {
sb.WriteString(" | ")
sb.WriteString(synonym)
if wildcard {
sb.WriteString(Wildcard)
sb.WriteString(wildcard)
if useSynonyms {
for _, synonym := range synonyms {
sb.WriteString(" | ")
sb.WriteString(synonym)
sb.WriteString(wildcard)
}
}
sb.WriteByte(')')
12 changes: 10 additions & 2 deletions internal/search/main.go
Original file line number Diff line number Diff line change
@@ -26,7 +26,10 @@ type Search struct {
json *jsonFeatures
}

func NewSearch(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rewritesFile string, synonymsFile string, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int, preRankLimitMultiplier int) (*Search, error) {
func NewSearch(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rewritesFile string,
synonymsFile string, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64,
rankThreshold int, preRankLimitMultiplier int, synonymsExactMatch bool) (*Search, error) {

queryExpansion, err := NewQueryExpansion(rewritesFile, synonymsFile)
if err != nil {
return nil, err
@@ -43,6 +46,7 @@ func NewSearch(e *engine.Engine, dbConn string, searchIndex string, searchIndexS
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
),
json: newJSONFeatures(e),
queryExpansion: queryExpansion,
@@ -137,7 +141,10 @@ func (s *Search) enrichFeaturesWithHref(fc *domain.FeatureCollection, outputCRS
return nil
}

func newDatasource(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int, preRankLimitMultiplier int) ds.Datasource {
func newDatasource(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rankNormalization int,
exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int,
preRankLimitMultiplier int, synonymsExactMatch bool) ds.Datasource {

datasource, err := postgres.NewPostgres(
dbConn,
timeout,
@@ -148,6 +155,7 @@ func newDatasource(e *engine.Engine, dbConn string, searchIndex string, searchIn
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
)
if err != nil {
log.Fatalf("failed to create datasource: %v", err)
14 changes: 3 additions & 11 deletions internal/search/main_test.go
Original file line number Diff line number Diff line change
@@ -59,19 +59,11 @@ func TestSearch(t *testing.T) {
assert.NoError(t, err)

// given search endpoint
searchEndpoint, err := NewSearch(
eng,
dbConn,
testSearchIndex,
domain.WGS84SRIDPostgis,
searchEndpoint, err := NewSearch(eng, dbConn, testSearchIndex, domain.WGS84SRIDPostgis,
"internal/search/testdata/rewrites.csv",
"internal/search/testdata/synonyms.csv",
1,
3.0,
1.01,
4000,
10,
)
1, 3.0, 1.01,
4000, 10, false)
assert.NoError(t, err)

// given empty search index
2 changes: 1 addition & 1 deletion internal/search/query_expansion_fuzz_test.go
Original file line number Diff line number Diff line change
@@ -34,7 +34,7 @@ func FuzzExpand(f *testing.F) {
f.Fuzz(func(t *testing.T, input string) {
expanded, err := queryExpansion.Expand(context.Background(), input)
assert.NoError(t, err)
query := expanded.ToExactMatchQuery()
query := expanded.ToExactMatchQuery(true)

assert.Truef(t, utf8.ValidString(query), "valid string")
if strings.TrimSpace(input) != "" {
34 changes: 30 additions & 4 deletions internal/search/query_expansion_test.go
Original file line number Diff line number Diff line change
@@ -24,7 +24,8 @@ func init() {
func TestExpand(t *testing.T) {
type args struct {
searchQuery string
wildcard bool
useWildcard bool
useSynonyms bool
}
tests := []struct {
name string
@@ -35,56 +36,63 @@ func TestExpand(t *testing.T) {
name: "rewrite",
args: args{
searchQuery: `markt den bosch`,
useSynonyms: true,
},
want: `markt & hertogenbosch`,
},
{
name: "rewrite followed by synonym",
args: args{
searchQuery: `Spui 1 den Haag`,
useSynonyms: true,
},
want: `spui & 1 & (gravenhage | den <-> haag | s-gravenhage)`,
},
{
name: "no synonym",
args: args{
searchQuery: `just some text`,
useSynonyms: true,
},
want: `just & some & text`,
},
{
name: "wildcard",
args: args{
searchQuery: `just some text`,
wildcard: true,
useWildcard: true,
},
want: `just:* & some:* & text:*`,
},
{
name: "one synonym",
args: args{
searchQuery: `Foo`,
useSynonyms: true,
},
want: `(foo | foobar | foos)`,
},
{
name: "two the same synonyms",
args: args{
searchQuery: `Foo FooBar`,
useSynonyms: true,
},
want: `(foo | foobar | foos) & (foobar | foo | foos)`,
},
{
name: "two-way synonym",
args: args{
searchQuery: `eerste 2de`,
useSynonyms: true,
},
want: `(eerste | 1ste) & (2de | tweede)`,
},
{
name: "nesting",
args: args{
searchQuery: `oudwesterlijke-goeverneur`,
useSynonyms: true,
},
want: `
(oudwesterlijke-goeverneur | oudewestelijkelijke-goev | oudewestelijkelijke-goeverneur | oudewestelijkelijke-gouv |
@@ -99,41 +107,55 @@ oudwestlijke-goeverneur | oudwestlijke-gouv | oudwestlijke-gouverneur)
name: "overlapping synonyms",
args: args{
searchQuery: `foosball`,
useSynonyms: true,
},
want: `(foosball | fooball | foobarball)`,
},
{
name: "synonym with diacritics",
args: args{
searchQuery: `oude fryslân`,
useSynonyms: true,
},
want: `(oude | oud) & (fryslân | friesland)`,
},
{
name: "no synonyms for exact matches",
args: args{
searchQuery: `oude fryslân abc`,
useSynonyms: false,
},
want: `(oude) & (fryslân) & abc`,
},
{
name: "case insensitive",
args: args{
searchQuery: `OudE DeN HaAg`,
useSynonyms: true,
},
want: `(oude | oud) & (gravenhage | den <-> haag | s-gravenhage)`,
},
{
name: "word delimiters",
args: args{
searchQuery: `ok text with spaces ok`,
useSynonyms: true,
},
want: `ok & text & with & spaces`,
},
{
name: "long",
args: args{
searchQuery: `prof dr ir van der 1e noordsteeg`,
useSynonyms: true,
},
want: `prof & dr & ir & van & der & 1e & noordsteeg`,
},
{
name: "one substring",
args: args{
searchQuery: `Piet Gouverneurstraat 1800`,
useSynonyms: true,
},
want: `
piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
@@ -143,6 +165,7 @@ piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
name: "two substrings",
args: args{
searchQuery: `Oude Piet Gouverneurstraat 1800`,
useSynonyms: true,
},
want: `
(oude | oud) & piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
@@ -152,6 +175,7 @@ piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
name: "three substrings",
args: args{
searchQuery: `Oude Piet Westgouverneurstraat 1800`,
useSynonyms: true,
},
want: `
(oude | oud) & piet &
@@ -164,6 +188,7 @@ westgoeverneurstraat | westgoevstraat | westgouvstraat) & 1800
name: "one rewrite and multiple synonyms",
args: args{
searchQuery: `goev straat 1 in Den Haag niet in Friesland`,
useSynonyms: true,
},
want: `
(goev | goeverneur | gouv | gouverneur) & straat & 1 & in & (gravenhage | den <-> haag | s-gravenhage) & niet & (friesland | fryslân)
@@ -173,6 +198,7 @@ westgoeverneurstraat | westgoevstraat | westgouvstraat) & 1800
name: "five synonyms",
args: args{
searchQuery: `Oud Gouv 2DE 's-Gravenhage Fryslân Nederland`,
useSynonyms: true,
},
want: `
(oud | oude) & (gouv | goev | goeverneur | gouverneur) & (2de | tweede) & (gravenhage | den <-> haag | s-gravenhage) & (fryslân | friesland) & nederland
@@ -186,10 +212,10 @@ westgoeverneurstraat | westgoevstraat | westgouvstraat) & 1800
actual, err := queryExpansion.Expand(context.Background(), tt.args.searchQuery)
assert.NoError(t, err)
var query string
if tt.args.wildcard {
if tt.args.useWildcard {
query = actual.ToWildcardQuery()
} else {
query = actual.ToExactMatchQuery()
query = actual.ToExactMatchQuery(tt.args.useSynonyms)
}
assert.Equal(t, strings.ReplaceAll(tt.want, "\n", ""), query, tt.args.searchQuery)
})
4 changes: 2 additions & 2 deletions internal/search/testdata/expected-synonym-with-space.json
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@
"displayName": "Spui 70 2511 BT s-Gravenhage",
"highlight": "<b>Spui</b> 70 2511 BT <b>s</b>-<b>Gravenhage</b>",
"href": "https://example.com/ogc/v1/collections/addresses/items/154?f=json",
"score": 0.038760408759117126
"score": 0.029046258330345156
},
"geometry": {
"type": "Point",
@@ -48,7 +48,7 @@
"displayName": "Spui 180 2511 BW 's-Gravenhage",
"highlight": "<b>Spui</b> 180 2511 BW '<b>s</b>-<b>Gravenhage</b>",
"href": "https://example.com/ogc/v1/collections/addresses/items/155?f=json",
"score": 0.038760408759117126
"score": 0.029046258330345156
},
"geometry": {
"type": "Point",
Loading