Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow user to configure whether synonyms should or should not be included when determining exact matches #58

Merged
merged 2 commits into from
Mar 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ linters:
- rowserrcheck # checks whether Err of rows is checked successfully
- sqlclosecheck # checks that sql.Rows and sql.Stmt are closed
- sloglint # A Go linter that ensures consistent code style when using log/slog
- tenv # detects using os.Setenv instead of t.Setenv since Go1.17
- usetesting # reports uses of functions that have a replacement in the testing package (e.g. os.Setenv -> t.Setenv, os.MkdirTemp -> t.TempDir)
- testableexamples # checks if examples are testable (have an expected output)
- tparallel # detects inappropriate usage of t.Parallel() method in your Go test codes
- unconvert # removes unnecessary type conversions
Expand Down
9 changes: 9 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const (
primarySuggestMultiplier = "primary-suggest-multiplier"
rankThreshold = "rank-threshold"
preRankLimitMultiplier = "pre-rank-limit-multiplier"
synonymsExactMatch = "synonyms-exact-match"
)

var (
Expand Down Expand Up @@ -238,6 +239,13 @@ func main() {
Required: false,
Value: 10,
},
&cli.BoolFlag{
Name: synonymsExactMatch,
EnvVars: []string{strcase.ToScreamingSnake(synonymsExactMatch)},
Usage: "When true synonyms are taken into account during exact match calculation",
Required: false,
Value: false,
},
},
Action: func(c *cli.Context) error {
log.Println(c.Command.Usage)
Expand Down Expand Up @@ -271,6 +279,7 @@ func main() {
c.Float64(primarySuggestMultiplier),
c.Int(rankThreshold),
c.Int(preRankLimitMultiplier),
c.Bool(synonymsExactMatch),
)
if err != nil {
return err
Expand Down
9 changes: 7 additions & 2 deletions internal/search/datasources/postgres/postgres.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@ type Postgres struct {
primarySuggestMultiplier float64
rankThreshold int
preRankLimitMultiplier int
synonymsExactMatch bool
}

func NewPostgres(dbConn string, queryTimeout time.Duration, searchIndex string, searchIndexSrid d.SRID, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int, preRankLimitMultiplier int) (*Postgres, error) {
func NewPostgres(dbConn string, queryTimeout time.Duration, searchIndex string, searchIndexSrid d.SRID,
rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int,
preRankLimitMultiplier int, synonymsExactMatch bool) (*Postgres, error) {

ctx := context.Background()
config, err := pgxpool.ParseConfig(dbConn)
if err != nil {
Expand All @@ -57,6 +61,7 @@ func NewPostgres(dbConn string, queryTimeout time.Duration, searchIndex string,
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
}, nil
}

Expand All @@ -76,7 +81,7 @@ func (p *Postgres) SearchFeaturesAcrossCollections(ctx context.Context, searchQu
}
sql := makeSQL(p.searchIndex, srid, bboxFilter)
wildcardQuery := searchQuery.ToWildcardQuery()
exactMatchQuery := searchQuery.ToExactMatchQuery()
exactMatchQuery := searchQuery.ToExactMatchQuery(p.synonymsExactMatch)
names, versions, relevance := collections.NamesAndVersionsAndRelevance()
log.Printf("\nSEARCH QUERY (wildcard): %s\n", wildcardQuery)

Expand Down
32 changes: 16 additions & 16 deletions internal/search/domain/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ const (
VersionParam = "version"
RelevanceParam = "relevance"
DefaultRelevance = 0.5
Wildcard = ":*"
)

// GeoJSON properties in search response
Expand All @@ -36,36 +35,37 @@ func NewSearchQuery(words []string, withoutSynonyms map[string]struct{}, withSyn
}

func (q *SearchQuery) ToWildcardQuery() string {
return q.toString(true)
return q.toString(true, true)
}

func (q *SearchQuery) ToExactMatchQuery() string {
return q.toString(false)
func (q *SearchQuery) ToExactMatchQuery(useSynonyms bool) string {
return q.toString(false, useSynonyms)
}

func (q *SearchQuery) toString(wildcard bool) string {
func (q *SearchQuery) toString(useWildcard bool, useSynonyms bool) string {
wildcard := ""
if useWildcard {
wildcard = ":*"
}

sb := &strings.Builder{}
for i, word := range q.words {
if i > 0 {
sb.WriteString(" & ")
}
if _, ok := q.withoutSynonyms[word]; ok {
sb.WriteString(word)
if wildcard {
sb.WriteString(Wildcard)
}
sb.WriteString(wildcard)
} else if synonyms, ok := q.withSynonyms[word]; ok {
slices.Sort(synonyms)
sb.WriteByte('(')
sb.WriteString(word)
if wildcard {
sb.WriteString(Wildcard)
}
for _, synonym := range synonyms {
sb.WriteString(" | ")
sb.WriteString(synonym)
if wildcard {
sb.WriteString(Wildcard)
sb.WriteString(wildcard)
if useSynonyms {
for _, synonym := range synonyms {
sb.WriteString(" | ")
sb.WriteString(synonym)
sb.WriteString(wildcard)
}
}
sb.WriteByte(')')
Expand Down
12 changes: 10 additions & 2 deletions internal/search/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ type Search struct {
json *jsonFeatures
}

func NewSearch(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rewritesFile string, synonymsFile string, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int, preRankLimitMultiplier int) (*Search, error) {
func NewSearch(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rewritesFile string,
synonymsFile string, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64,
rankThreshold int, preRankLimitMultiplier int, synonymsExactMatch bool) (*Search, error) {

queryExpansion, err := NewQueryExpansion(rewritesFile, synonymsFile)
if err != nil {
return nil, err
Expand All @@ -43,6 +46,7 @@ func NewSearch(e *engine.Engine, dbConn string, searchIndex string, searchIndexS
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
),
json: newJSONFeatures(e),
queryExpansion: queryExpansion,
Expand Down Expand Up @@ -137,7 +141,10 @@ func (s *Search) enrichFeaturesWithHref(fc *domain.FeatureCollection, outputCRS
return nil
}

func newDatasource(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int, preRankLimitMultiplier int) ds.Datasource {
func newDatasource(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rankNormalization int,
exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int,
preRankLimitMultiplier int, synonymsExactMatch bool) ds.Datasource {

datasource, err := postgres.NewPostgres(
dbConn,
timeout,
Expand All @@ -148,6 +155,7 @@ func newDatasource(e *engine.Engine, dbConn string, searchIndex string, searchIn
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
)
if err != nil {
log.Fatalf("failed to create datasource: %v", err)
Expand Down
14 changes: 3 additions & 11 deletions internal/search/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,11 @@ func TestSearch(t *testing.T) {
assert.NoError(t, err)

// given search endpoint
searchEndpoint, err := NewSearch(
eng,
dbConn,
testSearchIndex,
domain.WGS84SRIDPostgis,
searchEndpoint, err := NewSearch(eng, dbConn, testSearchIndex, domain.WGS84SRIDPostgis,
"internal/search/testdata/rewrites.csv",
"internal/search/testdata/synonyms.csv",
1,
3.0,
1.01,
4000,
10,
)
1, 3.0, 1.01,
4000, 10, false)
assert.NoError(t, err)

// given empty search index
Expand Down
2 changes: 1 addition & 1 deletion internal/search/query_expansion_fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func FuzzExpand(f *testing.F) {
f.Fuzz(func(t *testing.T, input string) {
expanded, err := queryExpansion.Expand(context.Background(), input)
assert.NoError(t, err)
query := expanded.ToExactMatchQuery()
query := expanded.ToExactMatchQuery(true)

assert.Truef(t, utf8.ValidString(query), "valid string")
if strings.TrimSpace(input) != "" {
Expand Down
34 changes: 30 additions & 4 deletions internal/search/query_expansion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ func init() {
func TestExpand(t *testing.T) {
type args struct {
searchQuery string
wildcard bool
useWildcard bool
useSynonyms bool
}
tests := []struct {
name string
Expand All @@ -35,56 +36,63 @@ func TestExpand(t *testing.T) {
name: "rewrite",
args: args{
searchQuery: `markt den bosch`,
useSynonyms: true,
},
want: `markt & hertogenbosch`,
},
{
name: "rewrite followed by synonym",
args: args{
searchQuery: `Spui 1 den Haag`,
useSynonyms: true,
},
want: `spui & 1 & (gravenhage | den <-> haag | s-gravenhage)`,
},
{
name: "no synonym",
args: args{
searchQuery: `just some text`,
useSynonyms: true,
},
want: `just & some & text`,
},
{
name: "wildcard",
args: args{
searchQuery: `just some text`,
wildcard: true,
useWildcard: true,
},
want: `just:* & some:* & text:*`,
},
{
name: "one synonym",
args: args{
searchQuery: `Foo`,
useSynonyms: true,
},
want: `(foo | foobar | foos)`,
},
{
name: "two the same synonyms",
args: args{
searchQuery: `Foo FooBar`,
useSynonyms: true,
},
want: `(foo | foobar | foos) & (foobar | foo | foos)`,
},
{
name: "two-way synonym",
args: args{
searchQuery: `eerste 2de`,
useSynonyms: true,
},
want: `(eerste | 1ste) & (2de | tweede)`,
},
{
name: "nesting",
args: args{
searchQuery: `oudwesterlijke-goeverneur`,
useSynonyms: true,
},
want: `
(oudwesterlijke-goeverneur | oudewestelijkelijke-goev | oudewestelijkelijke-goeverneur | oudewestelijkelijke-gouv |
Expand All @@ -99,41 +107,55 @@ oudwestlijke-goeverneur | oudwestlijke-gouv | oudwestlijke-gouverneur)
name: "overlapping synonyms",
args: args{
searchQuery: `foosball`,
useSynonyms: true,
},
want: `(foosball | fooball | foobarball)`,
},
{
name: "synonym with diacritics",
args: args{
searchQuery: `oude fryslân`,
useSynonyms: true,
},
want: `(oude | oud) & (fryslân | friesland)`,
},
{
name: "no synonyms for exact matches",
args: args{
searchQuery: `oude fryslân abc`,
useSynonyms: false,
},
want: `(oude) & (fryslân) & abc`,
},
{
name: "case insensitive",
args: args{
searchQuery: `OudE DeN HaAg`,
useSynonyms: true,
},
want: `(oude | oud) & (gravenhage | den <-> haag | s-gravenhage)`,
},
{
name: "word delimiters",
args: args{
searchQuery: `ok text with spaces ok`,
useSynonyms: true,
},
want: `ok & text & with & spaces`,
},
{
name: "long",
args: args{
searchQuery: `prof dr ir van der 1e noordsteeg`,
useSynonyms: true,
},
want: `prof & dr & ir & van & der & 1e & noordsteeg`,
},
{
name: "one substring",
args: args{
searchQuery: `Piet Gouverneurstraat 1800`,
useSynonyms: true,
},
want: `
piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
Expand All @@ -143,6 +165,7 @@ piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
name: "two substrings",
args: args{
searchQuery: `Oude Piet Gouverneurstraat 1800`,
useSynonyms: true,
},
want: `
(oude | oud) & piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
Expand All @@ -152,6 +175,7 @@ piet & (gouverneurstraat | goeverneurstraat | goevstraat | gouvstraat) & 1800
name: "three substrings",
args: args{
searchQuery: `Oude Piet Westgouverneurstraat 1800`,
useSynonyms: true,
},
want: `
(oude | oud) & piet &
Expand All @@ -164,6 +188,7 @@ westgoeverneurstraat | westgoevstraat | westgouvstraat) & 1800
name: "one rewrite and multiple synonyms",
args: args{
searchQuery: `goev straat 1 in Den Haag niet in Friesland`,
useSynonyms: true,
},
want: `
(goev | goeverneur | gouv | gouverneur) & straat & 1 & in & (gravenhage | den <-> haag | s-gravenhage) & niet & (friesland | fryslân)
Expand All @@ -173,6 +198,7 @@ westgoeverneurstraat | westgoevstraat | westgouvstraat) & 1800
name: "five synonyms",
args: args{
searchQuery: `Oud Gouv 2DE 's-Gravenhage Fryslân Nederland`,
useSynonyms: true,
},
want: `
(oud | oude) & (gouv | goev | goeverneur | gouverneur) & (2de | tweede) & (gravenhage | den <-> haag | s-gravenhage) & (fryslân | friesland) & nederland
Expand All @@ -186,10 +212,10 @@ westgoeverneurstraat | westgoevstraat | westgouvstraat) & 1800
actual, err := queryExpansion.Expand(context.Background(), tt.args.searchQuery)
assert.NoError(t, err)
var query string
if tt.args.wildcard {
if tt.args.useWildcard {
query = actual.ToWildcardQuery()
} else {
query = actual.ToExactMatchQuery()
query = actual.ToExactMatchQuery(tt.args.useSynonyms)
}
assert.Equal(t, strings.ReplaceAll(tt.want, "\n", ""), query, tt.args.searchQuery)
})
Expand Down
4 changes: 2 additions & 2 deletions internal/search/testdata/expected-synonym-with-space.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"displayName": "Spui 70 2511 BT s-Gravenhage",
"highlight": "<b>Spui</b> 70 2511 BT <b>s</b>-<b>Gravenhage</b>",
"href": "https://example.com/ogc/v1/collections/addresses/items/154?f=json",
"score": 0.038760408759117126
"score": 0.029046258330345156
},
"geometry": {
"type": "Point",
Expand Down Expand Up @@ -48,7 +48,7 @@
"displayName": "Spui 180 2511 BW 's-Gravenhage",
"highlight": "<b>Spui</b> 180 2511 BW '<b>s</b>-<b>Gravenhage</b>",
"href": "https://example.com/ogc/v1/collections/addresses/items/155?f=json",
"score": 0.038760408759117126
"score": 0.029046258330345156
},
"geometry": {
"type": "Point",
Expand Down
Loading