Skip to content

Commit

Permalink
do not run near queries on qualifier words
Browse files Browse the repository at this point in the history
There is too much potential for confusion (e.g. 'Rio Grande' read
as 'river near Grande') for too little gain. Use near phrases
instead.
  • Loading branch information
lonvia committed Jan 7, 2024
1 parent f03ec3e commit 10a5424
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 7 deletions.
5 changes: 0 additions & 5 deletions nominatim/api/search/icu_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
Implementation of query analysis for the ICU tokenizer.
"""
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
from copy import copy
from collections import defaultdict
import dataclasses
import difflib
Expand Down Expand Up @@ -188,10 +187,6 @@ async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)

Expand Down
4 changes: 2 additions & 2 deletions test/python/api/search/test_icu_query_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ async def test_qualifier_words(conn):
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))

assert query.num_token_slots() == 5
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER}


@pytest.mark.asyncio
Expand Down

0 comments on commit 10a5424

Please sign in to comment.