Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sdem/remove trigram2 #499

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ and this project adheres to

## [Unreleased]

### Changed

- 🔥(teams) remove search users by trigram

### Fixed

- 💚(ci) improve E2E tests #492
Expand Down
23 changes: 5 additions & 18 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,7 @@ class UserViewSet(
User viewset for all interactions with user infos and teams.

GET /api/users/&q=query
Return a list of users whose email matches the query. Similarity is
calculated using trigram similarity, allowing for partial,
case-insensitive matches and accented queries.
Return a list of users whose email or name matches the query.
"""

permission_classes = [permissions.IsSelf]
Expand All @@ -207,22 +205,11 @@ def get_queryset(self):
if team_id := self.request.GET.get("team_id", ""):
queryset = queryset.exclude(teams__id=team_id)

# Search by case-insensitive and accent-insensitive trigram similarity
# Search by case-insensitive and accent-insensitive
if query := self.request.GET.get("q", ""):
similarity = Max(
TrigramSimilarity(
Coalesce(Func("email", function="unaccent"), Value("")),
Func(Value(query), function="unaccent"),
)
+ TrigramSimilarity(
Coalesce(Func("name", function="unaccent"), Value("")),
Func(Value(query), function="unaccent"),
)
)
queryset = (
queryset.annotate(similarity=similarity)
.filter(similarity__gte=SIMILARITY_THRESHOLD)
.order_by("-similarity")
queryset = queryset.filter(
Q(name__unaccent__icontains=query)
| Q(email__unaccent__icontains=query)
)

return queryset
Expand Down
62 changes: 26 additions & 36 deletions src/backend/core/tests/test_api_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,28 +78,16 @@ def test_api_users_authenticated_list_by_email():
user_ids = [user["id"] for user in response.json()["results"]]
assert user_ids[0] == str(frank.id)

# Result that matches a trigram twice ranks better than result that matches once
response = client.get("/api/v1.0/users/?q=ole")

assert response.status_code == HTTP_200_OK
user_ids = [user["id"] for user in response.json()["results"]]
# "Nicole Foole" matches twice on "ole"
assert user_ids == [str(nicole.id), str(frank.id)]
assert user_ids == [str(frank.id), str(nicole.id)]

# Even with a low similarity threshold, query should match expected emails
response = client.get("/api/v1.0/users/?q=ool")

assert response.status_code == HTTP_200_OK
assert response.json()["results"] == [
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
{
"id": str(frank.id),
"email": frank.email,
Expand All @@ -109,6 +97,15 @@ def test_api_users_authenticated_list_by_email():
"language": frank.language,
"timezone": str(frank.timezone),
},
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
]


Expand All @@ -118,17 +115,17 @@ def test_api_users_authenticated_list_by_name():
partial query on the name.
"""
user = factories.UserFactory(email="[email protected]", name="john doe")
dave = factories.UserFactory(name="dave bowman", email=None)
dave = factories.UserFactory(name="Dave bowman", email=None)
nicole = factories.UserFactory(name="nicole foole", email=None)
frank = factories.UserFactory(name="frank poole", email=None)
frank = factories.UserFactory(name="frank poolé", email=None)
factories.UserFactory(name="heywood floyd", email=None)

client = APIClient()
client.force_login(user)

# Full query should work
response = client.get(
"/api/v1.0/users/?q=[email protected]",
"/api/v1.0/users/?q=dave",
)

assert response.status_code == HTTP_200_OK
Expand All @@ -142,28 +139,16 @@ def test_api_users_authenticated_list_by_name():
user_ids = [user["id"] for user in response.json()["results"]]
assert user_ids[0] == str(frank.id)

# Result that matches a trigram twice ranks better than result that matches once
response = client.get("/api/v1.0/users/?q=ole")

assert response.status_code == HTTP_200_OK
user_ids = [user["id"] for user in response.json()["results"]]
# "Nicole Foole" matches twice on "ole"
assert user_ids == [str(nicole.id), str(frank.id)]
assert user_ids == [str(frank.id), str(nicole.id)]

# Even with a low similarity threshold, query should match expected user
response = client.get("/api/v1.0/users/?q=ool")
response = client.get("/api/v1.0/users/?q=oole")

assert response.status_code == HTTP_200_OK
assert response.json()["results"] == [
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
{
"id": str(frank.id),
"email": frank.email,
Expand All @@ -173,6 +158,15 @@ def test_api_users_authenticated_list_by_name():
"language": frank.language,
"timezone": str(frank.timezone),
},
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
]


Expand All @@ -184,22 +178,18 @@ def test_api_users_authenticated_list_by_name_and_email():

user = factories.UserFactory(email="[email protected]", name="john doe")
nicole = factories.UserFactory(email="[email protected]", name="nicole foole")
frank = factories.UserFactory(email="[email protected]", name=None)
oleg = factories.UserFactory(email="[email protected]", name=None)
david = factories.UserFactory(email=None, name="david role")

client = APIClient()
client.force_login(user)

# Result that matches a trigram in name and email ranks better than result that matches once
response = client.get("/api/v1.0/users/?q=ole")

assert response.status_code == HTTP_200_OK
user_ids = [user["id"] for user in response.json()["results"]]

# "Nicole Foole" matches twice on "ole" in her name and twice on "ole" in her email
# "Oleg poole" matches twice on "ole" in her email
# "David role" matches once on "ole" in his name
assert user_ids == [str(nicole.id), str(frank.id), str(david.id)]
assert user_ids == [str(david.id), str(oleg.id), str(nicole.id)]


def test_api_users_authenticated_list_exclude_users_already_in_team(
Expand Down
15 changes: 15 additions & 0 deletions src/backend/demo/management/commands/create_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,21 @@ def create_demo(stdout):
language=random.choice(settings.LANGUAGES)[0],
)
)
# this is a quick fix to fix e2e tests
# tests needs some no random data
queue.push(
models.User(
sub=uuid4(),
email="[email protected]",
name="Monique test",
password="!",
is_superuser=False,
is_active=True,
is_staff=False,
language=random.choice(settings.LANGUAGES)[0],
)
)

queue.flush()

with Timeit(stdout, "Creating teams"):
Expand Down
4 changes: 3 additions & 1 deletion src/backend/demo/tests/test_commands_create_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def test_commands_create_demo():
"""The create_demo management command should create objects as expected."""
call_command("create_demo")

assert models.User.objects.count() == TEST_NB_OBJECTS["users"]
assert (
models.User.objects.count() == TEST_NB_OBJECTS["users"] + 1
) # Monique Test (quick fix for e2e)
assert models.Team.objects.count() == TEST_NB_OBJECTS["teams"]
assert models.TeamAccess.objects.count() >= TEST_NB_OBJECTS["teams"]
assert mailbox_models.MailDomain.objects.count() == TEST_NB_OBJECTS["domains"]
Expand Down
Loading