numerique-gouv · sdemagny · Oct 22, 2024 · Oct 29, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,10 @@ and this project adheres to
 
 ## [Unreleased]
 
+### Changed
+
+- 🔥(teams) remove search users by trigram
+
 ### Fixed
 
 - 💚(ci) improve E2E tests #492

diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py
@@ -182,9 +182,7 @@ class UserViewSet(
     User viewset for all interactions with user infos and teams.
 
     GET /api/users/&q=query
-        Return a list of users whose email matches the query. Similarity is
-        calculated using trigram similarity, allowing for partial,
-        case-insensitive matches and accented queries.
+        Return a list of users whose email or name matches the query.
     """
 
     permission_classes = [permissions.IsSelf]
@@ -207,22 +205,11 @@ def get_queryset(self):
             if team_id := self.request.GET.get("team_id", ""):
                 queryset = queryset.exclude(teams__id=team_id)
 
-            # Search by case-insensitive and accent-insensitive trigram similarity
+            # Search by case-insensitive and accent-insensitive
             if query := self.request.GET.get("q", ""):
-                similarity = Max(
-                    TrigramSimilarity(
-                        Coalesce(Func("email", function="unaccent"), Value("")),
-                        Func(Value(query), function="unaccent"),
-                    )
-                    + TrigramSimilarity(
-                        Coalesce(Func("name", function="unaccent"), Value("")),
-                        Func(Value(query), function="unaccent"),
-                    )
-                )
-                queryset = (
-                    queryset.annotate(similarity=similarity)
-                    .filter(similarity__gte=SIMILARITY_THRESHOLD)
-                    .order_by("-similarity")
+                queryset = queryset.filter(
+                    Q(name__unaccent__icontains=query)
+                    | Q(email__unaccent__icontains=query)
                 )
 
         return queryset

diff --git a/src/backend/core/tests/test_api_users.py b/src/backend/core/tests/test_api_users.py
@@ -78,28 +78,16 @@ def test_api_users_authenticated_list_by_email():
     user_ids = [user["id"] for user in response.json()["results"]]
     assert user_ids[0] == str(frank.id)
 
-    # Result that matches a trigram twice ranks better than result that matches once
     response = client.get("/api/v1.0/users/?q=ole")
 
     assert response.status_code == HTTP_200_OK
     user_ids = [user["id"] for user in response.json()["results"]]
-    # "Nicole Foole" matches twice on "ole"
-    assert user_ids == [str(nicole.id), str(frank.id)]
+    assert user_ids == [str(frank.id), str(nicole.id)]
 
-    # Even with a low similarity threshold, query should match expected emails
     response = client.get("/api/v1.0/users/?q=ool")
 
     assert response.status_code == HTTP_200_OK
     assert response.json()["results"] == [
-        {
-            "id": str(nicole.id),
-            "email": nicole.email,
-            "name": nicole.name,
-            "is_device": nicole.is_device,
-            "is_staff": nicole.is_staff,
-            "language": nicole.language,
-            "timezone": str(nicole.timezone),
-        },
         {
             "id": str(frank.id),
             "email": frank.email,
@@ -109,6 +97,15 @@ def test_api_users_authenticated_list_by_email():
             "language": frank.language,
             "timezone": str(frank.timezone),
         },
+        {
+            "id": str(nicole.id),
+            "email": nicole.email,
+            "name": nicole.name,
+            "is_device": nicole.is_device,
+            "is_staff": nicole.is_staff,
+            "language": nicole.language,
+            "timezone": str(nicole.timezone),
+        },
     ]
 
 
@@ -118,17 +115,17 @@ def test_api_users_authenticated_list_by_name():
     partial query on the name.
     """
     user = factories.UserFactory(email="[email protected]", name="john doe")
-    dave = factories.UserFactory(name="dave bowman", email=None)
+    dave = factories.UserFactory(name="Dave bowman", email=None)
     nicole = factories.UserFactory(name="nicole foole", email=None)
-    frank = factories.UserFactory(name="frank poole", email=None)
+    frank = factories.UserFactory(name="frank poolé", email=None)
     factories.UserFactory(name="heywood floyd", email=None)
 
     client = APIClient()
     client.force_login(user)
 
     # Full query should work
     response = client.get(
-        "/api/v1.0/users/?q=[email protected]",
+        "/api/v1.0/users/?q=dave",
     )
 
     assert response.status_code == HTTP_200_OK
@@ -142,28 +139,16 @@ def test_api_users_authenticated_list_by_name():
     user_ids = [user["id"] for user in response.json()["results"]]
     assert user_ids[0] == str(frank.id)
 
-    # Result that matches a trigram twice ranks better than result that matches once
     response = client.get("/api/v1.0/users/?q=ole")
 
     assert response.status_code == HTTP_200_OK
     user_ids = [user["id"] for user in response.json()["results"]]
-    # "Nicole Foole" matches twice on "ole"
-    assert user_ids == [str(nicole.id), str(frank.id)]
+    assert user_ids == [str(frank.id), str(nicole.id)]
 
-    # Even with a low similarity threshold, query should match expected user
-    response = client.get("/api/v1.0/users/?q=ool")
+    response = client.get("/api/v1.0/users/?q=oole")
 
     assert response.status_code == HTTP_200_OK
     assert response.json()["results"] == [
-        {
-            "id": str(nicole.id),
-            "email": nicole.email,
-            "name": nicole.name,
-            "is_device": nicole.is_device,
-            "is_staff": nicole.is_staff,
-            "language": nicole.language,
-            "timezone": str(nicole.timezone),
-        },
         {
             "id": str(frank.id),
             "email": frank.email,
@@ -173,6 +158,15 @@ def test_api_users_authenticated_list_by_name():
             "language": frank.language,
             "timezone": str(frank.timezone),
         },
+        {
+            "id": str(nicole.id),
+            "email": nicole.email,
+            "name": nicole.name,
+            "is_device": nicole.is_device,
+            "is_staff": nicole.is_staff,
+            "language": nicole.language,
+            "timezone": str(nicole.timezone),
+        },
     ]
 
 
@@ -184,22 +178,18 @@ def test_api_users_authenticated_list_by_name_and_email():
 
     user = factories.UserFactory(email="[email protected]", name="john doe")
     nicole = factories.UserFactory(email="[email protected]", name="nicole foole")
-    frank = factories.UserFactory(email="[email protected]", name=None)
+    oleg = factories.UserFactory(email="[email protected]", name=None)
     david = factories.UserFactory(email=None, name="david role")
 
     client = APIClient()
     client.force_login(user)
 
-    # Result that matches a trigram in name and email ranks better than result that matches once
     response = client.get("/api/v1.0/users/?q=ole")
 
     assert response.status_code == HTTP_200_OK
     user_ids = [user["id"] for user in response.json()["results"]]
 
-    # "Nicole Foole" matches twice on "ole" in her name and twice on "ole" in her email
-    # "Oleg poole" matches twice on "ole" in her email
-    # "David role" matches once on "ole" in his name
-    assert user_ids == [str(nicole.id), str(frank.id), str(david.id)]
+    assert user_ids == [str(david.id), str(oleg.id), str(nicole.id)]
 
 
 def test_api_users_authenticated_list_exclude_users_already_in_team(

diff --git a/src/backend/demo/management/commands/create_demo.py b/src/backend/demo/management/commands/create_demo.py
@@ -128,6 +128,21 @@ def create_demo(stdout):
                     language=random.choice(settings.LANGUAGES)[0],
                 )
             )
+        # this is a quick fix to fix e2e tests
+        # tests needs some no random data
+        queue.push(
+            models.User(
+                sub=uuid4(),
+                email="[email protected]",
+                name="Monique test",
+                password="!",
+                is_superuser=False,
+                is_active=True,
+                is_staff=False,
+                language=random.choice(settings.LANGUAGES)[0],
+            )
+        )
+
         queue.flush()
 
     with Timeit(stdout, "Creating teams"):

diff --git a/src/backend/demo/tests/test_commands_create_demo.py b/src/backend/demo/tests/test_commands_create_demo.py
@@ -28,7 +28,9 @@ def test_commands_create_demo():
     """The create_demo management command should create objects as expected."""
     call_command("create_demo")
 
-    assert models.User.objects.count() == TEST_NB_OBJECTS["users"]
+    assert (
+        models.User.objects.count() == TEST_NB_OBJECTS["users"] + 1
+    )  # Monique Test (quick fix for e2e)
     assert models.Team.objects.count() == TEST_NB_OBJECTS["teams"]
     assert models.TeamAccess.objects.count() >= TEST_NB_OBJECTS["teams"]
     assert mailbox_models.MailDomain.objects.count() == TEST_NB_OBJECTS["domains"]