chg ! apply overridden config for deduplication set

unicef · Nov 12, 2024 · 0465529 · 0465529
1 parent a4b2ee8
commit 0465529
Show file tree

Hide file tree

Showing 4 changed files with 84 additions and 42 deletions.
diff --git a/src/hope_dedup_engine/apps/api/deduplication/adapters.py b/src/hope_dedup_engine/apps/api/deduplication/adapters.py
@@ -1,6 +1,6 @@
 from collections.abc import Callable, Generator
-from typing import Any
 
+from hope_dedup_engine.apps.api.deduplication.config import ConfigDefaults
 from hope_dedup_engine.apps.api.deduplication.registry import DuplicateKeyPair
 from hope_dedup_engine.apps.api.models import DeduplicationSet
 from hope_dedup_engine.apps.faces.services.duplication_detector import (
@@ -24,12 +24,12 @@ def run(
                 "reference_pk", "filename"
             )
         }
-        ds_config: dict[str, Any] = (
-            self.deduplication_set.config and self.deduplication_set.config.settings
-        ) or {}
+        cfg = ConfigDefaults()
+        if self.deduplication_set.config:
+            cfg.apply_config_overrides(self.deduplication_set.config.settings)
         # ignored key pairs are not handled correctly in DuplicationDetector
         detector = DuplicationDetector(
-            tuple[str](filename_to_reference_pk.keys()), ds_config
+            tuple[str](filename_to_reference_pk.keys()), cfg=cfg
         )
         for first_filename, second_filename, distance in detector.find_duplicates(
             tracker

diff --git a/src/hope_dedup_engine/apps/api/deduplication/config.py b/src/hope_dedup_engine/apps/api/deduplication/config.py
@@ -0,0 +1,55 @@
+from dataclasses import dataclass, field
+from typing import Any, Literal
+
+from constance import config as constance_cfg
+
+
+@dataclass
+class DetectionConfig:
+    dnn_files_source: str = constance_cfg.DNN_FILES_SOURCE
+    dnn_backend: int = constance_cfg.DNN_BACKEND
+    dnn_target: int = constance_cfg.DNN_TARGET
+    blob_from_image_scale_factor: float = constance_cfg.BLOB_FROM_IMAGE_SCALE_FACTOR
+    blob_from_image_mean_values: tuple[float, float, float] = tuple(
+        map(float, constance_cfg.BLOB_FROM_IMAGE_MEAN_VALUES.split(", "))
+    )
+    confidence: float = constance_cfg.FACE_DETECTION_CONFIDENCE
+    nms_threshold: float = constance_cfg.NMS_THRESHOLD
+
+
+@dataclass
+class RecognitionConfig:
+    num_jitters: int = constance_cfg.FACE_ENCODINGS_NUM_JITTERS
+    model: Literal["small", "large"] = constance_cfg.FACE_ENCODINGS_MODEL
+    preprocessors: list[str] = field(default_factory=list)
+
+
+@dataclass
+class DuplicatesConfig:
+    tolerance: float = constance_cfg.FACE_DISTANCE_THRESHOLD
+
+
+@dataclass
+class ConfigDefaults:
+    detection: DetectionConfig = field(default_factory=DetectionConfig)
+    recognition: RecognitionConfig = field(default_factory=RecognitionConfig)
+    duplicates: DuplicatesConfig = field(default_factory=DuplicatesConfig)
+
+    def apply_config_overrides(
+        self, config_settings: dict[str, Any] | None = None
+    ) -> None:
+        """
+        Updates the instance with values from the provided config settings.
+
+        Parameters:
+            config_settings (dict | None): Optional dictionary of configuration overrides, structured to match
+                sections in ConfigDefaults (e.g., "detection", "recognition", "duplicates"). Only matching attributes
+                are updated. No changes are made if `config_settings` is `None` or empty.
+        """
+        if config_settings:
+            for section_name, section_data in config_settings.items():
+                dataclass_section = getattr(self, section_name, None)
+                if dataclass_section and isinstance(section_data, dict):
+                    for k, v in section_data.items():
+                        if hasattr(dataclass_section, k):
+                            setattr(dataclass_section, k, v)
diff --git a/src/hope_dedup_engine/apps/faces/services/duplication_detector.py b/src/hope_dedup_engine/apps/faces/services/duplication_detector.py
@@ -7,6 +7,7 @@
 import face_recognition
 import numpy as np
 
+from hope_dedup_engine.apps.api.deduplication.config import ConfigDefaults
 from hope_dedup_engine.apps.faces.managers import StorageManager
 from hope_dedup_engine.apps.faces.services.image_processor import ImageProcessor
 from hope_dedup_engine.apps.faces.validators import IgnorePairsValidator
@@ -22,23 +23,25 @@ class DuplicationDetector:
     def __init__(
         self,
         filenames: tuple[str],
-        ds_config: dict[str, Any] = None,
+        cfg: ConfigDefaults,
         ignore_pairs: tuple[tuple[str, str], ...] = (),
     ) -> None:
         """
         Initialize the DuplicationDetector with the given filenames and ignore pairs.
 
         Args:
             filenames (tuple[str]): The filenames of the images to process.
-            ds_config (dict[str, Any], optional): The configuration settings for the deduplication set.
+            cfg (ConfigDefaults): The configuration settings.
             ignore_pairs (tuple[tuple[str, str]], optional):
                 The pairs of filenames to ignore. Defaults to an empty tuple.
         """
         self.filenames = filenames
-        self.face_distance_threshold = ds_config.get("duplicates").get("tolerance")
+        self.face_distance_threshold = cfg.duplicates.tolerance
         self.ignore_set = IgnorePairsValidator.validate(ignore_pairs)
         self.storages = StorageManager()
-        self.image_processor = ImageProcessor(ds_config)
+        self.image_processor = ImageProcessor(
+            cfg_detection=cfg.detection, cfg_recognition=cfg.recognition
+        )
 
     def _encodings_filename(self, filename: str) -> str:
         """

diff --git a/src/hope_dedup_engine/apps/faces/services/image_processor.py b/src/hope_dedup_engine/apps/faces/services/image_processor.py
@@ -9,32 +9,22 @@
 import cv2
 import face_recognition
 import numpy as np
-from constance import config
 
+from hope_dedup_engine.apps.api.deduplication.config import (
+    DetectionConfig,
+    RecognitionConfig,
+)
 from hope_dedup_engine.apps.core.exceptions import NotCompliantImageError
 from hope_dedup_engine.apps.faces.managers import DNNInferenceManager, StorageManager
 
 
-@dataclass(frozen=True, slots=True)
-class FaceEncodingsConfig:
-    num_jitters: int
-    model: str
-
-
 @dataclass(frozen=True, slots=True)
 class BlobFromImageConfig:
     shape: dict[str, int] = field(init=False)
     scale_factor: float
     mean_values: tuple[float, float, float]
     prototxt_path: str
 
-    def __post_init__(self) -> None:
-        object.__setattr__(self, "shape", self._get_shape())
-        mean_values = self.mean_values
-        if isinstance(mean_values, str):
-            mean_values = tuple(map(float, mean_values.split(", ")))
-        object.__setattr__(self, "mean_values", mean_values)
-
     def _get_shape(self) -> dict[str, int]:
         pattern = r"input_shape\s*\{\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*\}"
         with open(self.prototxt_path, "r") as file:
@@ -59,30 +49,24 @@ class ImageProcessor:
 
     def __init__(
         self,
-        ds_config: dict[str, Any] = None,
+        cfg_detection: DetectionConfig,
+        cfg_recognition: RecognitionConfig,
     ) -> None:
         """
         Initialize the ImageProcessor with the required configurations.
         """
         self.storages = StorageManager()
         self.net = DNNInferenceManager(self.storages.get_storage("cv2")).get_model()
 
+        self.cfg_detection = cfg_detection
+        self.cfg_recognition = cfg_recognition
         self.blob_from_image_cfg = BlobFromImageConfig(
-            scale_factor=config.BLOB_FROM_IMAGE_SCALE_FACTOR,
-            mean_values=config.BLOB_FROM_IMAGE_MEAN_VALUES,
+            scale_factor=self.cfg_detection.blob_from_image_scale_factor,
+            mean_values=self.cfg_detection.blob_from_image_mean_values,
             prototxt_path=self.storages.get_storage("cv2").path(
                 settings.DNN_FILES.get("prototxt").get("filename")
             ),
         )
-        self.face_encodings_cfg = FaceEncodingsConfig(
-            num_jitters=ds_config.get("recognition").get("num_jitters"),
-            model=ds_config.get("recognition").get("model"),
-        )
-        self.face_detection_confidence: float = ds_config.get("detection").get(
-            "confidence"
-        )
-        self.distance_threshold: float = ds_config.get("duplicates").get("tolerance")
-        self.nms_threshold: float = config.NMS_THRESHOLD
 
     def _get_face_detections_dnn(
         self, filename: str
@@ -129,7 +113,7 @@ def _get_face_detections_dnn(
             for i in range(detections.shape[2]):
                 confidence = detections[0, 0, i, 2]
                 # Filter out weak detections by ensuring the confidence is greater than the minimum confidence
-                if confidence > self.face_detection_confidence:
+                if confidence > self.cfg_detection.confidence:
                     box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(
                         "int"
                     )
@@ -138,10 +122,10 @@ def _get_face_detections_dnn(
             if boxes:
                 # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
                 indices = cv2.dnn.NMSBoxes(
-                    boxes,
-                    confidences,
-                    self.face_detection_confidence,
-                    self.nms_threshold,
+                    bboxes=boxes,
+                    scores=confidences,
+                    score_threshold=self.cfg_detection.confidence,
+                    nms_threshold=self.cfg_detection.nms_threshold,
                 )
                 if indices is not None:
                     for i in indices:
@@ -200,8 +184,8 @@ def encode_face(self, filename: str, encodings_filename: str) -> None:
                         face_encodings = face_recognition.face_encodings(
                             image,
                             [(right, bottom, left, top)],
-                            num_jitters=self.face_encodings_cfg.num_jitters,
-                            model=self.face_encodings_cfg.model,
+                            num_jitters=self.cfg_recognition.num_jitters,
+                            model=self.cfg_recognition.model,
                         )
                         encodings.extend(face_encodings)
                     else: