From 0465529a540636307429f349a255a7af1a92ee59 Mon Sep 17 00:00:00 2001 From: Vitali Yanushchyk Date: Tue, 12 Nov 2024 04:55:08 -0300 Subject: [PATCH] chg ! apply overridden config for deduplication set --- .../apps/api/deduplication/adapters.py | 10 ++-- .../apps/api/deduplication/config.py | 55 +++++++++++++++++++ .../faces/services/duplication_detector.py | 11 ++-- .../apps/faces/services/image_processor.py | 50 ++++++----------- 4 files changed, 84 insertions(+), 42 deletions(-) create mode 100644 src/hope_dedup_engine/apps/api/deduplication/config.py diff --git a/src/hope_dedup_engine/apps/api/deduplication/adapters.py b/src/hope_dedup_engine/apps/api/deduplication/adapters.py index a6345bc..5fc83bd 100644 --- a/src/hope_dedup_engine/apps/api/deduplication/adapters.py +++ b/src/hope_dedup_engine/apps/api/deduplication/adapters.py @@ -1,6 +1,6 @@ from collections.abc import Callable, Generator -from typing import Any +from hope_dedup_engine.apps.api.deduplication.config import ConfigDefaults from hope_dedup_engine.apps.api.deduplication.registry import DuplicateKeyPair from hope_dedup_engine.apps.api.models import DeduplicationSet from hope_dedup_engine.apps.faces.services.duplication_detector import ( @@ -24,12 +24,12 @@ def run( "reference_pk", "filename" ) } - ds_config: dict[str, Any] = ( - self.deduplication_set.config and self.deduplication_set.config.settings - ) or {} + cfg = ConfigDefaults() + if self.deduplication_set.config: + cfg.apply_config_overrides(self.deduplication_set.config.settings) # ignored key pairs are not handled correctly in DuplicationDetector detector = DuplicationDetector( - tuple[str](filename_to_reference_pk.keys()), ds_config + tuple[str](filename_to_reference_pk.keys()), cfg=cfg ) for first_filename, second_filename, distance in detector.find_duplicates( tracker diff --git a/src/hope_dedup_engine/apps/api/deduplication/config.py b/src/hope_dedup_engine/apps/api/deduplication/config.py new file mode 100644 index 0000000..1aeae3b 
--- /dev/null +++ b/src/hope_dedup_engine/apps/api/deduplication/config.py @@ -0,0 +1,55 @@ +from dataclasses import dataclass, field +from typing import Any, Literal + +from constance import config as constance_cfg + + +@dataclass +class DetectionConfig: + dnn_files_source: str = constance_cfg.DNN_FILES_SOURCE + dnn_backend: int = constance_cfg.DNN_BACKEND + dnn_target: int = constance_cfg.DNN_TARGET + blob_from_image_scale_factor: float = constance_cfg.BLOB_FROM_IMAGE_SCALE_FACTOR + blob_from_image_mean_values: tuple[float, float, float] = tuple( + map(float, constance_cfg.BLOB_FROM_IMAGE_MEAN_VALUES.split(", ")) + ) + confidence: float = constance_cfg.FACE_DETECTION_CONFIDENCE + nms_threshold: float = constance_cfg.NMS_THRESHOLD + + +@dataclass +class RecognitionConfig: + num_jitters: int = constance_cfg.FACE_ENCODINGS_NUM_JITTERS + model: Literal["small", "large"] = constance_cfg.FACE_ENCODINGS_MODEL + preprocessors: list[str] = field(default_factory=list) + + +@dataclass +class DuplicatesConfig: + tolerance: float = constance_cfg.FACE_DISTANCE_THRESHOLD + + +@dataclass +class ConfigDefaults: + detection: DetectionConfig = field(default_factory=DetectionConfig) + recognition: RecognitionConfig = field(default_factory=RecognitionConfig) + duplicates: DuplicatesConfig = field(default_factory=DuplicatesConfig) + + def apply_config_overrides( + self, config_settings: dict[str, Any] | None = None + ) -> None: + """ + Updates the instance with values from the provided config settings. + + Parameters: + config_settings (dict | None): Optional dictionary of configuration overrides, structured to match + sections in ConfigDefaults (e.g., "detection", "recognition", "duplicates"). Only matching attributes + are updated. No changes are made if `config_settings` is `None` or empty. 
+ """ + if config_settings: + for section_name, section_data in config_settings.items(): + dataclass_section = getattr(self, section_name, None) + if dataclass_section and isinstance(section_data, dict): + for k, v in section_data.items(): + if hasattr(dataclass_section, k): + setattr(dataclass_section, k, v) diff --git a/src/hope_dedup_engine/apps/faces/services/duplication_detector.py b/src/hope_dedup_engine/apps/faces/services/duplication_detector.py index 38191cb..4de541d 100644 --- a/src/hope_dedup_engine/apps/faces/services/duplication_detector.py +++ b/src/hope_dedup_engine/apps/faces/services/duplication_detector.py @@ -7,6 +7,7 @@ import face_recognition import numpy as np +from hope_dedup_engine.apps.api.deduplication.config import ConfigDefaults from hope_dedup_engine.apps.faces.managers import StorageManager from hope_dedup_engine.apps.faces.services.image_processor import ImageProcessor from hope_dedup_engine.apps.faces.validators import IgnorePairsValidator @@ -22,7 +23,7 @@ class DuplicationDetector: def __init__( self, filenames: tuple[str], - ds_config: dict[str, Any] = None, + cfg: ConfigDefaults, ignore_pairs: tuple[tuple[str, str], ...] = (), ) -> None: """ @@ -30,15 +31,17 @@ def __init__( Args: filenames (tuple[str]): The filenames of the images to process. - ds_config (dict[str, Any], optional): The configuration settings for the deduplication set. + cfg (ConfigDefaults): The configuration settings. ignore_pairs (tuple[tuple[str, str]], optional): The pairs of filenames to ignore. Defaults to an empty tuple. 
""" self.filenames = filenames - self.face_distance_threshold = ds_config.get("duplicates").get("tolerance") + self.face_distance_threshold = cfg.duplicates.tolerance self.ignore_set = IgnorePairsValidator.validate(ignore_pairs) self.storages = StorageManager() - self.image_processor = ImageProcessor(ds_config) + self.image_processor = ImageProcessor( + cfg_detection=cfg.detection, cfg_recognition=cfg.recognition + ) def _encodings_filename(self, filename: str) -> str: """ diff --git a/src/hope_dedup_engine/apps/faces/services/image_processor.py b/src/hope_dedup_engine/apps/faces/services/image_processor.py index fe2b5e5..20e0d17 100644 --- a/src/hope_dedup_engine/apps/faces/services/image_processor.py +++ b/src/hope_dedup_engine/apps/faces/services/image_processor.py @@ -9,18 +9,15 @@ import cv2 import face_recognition import numpy as np -from constance import config +from hope_dedup_engine.apps.api.deduplication.config import ( + DetectionConfig, + RecognitionConfig, +) from hope_dedup_engine.apps.core.exceptions import NotCompliantImageError from hope_dedup_engine.apps.faces.managers import DNNInferenceManager, StorageManager -@dataclass(frozen=True, slots=True) -class FaceEncodingsConfig: - num_jitters: int - model: str - - @dataclass(frozen=True, slots=True) class BlobFromImageConfig: shape: dict[str, int] = field(init=False) @@ -28,13 +25,6 @@ class BlobFromImageConfig: mean_values: tuple[float, float, float] prototxt_path: str - def __post_init__(self) -> None: - object.__setattr__(self, "shape", self._get_shape()) - mean_values = self.mean_values - if isinstance(mean_values, str): - mean_values = tuple(map(float, mean_values.split(", "))) - object.__setattr__(self, "mean_values", mean_values) - def _get_shape(self) -> dict[str, int]: pattern = r"input_shape\s*\{\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*\}" with open(self.prototxt_path, "r") as file: @@ -59,7 +49,8 @@ class ImageProcessor: def __init__( self, - ds_config: dict[str, Any] 
= None, + cfg_detection: DetectionConfig, + cfg_recognition: RecognitionConfig, ) -> None: """ Initialize the ImageProcessor with the required configurations. @@ -67,22 +58,15 @@ def __init__( self.storages = StorageManager() self.net = DNNInferenceManager(self.storages.get_storage("cv2")).get_model() + self.cfg_detection = cfg_detection + self.cfg_recognition = cfg_recognition self.blob_from_image_cfg = BlobFromImageConfig( - scale_factor=config.BLOB_FROM_IMAGE_SCALE_FACTOR, - mean_values=config.BLOB_FROM_IMAGE_MEAN_VALUES, + scale_factor=self.cfg_detection.blob_from_image_scale_factor, + mean_values=self.cfg_detection.blob_from_image_mean_values, prototxt_path=self.storages.get_storage("cv2").path( settings.DNN_FILES.get("prototxt").get("filename") ), ) - self.face_encodings_cfg = FaceEncodingsConfig( - num_jitters=ds_config.get("recognition").get("num_jitters"), - model=ds_config.get("recognition").get("model"), - ) - self.face_detection_confidence: float = ds_config.get("detection").get( - "confidence" - ) - self.distance_threshold: float = ds_config.get("duplicates").get("tolerance") - self.nms_threshold: float = config.NMS_THRESHOLD def _get_face_detections_dnn( self, filename: str @@ -129,7 +113,7 @@ def _get_face_detections_dnn( for i in range(detections.shape[2]): confidence = detections[0, 0, i, 2] # Filter out weak detections by ensuring the confidence is greater than the minimum confidence - if confidence > self.face_detection_confidence: + if confidence > self.cfg_detection.confidence: box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype( "int" ) @@ -138,10 +122,10 @@ def _get_face_detections_dnn( if boxes: # Apply non-maxima suppression to suppress weak, overlapping bounding boxes indices = cv2.dnn.NMSBoxes( - boxes, - confidences, - self.face_detection_confidence, - self.nms_threshold, + bboxes=boxes, + scores=confidences, + score_threshold=self.cfg_detection.confidence, + nms_threshold=self.cfg_detection.nms_threshold, ) if indices is 
not None: for i in indices: @@ -200,8 +184,8 @@ def encode_face(self, filename: str, encodings_filename: str) -> None: face_encodings = face_recognition.face_encodings( image, [(right, bottom, left, top)], - num_jitters=self.face_encodings_cfg.num_jitters, - model=self.face_encodings_cfg.model, + num_jitters=self.cfg_recognition.num_jitters, + model=self.cfg_recognition.model, ) encodings.extend(face_encodings) else: