Skip to content

Commit

Permalink
chg ! apply overridden config for deduplication set
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor committed Nov 12, 2024
1 parent a4b2ee8 commit 0465529
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 42 deletions.
10 changes: 5 additions & 5 deletions src/hope_dedup_engine/apps/api/deduplication/adapters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections.abc import Callable, Generator
from typing import Any

from hope_dedup_engine.apps.api.deduplication.config import ConfigDefaults
from hope_dedup_engine.apps.api.deduplication.registry import DuplicateKeyPair
from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.faces.services.duplication_detector import (
Expand All @@ -24,12 +24,12 @@ def run(
"reference_pk", "filename"
)
}
ds_config: dict[str, Any] = (
self.deduplication_set.config and self.deduplication_set.config.settings
) or {}
cfg = ConfigDefaults()
if self.deduplication_set.config:
cfg.apply_config_overrides(self.deduplication_set.config.settings)
# ignored key pairs are not handled correctly in DuplicationDetector
detector = DuplicationDetector(
tuple[str](filename_to_reference_pk.keys()), ds_config
tuple[str](filename_to_reference_pk.keys()), cfg=cfg
)
for first_filename, second_filename, distance in detector.find_duplicates(
tracker
Expand Down
55 changes: 55 additions & 0 deletions src/hope_dedup_engine/apps/api/deduplication/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from dataclasses import dataclass, field
from typing import Any, Literal

from constance import config as constance_cfg


@dataclass
class DetectionConfig:
    """
    Face-detection settings whose defaults come from the constance configuration.

    Each default is supplied through ``default_factory`` so that the constance
    value is looked up when an instance is created. Plain class-level defaults
    would be evaluated once, while this module is being imported, which would
    freeze the values for the lifetime of the process and would query the
    constance backend at import time.
    """

    # Default: constance DNN_FILES_SOURCE
    dnn_files_source: str = field(
        default_factory=lambda: constance_cfg.DNN_FILES_SOURCE
    )
    # Default: constance DNN_BACKEND
    dnn_backend: int = field(default_factory=lambda: constance_cfg.DNN_BACKEND)
    # Default: constance DNN_TARGET
    dnn_target: int = field(default_factory=lambda: constance_cfg.DNN_TARGET)
    # Default: constance BLOB_FROM_IMAGE_SCALE_FACTOR
    blob_from_image_scale_factor: float = field(
        default_factory=lambda: constance_cfg.BLOB_FROM_IMAGE_SCALE_FACTOR
    )
    # Default: constance BLOB_FROM_IMAGE_MEAN_VALUES, a comma-separated string
    # parsed into a tuple of floats. Splitting on "," alone is enough because
    # float() ignores surrounding whitespace, so "a, b, c" and "a,b,c" both parse.
    blob_from_image_mean_values: tuple[float, float, float] = field(
        default_factory=lambda: tuple(
            map(float, constance_cfg.BLOB_FROM_IMAGE_MEAN_VALUES.split(","))
        )
    )
    # Default: constance FACE_DETECTION_CONFIDENCE
    confidence: float = field(
        default_factory=lambda: constance_cfg.FACE_DETECTION_CONFIDENCE
    )
    # Default: constance NMS_THRESHOLD
    nms_threshold: float = field(default_factory=lambda: constance_cfg.NMS_THRESHOLD)


@dataclass
class RecognitionConfig:
    """
    Face-encoding settings whose defaults come from the constance configuration.

    The constance-backed defaults use ``default_factory`` so they are read when
    an instance is created rather than once when this module is imported.
    """

    # Default: constance FACE_ENCODINGS_NUM_JITTERS
    num_jitters: int = field(
        default_factory=lambda: constance_cfg.FACE_ENCODINGS_NUM_JITTERS
    )
    # Default: constance FACE_ENCODINGS_MODEL
    model: Literal["small", "large"] = field(
        default_factory=lambda: constance_cfg.FACE_ENCODINGS_MODEL
    )
    # Starts as a fresh empty list for every instance.
    preprocessors: list[str] = field(default_factory=list)


@dataclass
class DuplicatesConfig:
    """
    Duplicate-matching settings whose default comes from the constance configuration.

    The default uses ``default_factory`` so the constance value is read when an
    instance is created rather than once when this module is imported.
    """

    # Default: constance FACE_DISTANCE_THRESHOLD
    tolerance: float = field(
        default_factory=lambda: constance_cfg.FACE_DISTANCE_THRESHOLD
    )


@dataclass
class ConfigDefaults:
    """
    Container grouping the detection, recognition and duplicates settings.

    Each section is created from its own dataclass defaults and can then be
    selectively overridden with `apply_config_overrides`.
    """

    detection: DetectionConfig = field(default_factory=DetectionConfig)
    recognition: RecognitionConfig = field(default_factory=RecognitionConfig)
    duplicates: DuplicatesConfig = field(default_factory=DuplicatesConfig)

    def apply_config_overrides(
        self, config_settings: dict[str, Any] | None = None
    ) -> None:
        """
        Updates the instance in place with values from the provided config settings.

        Parameters:
            config_settings (dict | None): Optional dictionary of configuration overrides, structured to match
                sections in ConfigDefaults (e.g., "detection", "recognition", "duplicates"). Each section value
                must itself be a dictionary mapping option names to new values. Only keys that name a declared
                dataclass field are applied; unknown sections, unknown options and non-dictionary section values
                are ignored. No changes are made if `config_settings` is `None` or empty.
        """
        # Imported here because the module-level import only brings in
        # ``dataclass`` and ``field``.
        from dataclasses import fields, is_dataclass

        if not config_settings:
            return

        # Match against declared fields rather than arbitrary attributes, so a
        # key such as "apply_config_overrides" or "__class__" in the supplied
        # settings can never reach methods or dunder attributes.
        section_names = {f.name for f in fields(self)}
        for section_name, section_data in config_settings.items():
            if section_name not in section_names:
                continue
            if not isinstance(section_data, dict):
                continue
            section = getattr(self, section_name)
            if not is_dataclass(section):
                continue
            option_names = {f.name for f in fields(section)}
            for option, value in section_data.items():
                if option in option_names:
                    setattr(section, option, value)
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import face_recognition
import numpy as np

from hope_dedup_engine.apps.api.deduplication.config import ConfigDefaults
from hope_dedup_engine.apps.faces.managers import StorageManager
from hope_dedup_engine.apps.faces.services.image_processor import ImageProcessor
from hope_dedup_engine.apps.faces.validators import IgnorePairsValidator
Expand All @@ -22,23 +23,25 @@ class DuplicationDetector:
def __init__(
self,
filenames: tuple[str],
ds_config: dict[str, Any] = None,
cfg: ConfigDefaults,
ignore_pairs: tuple[tuple[str, str], ...] = (),
) -> None:
"""
Initialize the DuplicationDetector with the given filenames and ignore pairs.
Args:
filenames (tuple[str]): The filenames of the images to process.
ds_config (dict[str, Any], optional): The configuration settings for the deduplication set.
cfg (ConfigDefaults): The configuration settings.
ignore_pairs (tuple[tuple[str, str]], optional):
The pairs of filenames to ignore. Defaults to an empty tuple.
"""
self.filenames = filenames
self.face_distance_threshold = ds_config.get("duplicates").get("tolerance")
self.face_distance_threshold = cfg.duplicates.tolerance
self.ignore_set = IgnorePairsValidator.validate(ignore_pairs)
self.storages = StorageManager()
self.image_processor = ImageProcessor(ds_config)
self.image_processor = ImageProcessor(
cfg_detection=cfg.detection, cfg_recognition=cfg.recognition
)

def _encodings_filename(self, filename: str) -> str:
"""
Expand Down
50 changes: 17 additions & 33 deletions src/hope_dedup_engine/apps/faces/services/image_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,22 @@
import cv2
import face_recognition
import numpy as np
from constance import config

from hope_dedup_engine.apps.api.deduplication.config import (
DetectionConfig,
RecognitionConfig,
)
from hope_dedup_engine.apps.core.exceptions import NotCompliantImageError
from hope_dedup_engine.apps.faces.managers import DNNInferenceManager, StorageManager


@dataclass(frozen=True, slots=True)
class FaceEncodingsConfig:
num_jitters: int
model: str


@dataclass(frozen=True, slots=True)
class BlobFromImageConfig:
shape: dict[str, int] = field(init=False)
scale_factor: float
mean_values: tuple[float, float, float]
prototxt_path: str

def __post_init__(self) -> None:
object.__setattr__(self, "shape", self._get_shape())
mean_values = self.mean_values
if isinstance(mean_values, str):
mean_values = tuple(map(float, mean_values.split(", ")))
object.__setattr__(self, "mean_values", mean_values)

def _get_shape(self) -> dict[str, int]:
pattern = r"input_shape\s*\{\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*dim:\s*(\d+)\s*\}"
with open(self.prototxt_path, "r") as file:
Expand All @@ -59,30 +49,24 @@ class ImageProcessor:

def __init__(
self,
ds_config: dict[str, Any] = None,
cfg_detection: DetectionConfig,
cfg_recognition: RecognitionConfig,
) -> None:
"""
Initialize the ImageProcessor with the required configurations.
"""
self.storages = StorageManager()
self.net = DNNInferenceManager(self.storages.get_storage("cv2")).get_model()

self.cfg_detection = cfg_detection
self.cfg_recognition = cfg_recognition
self.blob_from_image_cfg = BlobFromImageConfig(
scale_factor=config.BLOB_FROM_IMAGE_SCALE_FACTOR,
mean_values=config.BLOB_FROM_IMAGE_MEAN_VALUES,
scale_factor=self.cfg_detection.blob_from_image_scale_factor,
mean_values=self.cfg_detection.blob_from_image_mean_values,
prototxt_path=self.storages.get_storage("cv2").path(
settings.DNN_FILES.get("prototxt").get("filename")
),
)
self.face_encodings_cfg = FaceEncodingsConfig(
num_jitters=ds_config.get("recognition").get("num_jitters"),
model=ds_config.get("recognition").get("model"),
)
self.face_detection_confidence: float = ds_config.get("detection").get(
"confidence"
)
self.distance_threshold: float = ds_config.get("duplicates").get("tolerance")
self.nms_threshold: float = config.NMS_THRESHOLD

def _get_face_detections_dnn(
self, filename: str
Expand Down Expand Up @@ -129,7 +113,7 @@ def _get_face_detections_dnn(
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2]
# Filter out weak detections by ensuring the confidence is greater than the minimum confidence
if confidence > self.face_detection_confidence:
if confidence > self.cfg_detection.confidence:
box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(
"int"
)
Expand All @@ -138,10 +122,10 @@ def _get_face_detections_dnn(
if boxes:
# Apply non-maxima suppression to suppress weak, overlapping bounding boxes
indices = cv2.dnn.NMSBoxes(
boxes,
confidences,
self.face_detection_confidence,
self.nms_threshold,
bboxes=boxes,
scores=confidences,
score_threshold=self.cfg_detection.confidence,
nms_threshold=self.cfg_detection.nms_threshold,
)
if indices is not None:
for i in indices:
Expand Down Expand Up @@ -200,8 +184,8 @@ def encode_face(self, filename: str, encodings_filename: str) -> None:
face_encodings = face_recognition.face_encodings(
image,
[(right, bottom, left, top)],
num_jitters=self.face_encodings_cfg.num_jitters,
model=self.face_encodings_cfg.model,
num_jitters=self.cfg_recognition.num_jitters,
model=self.cfg_recognition.model,
)
encodings.extend(face_encodings)
else:
Expand Down

0 comments on commit 0465529

Please sign in to comment.