
Migrate crop config creator node #191

Open · wants to merge 6 commits into main
2 changes: 2 additions & 0 deletions depthai_nodes/node/__init__.py
@@ -1,5 +1,6 @@
from .depth_color_transform import DepthColorTransform
from .depth_merger import DepthMerger
from .host_crop_config_creator import CropConfigsCreatorNode
from .host_spatials_calc import HostSpatialsCalc
from .parser_generator import ParserGenerator
from .parsers.base_parser import BaseParser
@@ -57,4 +58,5 @@
"BaseParser",
"DetectionParser",
"EmbeddingsParser",
"CropConfigsCreatorNode",
]
262 changes: 262 additions & 0 deletions depthai_nodes/node/host_crop_config_creator.py
@@ -0,0 +1,262 @@
import depthai as dai
import numpy as np

from depthai_nodes import ImgDetectionExtended, ImgDetectionsExtended


class CropConfigsCreatorNode(dai.node.HostNode):
Collaborator: Should we rename the node from CropConfigsCreatorNode to CropConfigsCreator? It would better follow the naming of the other nodes, as none of them have "Node" in their name (they are nodes by default).

Collaborator: +1 on the new name.

Contributor Author: Good catch, I'll change it.

"""This node creates a dai.ImageManipConfigV2() object for every detection in an
ImgDetectionsExtended message. The node iterates over a list of n detections
and sends a dai.ImageManipConfigV2 object for each detection. By default, the
node keeps at most the first 100 detections.

Before use, the source and target image sizes need to be set with the build function.
The node assumes the last frame is saved in the dai.ImgManipV2 node; when receiving a detection message, it first sends an empty crop config that skips the current frame and loads the next frame in the queue.

Collaborator: Why do we need this assumption? How can we get rid of it? Wouldn't it be better if this node also accepted the image, so it could make sure the crops are always created correctly?

Contributor Author (@aljazkonec1, Mar 24, 2025): The problem is that this is a host node and runs on the host. Receiving a frame from the device and then sending n frames back to the device completely fills the network and slows the pipeline considerably. And we can't make this a script node because there is no ImgDetectionsExtended on the devices (yet).

Collaborator: Huh, I see. It is going to be tough to make the host nodes work in both the peripheral and local modes. I remember there was an ImageManip config, something like wait_for_image and wait_for_config. wait_for_config meant that if the manipulation has an image ready, it has to wait for a manip config before processing it. Maybe, if this is still the case in depthai v3, requiring the manipulation to be configured like this would be a better dependency than this workaround, which will sometimes fail?


Attributes
----------
detections_input : dai.Input
The input link for the ImgDetectionsExtended message
config_output : dai.Output
The output link for the ImageManipConfigV2 messages
detections_output : dai.Output
The output link for the ImgDetectionsExtended message
w : int

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why cannot we work with relative coordinates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When there is a rotated rectangle, cropping in relative coordinates creates a sheared img crop in the original coordinates due to the aspect ratio in relative coordinates being 1:1 while in absolute coordinates the aspect ratio can be anything (16:9, 3:2, etc.)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isn't it a bug in depthai? I don't see why it matters if the bbox is rotated or not.
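The shearing the author describes can be illustrated with plain NumPy. This is a standalone geometric sketch, not the depthai API; `rect_corners` is a hypothetical helper:

```python
import numpy as np

def rect_corners(cx, cy, w, h, angle_deg):
    """Corners of a rotated rectangle with centre (cx, cy) and size (w, h)."""
    a = np.deg2rad(angle_deg)
    rot = np.array([[np.cos(a), -np.sin(a)], [np.sin(a), np.cos(a)]])
    half = np.array([[-w, -h], [w, -h], [w, h], [-w, h]]) / 2.0
    return half @ rot.T + np.array([cx, cy])

# A square rotated by 45 degrees, in relative (0..1) coordinates.
rel = rect_corners(0.5, 0.5, 0.2, 0.2, 45.0)

# Denormalizing scales the axes unequally on a 16:9 frame (1920x1080).
px = rel * np.array([1920.0, 1080.0])

# In relative coordinates, adjacent edges are perpendicular ...
e1, e2 = rel[1] - rel[0], rel[2] - rel[1]
print(abs(e1 @ e2) < 1e-9)   # True: still a rectangle

# ... but after unequal scaling they no longer are: the crop is sheared.
f1, f2 = px[1] - px[0], px[2] - px[1]
print(abs(f1 @ f2) < 1e-9)   # False: no longer a rectangle
```

Denormalizing the rotated rect in absolute pixels first, as the node does, avoids this distortion.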

The width of the source image.
h : int
The height of the source image.
target_w : int
The width of the target image.
target_h : int

Collaborator: I don't understand what this does. I can see it's used here: cfg.setOutputSize(self.target_w, self.target_h). But I don't understand its purpose. Is this a command to resize the crop? If so, what if I don't want to resize it?

Contributor Author: Yes. Because the size of the detected rectangle can be anything, this ensures the output of the crop node always has the same size, so the cropped images can be used in a second NN, for example.

Contributor Author: Now that you mention it, I do see that it's an assumption that someone will want same-size outputs. I can change it to be an option (e.g. equalize_crop_size = True). Thoughts, @klemen1999?

Collaborator: In the minemaestro app, we were generating crops based on bboxes and did not resize unless the resulting crop exceeded a threshold. So for me, a general node would have these options:

  1. crop and keep the resulting size
  2. crop and set a fixed size; with this, we would need options for different resizing modes, like stretch or letterbox
  3. crop and resize only if the resulting size is bigger than a threshold
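The letterbox variant of option 2 can be sketched in plain Python. This is a hypothetical helper, not part of the PR; it only computes the output geometry:

```python
def letterbox_size(w, h, target_w, target_h):
    """Fit a (w, h) crop into (target_w, target_h) without changing the
    aspect ratio; the remainder would be padded (the 'letterbox' bars)."""
    scale = min(target_w / w, target_h / h)
    new_w, new_h = round(w * scale), round(h * scale)
    return new_w, new_h, target_w - new_w, target_h - new_h

# A wide 200x100 crop into a 640x480 target: scaled to 640x320,
# leaving 160 px of vertical padding.
print(letterbox_size(200, 100, 640, 480))  # (640, 320, 0, 160)
```

Stretch mode, by contrast, is what `setOutputSize` alone gives: both axes scaled independently to the target.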

The height of the target image.
n_detections : int
The number of detections to keep.

Collaborator: Put the default value in here as well.

"""

def __init__(self) -> None:
"""Initializes the node."""
super().__init__()
self.config_output = self.createOutput(
possibleDatatypes=[
dai.Node.DatatypeHierarchy(dai.DatatypeEnum.ImageManipConfigV2, True)
]
)
self.detections_output = self.createOutput(
possibleDatatypes=[
dai.Node.DatatypeHierarchy(dai.DatatypeEnum.Buffer, True)
]
)
self._w: int = None
self._h: int = None
self._target_w: int = None
self._target_h: int = None
self._n_detections: int = None

@property
def w(self) -> int:
"""Returns the width of the source image.

@return: Width of the source image.
@rtype: int
"""
return self._w

@property
def h(self) -> int:
"""Returns the height of the source image.

@return: Height of the source image.
@rtype: int
"""
return self._h

@property
def target_w(self) -> int:
"""Returns the width of the target image.

@return: Width of the target image.
@rtype: int
"""
return self._target_w

@property
def target_h(self) -> int:
"""Returns the height of the target image.

@return: Height of the target image.
@rtype: int
"""
return self._target_h

@property
def n_detections(self) -> int:
"""Returns the number of detections to keep.

@return: Number of detections to keep.
@rtype: int
"""
return self._n_detections

@w.setter
def w(self, w: int):
"""Sets the width of the source image.

@param w: Width of the source image.
@type w: int
@raise TypeError: If w is not an integer.
@raise ValueError: If w is less than 1.
"""
self._validate_positive_integer(w)
self._w = w

@h.setter
def h(self, h: int):
"""Sets the height of the source image.

@param h: Height of the source image.
@type h: int
@raise TypeError: If h is not an integer.
@raise ValueError: If h is less than 1.
"""
self._validate_positive_integer(h)
self._h = h

@target_w.setter
def target_w(self, target_w: int):
"""Sets the width of the target image.

@param target_w: Width of the target image.
@type target_w: int
@raise TypeError: If target_w is not an integer.
@raise ValueError: If target_w is less than 1.
"""
self._validate_positive_integer(target_w)
self._target_w = target_w

@target_h.setter
def target_h(self, target_h: int):
"""Sets the height of the target image.

@param target_h: Height of the target image.
@type target_h: int
@raise TypeError: If target_h is not an integer.
@raise ValueError: If target_h is less than 1.
"""
self._validate_positive_integer(target_h)
self._target_h = target_h

@n_detections.setter
def n_detections(self, n_detections: int):
"""Sets the number of detections to keep.

@param n_detections: Number of detections to keep.
@type n_detections: int
@raise TypeError: If n_detections is not an integer.
@raise ValueError: If n_detections is less than 1.
"""
self._validate_positive_integer(n_detections)
self._n_detections = n_detections

def build(
self,
detections_input: dai.Node.Output,
w: int,
h: int,
target_w: int,
target_h: int,
n_detections: int = 100,
) -> "CropConfigsCreatorNode":
"""Link the node input and set the correct source and target image sizes.

Parameters
----------
detections_input : dai.Node.Output
The input link for the ImgDetectionsExtended message
w : int
The width of the source image.
h : int
The height of the source image.
target_w : int
The width of the target image.
target_h : int
The height of the target image.
n_detections : int, optional
The number of detections to keep, by default 100
"""
self.w = w
self.h = h
self.target_w = target_w
self.target_h = target_h
self.n_detections = n_detections
self.link_args(detections_input)

return self

def process(self, detections_input: dai.Buffer) -> None:
"""Process the input detections and create crop configurations. This function
runs every time a new ImgDetectionsExtended message is received.

Sends the first n crop configurations to the config_output link and
additionally sends an ImgDetectionsExtended object containing the
corresponding detections to the detections_output link.
"""
assert isinstance(detections_input, ImgDetectionsExtended)
detections = detections_input.detections
sequence_num = detections_input.getSequenceNum()
timestamp = detections_input.getTimestamp()

detections_to_keep = []
num_detections = min(len(detections), self._n_detections)

# Skip the current frame / load new frame
cfg = dai.ImageManipConfigV2()
cfg.setSkipCurrentImage(True)
cfg.setTimestamp(timestamp)
cfg.setSequenceNum(sequence_num)
send_status = False
while not send_status:
send_status = self.config_output.trySend(cfg)

Collaborator: As mentioned above, this seems like a hack that will break in some situations, and then it's going to be a mess. Or does it work every time? Also, if this while loop is needed, maybe add a small wait to prevent spamming trySend?

Contributor Author: trySend already waits for connected outputs to receive the message before returning the boolean, so there is no spamming of messages. In theory this means there should be no sync loss (at least not due to this while loop).

Collaborator: OK. From dai's tryGet, which doesn't wait, I was under the impression that this would immediately return False if the message cannot be sent.
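The small wait the reviewer suggests could look like the following. This is a hypothetical sketch; `output` stands for any object exposing a `trySend` method:

```python
import time

def send_with_retry(output, msg, delay_s=0.001, max_tries=1000):
    """Retry trySend with a short sleep between attempts, instead of a
    tight busy-wait loop."""
    for _ in range(max_tries):
        if output.trySend(msg):
            return True
        time.sleep(delay_s)
    return False
```

With depthai's actual trySend semantics (it waits for connected outputs to receive the message), the sleep is redundant, which is the author's point.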


for i in range(num_detections):
cfg = dai.ImageManipConfigV2()
detection: ImgDetectionExtended = detections[i]
detections_to_keep.append(detection)
rect = detection.rotated_rect
rect = rect.denormalize(self.w, self.h)

cfg.addCropRotatedRect(rect, normalizedCoords=False)
cfg.setOutputSize(self.target_w, self.target_h)
cfg.setReusePreviousImage(True)
cfg.setTimestamp(timestamp)
cfg.setSequenceNum(sequence_num)

send_status = False
while not send_status:
send_status = self.config_output.trySend(cfg)

detections_msg = ImgDetectionsExtended()
detections_msg.setSequenceNum(sequence_num)
detections_msg.setTimestamp(timestamp)
detections_msg.setTransformation(detections_input.transformation)
detections_msg.detections = detections_to_keep

if detections_input.masks.ndim == 2:
masks = np.where(
detections_input.masks >= num_detections, -1, detections_input.masks
)
detections_msg.masks = masks

self.detections_output.send(detections_msg)

def _validate_positive_integer(self, value: int):
"""Validates that the set size is a positive integer.

@param value: The value to validate.
@type value: int
@raise TypeError: If value is not an integer.
@raise ValueError: If value is less than 1.
"""
if not isinstance(value, int):
raise TypeError("Value must be an integer.")
if value < 1:
    raise ValueError("Value must be a positive integer (>= 1).")
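The mask clipping that process() performs can be reproduced standalone in plain NumPy, mirroring the np.where call in the node:

```python
import numpy as np

# Segmentation mask values index detections; after keeping only the first
# num_detections detections, higher indices are marked invalid (-1).
masks = np.array([[0, 1, 2],
                  [3, -1, 5]], dtype=np.int16)
num_detections = 4

clipped = np.where(masks >= num_detections, -1, masks)
print(clipped)
# [[ 0  1  2]
#  [ 3 -1 -1]]
```

Values already at -1 (background) pass through unchanged, since -1 is never >= num_detections.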
15 changes: 14 additions & 1 deletion tests/unittests/test_nodes/conftest.py
@@ -10,6 +10,9 @@ def __init__(self):
def get(self):
return self._messages.pop(0)

def get_all(self):
return self._messages

def send(self, item):
self._messages.append(item)

@@ -32,6 +35,14 @@ def send(self, message):
for queue in self._queues:
queue.send(message)

def trySend(self, message):
for queue in self._queues:
queue.send(message)
return True

def returnQueue(self):
return self._queues

def createOutputQueue(self):
queue = Queue()
self._queues.append(queue)
@@ -61,7 +72,9 @@ def output(self, output):
self._output = output

def createOutput(self, possibleDatatypes: List[Tuple[dai.DatatypeEnum, bool]]):
return self._output
o = Output()
o.setPossibleDatatypes(possibleDatatypes)
return o

def sendProcessingToPipeline(self, send: bool):
self._sendProcessingToPipeline = send
235 changes: 235 additions & 0 deletions tests/unittests/test_nodes/test_host_crop_config_creator.py
@@ -0,0 +1,235 @@
import depthai as dai
import numpy as np
import pytest
import yaml
from conftest import Output
from pytest import FixtureRequest

from depthai_nodes import ImgDetectionExtended, ImgDetectionsExtended
from depthai_nodes.node import CropConfigsCreatorNode


@pytest.fixture
def crop_configs_creator():
return CropConfigsCreatorNode()


@pytest.fixture
def empty_img_detections_extended():
detections = ImgDetectionsExtended()

return detections


@pytest.fixture
def single_img_detections_extended():
detections = ImgDetectionsExtended()
detection = ImgDetectionExtended()
xmin = 0.3
xmax = 0.5
ymin = 0.3
ymax = 0.5
x_center = (xmin + xmax) / 2
y_center = (ymin + ymax) / 2
width = xmax - xmin
height = ymax - ymin
detection.rotated_rect = (x_center, y_center, width, height, 0)
detection.rotated_rect.angle = 0
detection.label = 1
detection.confidence = 0.9
detections.detections = [detection]

return detections


@pytest.fixture
def img_detections_extended():
return create_img_detections_extended()


@pytest.fixture
def img_detections_with_map():
detections = create_img_detections_extended()
np.random.seed(1)
map = np.random.randint(-1, 10, (1000, 1000), dtype=np.int16)

detections.masks = map

return detections


def create_img_detections_extended():
detections = ImgDetectionsExtended()

c_min = np.linspace(0.1, 0.8, 8)
c_max = np.linspace(0.2, 0.9, 8)
for xmin, xmax, ymin, ymax in zip(c_min, c_max, c_min, c_max):
x_center = (xmin + xmax) / 2
y_center = (ymin + ymax) / 2
width = xmax - xmin
height = ymax - ymin
detection = ImgDetectionExtended()
detection.rotated_rect = (x_center, y_center, width, height, 0)
detection.label = 1
detection.confidence = 0.9
detections.detections.append(detection)

return detections
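The corner-to-centre conversion used in these fixtures can be sanity-checked in isolation; this is plain arithmetic with no depthai dependency:

```python
import math

xmin, xmax, ymin, ymax = 0.3, 0.5, 0.3, 0.5

# Centre/size form expected by ImgDetectionExtended.rotated_rect.
x_center, y_center = (xmin + xmax) / 2, (ymin + ymax) / 2
width, height = xmax - xmin, ymax - ymin

# Round-tripping back to corners recovers the inputs (up to float error).
assert math.isclose(x_center - width / 2, xmin)
assert math.isclose(x_center + width / 2, xmax)
assert math.isclose(y_center - height / 2, ymin)
assert math.isclose(y_center + height / 2, ymax)
```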


@pytest.mark.parametrize(
"detections",
[
"empty_img_detections_extended",
"single_img_detections_extended",
"img_detections_extended",
],
)
def test_img_detections(
crop_configs_creator: CropConfigsCreatorNode,
request: FixtureRequest,
detections: str,
):
img_detections: ImgDetectionsExtended = request.getfixturevalue(detections)
img_detections_msg = Output()

crop_configs_creator.build(
detections_input=img_detections_msg, w=1000, h=1000, target_w=640, target_h=480
)

assert crop_configs_creator.w == 1000
assert crop_configs_creator.h == 1000
assert crop_configs_creator.target_w == 640
assert crop_configs_creator.target_h == 480
assert crop_configs_creator.n_detections == 100

q_img_detections_msg = img_detections_msg.createOutputQueue()

q_configs = crop_configs_creator.config_output.createOutputQueue()
q_detections = crop_configs_creator.detections_output.createOutputQueue()

img_detections_msg.send(img_detections)

crop_configs_creator.process(q_img_detections_msg.get())

skip_config: dai.ImageManipConfigV2 = q_configs.get()
config_str = str(skip_config)
config_dict = yaml.safe_load(config_str)
assert config_dict["skipCurrentImage"]

crop_detections: ImgDetectionsExtended = q_detections.get()
assert crop_detections.getSequenceNum() == img_detections.getSequenceNum()
assert crop_detections.getTimestamp() == img_detections.getTimestamp()
assert crop_detections.transformation == img_detections.transformation

for true_det, crop_det in zip(
img_detections.detections, crop_detections.detections
):
true_coords = true_det.rotated_rect.getOuterRect()
return_coords = crop_det.rotated_rect.getOuterRect()

config_msg = q_configs.get()
config_str = str(config_msg)
config_str = config_str.replace("[", "")
config_str = config_str.replace("]", "")
config_dict = yaml.safe_load(config_str)

assert not config_dict["skipCurrentImage"]
assert config_dict["reusePreviousImage"]
assert np.isclose(config_dict["base"]["outputHeight"], 480)
assert np.isclose(config_dict["base"]["outputWidth"], 640)

assert np.allclose(true_coords, return_coords, atol=1e-8)
assert crop_det.label == true_det.label
assert crop_det.confidence == true_det.confidence


@pytest.mark.parametrize(
"detections",
[
"img_detections_with_map",
],
)
def test_map_clipping(
crop_configs_creator: CropConfigsCreatorNode,
request: FixtureRequest,
detections: str,
):
img_detections: ImgDetectionsExtended = request.getfixturevalue(detections)
img_detections_msg = Output()

crop_configs_creator.build(
detections_input=img_detections_msg,
w=1000,
h=1000,
target_w=640,
target_h=480,
n_detections=4,
)

q_img_detections_msg = img_detections_msg.createOutputQueue()

q_detections = crop_configs_creator.detections_output.createOutputQueue()

img_detections_msg.send(img_detections)

crop_configs_creator.process(q_img_detections_msg.get())

crop_detections: ImgDetectionsExtended = q_detections.get()

truth_map = img_detections.masks
truth_map = np.where(truth_map >= 4, -1, truth_map)

assert np.allclose(crop_detections.masks, truth_map)


@pytest.mark.parametrize(
"detections",
[
"single_img_detections_extended",
"img_detections_extended",
],
)
def test_num_detections(
crop_configs_creator: CropConfigsCreatorNode,
request: FixtureRequest,
detections: str,
):
img_detections: ImgDetectionsExtended = request.getfixturevalue(detections)
img_detections_msg = Output()

crop_configs_creator.build(
detections_input=img_detections_msg,
w=1000,
h=1000,
target_w=640,
target_h=480,
n_detections=1,
)

assert crop_configs_creator.w == 1000
assert crop_configs_creator.h == 1000
assert crop_configs_creator.target_w == 640
assert crop_configs_creator.target_h == 480
assert crop_configs_creator.n_detections == 1

q_img_detections_msg = img_detections_msg.createOutputQueue()

q_configs = crop_configs_creator.config_output.createOutputQueue()
q_detections = crop_configs_creator.detections_output.createOutputQueue()

img_detections_msg.send(img_detections)

crop_configs_creator.process(q_img_detections_msg.get())

skip_config: dai.ImageManipConfigV2 = q_configs.get()
config_str = str(skip_config)
config_dict = yaml.safe_load(config_str)
assert config_dict["skipCurrentImage"]

crop_detections: ImgDetectionsExtended = q_detections.get()
assert crop_detections.getSequenceNum() == img_detections.getSequenceNum()
assert crop_detections.getTimestamp() == img_detections.getTimestamp()
assert crop_detections.transformation == img_detections.transformation
assert len(crop_detections.detections) == 1
assert len(q_configs.get_all()) == 1