Skip to content
2 changes: 2 additions & 0 deletions depthai_nodes/node/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .depth_color_transform import DepthColorTransform
from .depth_merger import DepthMerger
from .host_crop_config_creator import CropConfigsCreatorNode
from .host_spatials_calc import HostSpatialsCalc
from .parser_generator import ParserGenerator
from .parsers.base_parser import BaseParser
Expand Down Expand Up @@ -57,4 +58,5 @@
"BaseParser",
"DetectionParser",
"EmbeddingsParser",
"CropConfigsCreatorNode",
]
250 changes: 250 additions & 0 deletions depthai_nodes/node/host_crop_config_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
from typing import Optional

import depthai as dai
import numpy as np

from depthai_nodes import ImgDetectionExtended, ImgDetectionsExtended


class CropConfigsCreatorNode(dai.node.HostNode):
"""A host node for creating crop configurations for n detections. The node iterates
over a list of n detections and sends a dai.ImageManipConfigV2 object for each
detection. By default, the node will keep at most the first 100 detections.

Before use, the source and target image sizes need to be set with the build function.

Attributes
----------
detections_input : dai.Input
The input link for the ImgDetectionsExtended message
config_output : dai.Output
The output link for the ImageManipConfigV2 messages
detections_output : dai.Output
The output link for the ImgDetectionsExtended message
w : int

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why cannot we work with relative coordinates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When there is a rotated rectangle, cropping in relative coordinates creates a sheared img crop in the original coordinates due to the aspect ratio in relative coordinates being 1:1 while in absolute coordinates the aspect ratio can be anything (16:9, 3:2, etc.)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isn't it a bug in depthai? I don't see why it matters if the bbox is rotated or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically it is not a bug, as it performs the correct rotation in relative coordinates. The thing is that a rotation in relative coordinates is not the same as a rotation in absolute coordinates. Suppose the original image is 1000x500 and you rotate the point (0, 1) by 45 deg in relative coordinates: you get (sqrt(2)/2, sqrt(2)/2) in relative coordinates, which is (1000 * sqrt(2)/2, 500 * sqrt(2)/2) in absolute coordinates, but that is not a 45 deg rotation in absolute coordinates.

The width of the source image.
h : int
The height of the source image.
target_w : int
The width of the target image.
target_h : int

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand what this does. I can see it's used here: cfg.setOutputSize(self.target_w, self.target_h)
But I don't understand its purpose. Is this a command to resize the crop? If so, what if I don't want to resize it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Because the size of the detected rectangle can be anything, this ensures that the output of the crop node always has the same size, so the cropped images can be used in a second NN, for example.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that you mention it, I do see that it's an assumption that someone will want same-size outputs. I can change it to be an option (e.g. equalize_crop_size = True). Thoughts, @klemen1999?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the minemaestro app, we were generating crops based on bboxes and we did not resize unless the resulting crop was larger than a threshold. So for me, a general node would have these options:

  1. crop and keep the resulting size
  2. crop and set a fixed size; for this, we would need a way to choose between different resizing modes, such as stretch or letterbox
  3. crop and resize if the resulting size is bigger than a threshold

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot about option 1. I'll adapt the node so that all 3 options are covered. Thanks!

The height of the target image.
n_detections : int
The number of detections to keep.
"""

def __init__(self):
    """Set up the node's links and leave every setting unconfigured.

    One input link and two output links are created on the node. The source
    size, the target size and the detection limit all start out as None; they
    are filled in afterwards through build() or the matching property setters.
    """
    super().__init__()

    # Message links exposed by the node.
    self.detections_input = self.createInput()
    self.config_output = self.createOutput()
    self.detections_output = self.createOutput()

    # Configuration placeholders; None means "not configured yet".
    self._n_detections: Optional[int] = None
    self._target_w: Optional[int] = None
    self._target_h: Optional[int] = None
    self._w: Optional[int] = None
    self._h: Optional[int] = None

def build(
    self,
    detections_input: dai.Node.Output,
    w: int,
    h: int,
    target_w: int,
    target_h: int,
    n_detections: int = 100,
) -> "CropConfigsCreatorNode":
    """Configure the image sizes and detection limit, then link the input.

    Parameters
    ----------
    detections_input : dai.Node.Output
        The output that produces the ImgDetectionsExtended messages.
    w : int
        The width of the source image.
    h : int
        The height of the source image.
    target_w : int
        The width of the target image.
    target_h : int
        The height of the target image.
    n_detections : int, optional
        The number of detections to keep, by default 100

    Returns
    -------
    CropConfigsCreatorNode
        This node, so that the call can be chained.

    Raises
    ------
    TypeError
        If any size or the detection limit is not an integer.
    ValueError
        If any size or the detection limit is less than 1.
    """
    # Assigning through the properties runs the positive-integer validation;
    # the values are applied in the same order as the parameters are declared.
    settings = (
        ("w", w),
        ("h", h),
        ("target_w", target_w),
        ("target_h", target_h),
        ("n_detections", n_detections),
    )
    for attribute, value in settings:
        setattr(self, attribute, value)

    self.link_args(detections_input)
    return self

def process(self, detections_input: dai.Buffer) -> None:
    """Create one crop configuration for every kept detection.

    This function is run every time a new ImgDetectionsExtended message is
    received.

    For each message the node first sends a config with
    ``setSkipCurrentImage(True)`` (skip the current frame / load a new frame).
    It then sends one config per kept detection. Each of those crops the
    detection's rotated rectangle (denormalized to the source image size),
    sets the output size to the target size and has
    ``setReusePreviousImage(True)``. Every config carries the timestamp and
    sequence number of the incoming message.

    Finally the incoming message, trimmed to the first ``n_detections``
    detections, is sent on ``detections_output``. If its ``masks`` array is
    two-dimensional, every mask value greater than or equal to the number of
    kept detections is replaced with -1.

    Parameters
    ----------
    detections_input : dai.Buffer
        The incoming message; it must be an ImgDetectionsExtended instance.

    Raises
    ------
    AssertionError
        If the message is not an ImgDetectionsExtended instance.
    RuntimeError
        If the source size, target size or detection limit has not been set.
    """
    assert isinstance(detections_input, ImgDetectionsExtended)

    # Fail with a clear message instead of an obscure TypeError further down
    # when the node is used before it has been configured.
    if None in (self.w, self.h, self.target_w, self.target_h, self.n_detections):
        raise RuntimeError(
            "CropConfigsCreatorNode is not configured: call build() (or set "
            "w, h, target_w, target_h and n_detections) before processing "
            "messages."
        )

    timestamp = detections_input.getTimestamp()
    sequence_num = detections_input.getSequenceNum()

    detections_to_keep = list(detections_input.detections[: self.n_detections])
    num_detections = len(detections_to_keep)

    # Skip the current frame / load new frame
    skip_cfg = dai.ImageManipConfigV2()
    skip_cfg.setSkipCurrentImage(True)
    skip_cfg.setTimestamp(timestamp)
    skip_cfg.setSequenceNum(sequence_num)
    self._send_config_blocking(skip_cfg)

    for detection in detections_to_keep:
        # Crop in absolute coordinates: per the review discussion on this
        # node, cropping a rotated rectangle in relative coordinates yields a
        # sheared crop when the source image is not square.
        rect = detection.rotated_rect.denormalize(self.w, self.h)

        cfg = dai.ImageManipConfigV2()
        cfg.addCropRotatedRect(rect, normalizedCoords=False)
        cfg.setOutputSize(self.target_w, self.target_h)
        cfg.setReusePreviousImage(True)
        cfg.setTimestamp(timestamp)
        cfg.setSequenceNum(sequence_num)
        self._send_config_blocking(cfg)

    detections_input.detections = detections_to_keep
    if detections_input.masks.ndim == 2:
        masks = detections_input.masks
        # Mask values that refer to dropped detections are reset to -1.
        detections_input.masks = np.where(masks >= num_detections, -1, masks)
    self.detections_output.send(detections_input)


def _send_config_blocking(self, cfg) -> None:
    """Send cfg on config_output, retrying until trySend reports success."""
    while not self.config_output.trySend(cfg):
        pass

def _validate_positive_integer(self, value: int):
"""Validates that the set size is a positive integer.

@param value: The value to validate.
@type value: int
@raise TypeError: If value is not an integer.
@raise ValueError: If value is less than 1.
"""
if not isinstance(value, int):
raise TypeError("Value must be an integer.")
if value < 1:
raise ValueError("Value must be greater than 1.")

@property
def w(self) -> Optional[int]:
    """Returns the width of the source image.

    @return: Width of the source image, or None if it has not been set yet.
    @rtype: Optional[int]
    """
    return self._w

@property
def h(self) -> Optional[int]:
    """Returns the height of the source image.

    @return: Height of the source image, or None if it has not been set yet.
    @rtype: Optional[int]
    """
    return self._h

@property
def target_w(self) -> Optional[int]:
    """Returns the width of the target image.

    @return: Width of the target image, or None if it has not been set yet.
    @rtype: Optional[int]
    """
    return self._target_w

@property
def target_h(self) -> Optional[int]:
    """Returns the height of the target image.

    @return: Height of the target image, or None if it has not been set yet.
    @rtype: Optional[int]
    """
    return self._target_h

@property
def n_detections(self) -> Optional[int]:
    """Returns the number of detections to keep.

    @return: Number of detections to keep, or None if it has not been set yet.
    @rtype: Optional[int]
    """
    return self._n_detections

@w.setter
def w(self, w: int) -> None:
    """Sets the width of the source image.

    The value is validated before it is stored, so a rejected value leaves the
    previously stored width untouched.

    @param w: Width of the source image.
    @type w: int
    @raise TypeError: If w is not an integer.
    @raise ValueError: If w is less than 1.
    """
    self._validate_positive_integer(w)
    self._w = w

@h.setter
def h(self, h: int) -> None:
    """Sets the height of the source image.

    The value is validated before it is stored, so a rejected value leaves the
    previously stored height untouched.

    @param h: Height of the source image.
    @type h: int
    @raise TypeError: If h is not an integer.
    @raise ValueError: If h is less than 1.
    """
    self._validate_positive_integer(h)
    self._h = h

@target_w.setter
def target_w(self, target_w: int) -> None:
    """Sets the width of the target image.

    The value is validated before it is stored, so a rejected value leaves the
    previously stored target width untouched.

    @param target_w: Width of the target image.
    @type target_w: int
    @raise TypeError: If target_w is not an integer.
    @raise ValueError: If target_w is less than 1.
    """
    self._validate_positive_integer(target_w)
    self._target_w = target_w

@target_h.setter
def target_h(self, target_h: int) -> None:
    """Sets the height of the target image.

    The value is validated before it is stored, so a rejected value leaves the
    previously stored target height untouched.

    @param target_h: Height of the target image.
    @type target_h: int
    @raise TypeError: If target_h is not an integer.
    @raise ValueError: If target_h is less than 1.
    """
    self._validate_positive_integer(target_h)
    self._target_h = target_h

@n_detections.setter
def n_detections(self, n_detections: int) -> None:
    """Sets the number of detections to keep.

    The value is validated before it is stored, so a rejected value leaves the
    previously stored limit untouched.

    @param n_detections: Number of detections to keep.
    @type n_detections: int
    @raise TypeError: If n_detections is not an integer.
    @raise ValueError: If n_detections is less than 1.
    """
    self._validate_positive_integer(n_detections)
    self._n_detections = n_detections
Loading