diff --git a/depthai_sdk/docs/source/conf.py b/depthai_sdk/docs/source/conf.py index 518fea8f7..be6d4bf19 100644 --- a/depthai_sdk/docs/source/conf.py +++ b/depthai_sdk/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Luxonis' # The full version, including alpha/beta/rc tags -release = '1.12.1' +release = '1.13.0' # -- General configuration --------------------------------------------------- diff --git a/depthai_sdk/docs/source/features/ai_models.rst b/depthai_sdk/docs/source/features/ai_models.rst index a14f4b44c..02affd2e6 100644 --- a/depthai_sdk/docs/source/features/ai_models.rst +++ b/depthai_sdk/docs/source/features/ai_models.rst @@ -53,6 +53,15 @@ The following table lists all the models supported by the SDK. The model name is * - ``face-detection-retail-0004`` - `OMZ `__ - 33 + * - ``facemesh_192x192`` + - `DMZ `__ + - 32 + * - ``facial_landmarks_68_160x160`` + - 32 + - `DMZ `__ + * - ``human-pose-estimation-0001`` + - `OMZ `__ + - 8 * - ``mobilenet-ssd`` - `OMZ `__ - 31 @@ -60,34 +69,37 @@ The following table lists all the models supported by the SDK. The model name is - `DMZ `__ - / * - ``pedestrian-detection-adas-0002`` - - `OMZ `__ + - `OMZ `__ - 19 * - ``person-detection-0200`` - - `OMZ `__ + - `OMZ `__ - 14 * - ``person-detection-retail-0013`` - - `OMZ `__ + - `OMZ `__ - 15 * - ``person-reidentification-retail-0288`` - - `OMZ `__ + - `OMZ `__ - 33 * - ``person-vehicle-bike-detection-crossroad-1016`` - - `OMZ `__ + - `OMZ `__ - 12 + * - ``sbd_mask_classification_224x224`` + - `DMZ `__ + - 64+ * - ``vehicle-detection-0202`` - - `OMZ `__ + - `OMZ `__ - 14 * - ``vehicle-detection-adas-0002`` - - `OMZ `__ + - `OMZ `__ - 14 * - ``vehicle-license-plate-detection-barrier-0106`` - - `OMZ `__ + - `OMZ `__ - 29 * - ``yolo-v3-tf`` - - `OMZ `__ + - `OMZ `__ - 3.5 * - ``yolo-v3-tiny-tf`` - - `OMZ `__ + - `OMZ `__ - 33 * - ``yolov4_coco_608x608`` - `DMZ `__ diff --git a/depthai_sdk/docs/source/fundamentals/visualizer.rst b/depthai_sdk/docs/source/fundamentals/visualizer.rst index aa5ce408b..cbecd1a12 100644 --- a/depthai_sdk/docs/source/fundamentals/visualizer.rst +++ b/depthai_sdk/docs/source/fundamentals/visualizer.rst @@ -200,10 +200,6 @@ Visualizer :members: :undoc-members: -.. autoclass:: depthai_sdk.visualize.visualizer.Platform - :members: - :undoc-members: - Objects ------- diff --git a/depthai_sdk/docs/source/oak-camera.rst b/depthai_sdk/docs/source/oak-camera.rst index a06a36b00..60d327c3b 100644 --- a/depthai_sdk/docs/source/oak-camera.rst +++ b/depthai_sdk/docs/source/oak-camera.rst @@ -17,8 +17,8 @@ Interoperability with DepthAI API --------------------------------- DepthAI SDK was developed with `DepthAI API `__ interoperability in mind. -Users can access all depthai API nodes inside components, and after ``oak.build()`` also the `dai.Pipeline `__ -and `dai.Device `__ objects. +Users can access all depthai API nodes inside components, along with the `dai.Pipeline `__ (``oak.pipeline``) +and `dai.Device `__ (``oak.device``) objects. .. literalinclude:: ../../examples/mixed/api_interop.py :language: python diff --git a/depthai_sdk/docs/source/tutorials/code_samples.rst b/depthai_sdk/docs/source/tutorials/code_samples.rst index 1364dc2d4..03f82e918 100644 --- a/depthai_sdk/docs/source/tutorials/code_samples.rst +++ b/depthai_sdk/docs/source/tutorials/code_samples.rst @@ -47,8 +47,7 @@ are presented with code. .. 
rubric:: NN - :ref:`Age-Gender Inference` - Demonstrates age-gender inference - :ref:`Custom Decode Function` - Demonstrates custom decoding function -- :ref:`Deeplabv3 Person Segmentation` - Demonstrates Deeplabv3 person segmentation -- :ref:`Emotion Recognition` - Demonstrates emotion recognition +- :ref:`Emotion Recognition` - Demonstrates emotion recognition - :ref:`Face Detection RGB` - Run face detection on RGB camera - :ref:`Face Detection Mono` - Run face detection on mono camera - :ref:`Human Pose Estimation` - Run human pose estimation inference diff --git a/depthai_sdk/docs/source/visualizer_formats/example.json b/depthai_sdk/docs/source/visualizer_formats/example.json index 56f270aad..8914791c3 100644 --- a/depthai_sdk/docs/source/visualizer_formats/example.json +++ b/depthai_sdk/docs/source/visualizer_formats/example.json @@ -1,5 +1,4 @@ { - "platform": "pc", "frame_shape": [720, 1280], "config": { "output": { diff --git a/depthai_sdk/docs/source/visualizer_formats/format.json b/depthai_sdk/docs/source/visualizer_formats/format.json index 10d826ad7..5cc0c548a 100644 --- a/depthai_sdk/docs/source/visualizer_formats/format.json +++ b/depthai_sdk/docs/source/visualizer_formats/format.json @@ -1,11 +1,4 @@ { - "platform": { - "type": "string", - "enum": [ - "pc", - "robothub" - ] - }, "frame_shape": { "type": "array", "items": { diff --git a/depthai_sdk/examples/CameraComponent/camera_encode.py b/depthai_sdk/examples/CameraComponent/camera_encode.py new file mode 100644 index 000000000..481a52aec --- /dev/null +++ b/depthai_sdk/examples/CameraComponent/camera_encode.py @@ -0,0 +1,9 @@ +from depthai_sdk import OakCamera + +with OakCamera() as oak: + color = oak.create_camera('color', encode='h265') + + oak.visualize(color.out.encoded, fps=True, scale=2/3) + # By default, it will stream non-encoded frames + oak.visualize(color, fps=True, scale=2/3) + oak.start(blocking=True) diff --git a/depthai_sdk/examples/CameraComponent/preview_all_cameras.py b/depthai_sdk/examples/CameraComponent/preview_all_cameras.py index 3d3257910..9b1b102ec 100644 --- a/depthai_sdk/examples/CameraComponent/preview_all_cameras.py +++ b/depthai_sdk/examples/CameraComponent/preview_all_cameras.py @@ -1,6 +1,6 @@ from depthai_sdk import OakCamera with OakCamera() as oak: - cams = oak.create_all_cameras() - oak.visualize(cams) + cams = oak.create_all_cameras(resolution='max') + oak.visualize(cams, fps=True) oak.start(blocking=True) diff --git a/depthai_sdk/examples/CameraComponent/rotated.py b/depthai_sdk/examples/CameraComponent/rotated.py index d80311036..dbfaf7ec4 100644 --- a/depthai_sdk/examples/CameraComponent/rotated.py +++ b/depthai_sdk/examples/CameraComponent/rotated.py @@ -1,8 +1,6 @@ from depthai_sdk import OakCamera with OakCamera(rotation=90) as oak: - color = oak.create_camera('color', resolution='1080p') - left = oak.create_camera('left', resolution='400p') - right = oak.create_camera('right', resolution='400p') - oak.visualize([color, left, right], fps=True) + all_cams = oak.create_all_cameras() + oak.visualize(all_cams, fps=True) oak.start(blocking=True) diff --git a/depthai_sdk/examples/IMUComponent/imu.py b/depthai_sdk/examples/IMUComponent/imu.py index 9ebafac5a..3db1d2d6d 100644 --- a/depthai_sdk/examples/IMUComponent/imu.py +++ b/depthai_sdk/examples/IMUComponent/imu.py @@ -3,5 +3,7 @@ with OakCamera() as oak: imu = oak.create_imu() imu.config_imu(report_rate=400, batch_report_threshold=5) - oak.visualize(imu.out.main) + # DepthAI viewer should open, and IMU data can be viewed on the 
right-side panel, + # under "Stats" tab (right of the "Device Settings" tab). + oak.visualize(imu.out.main, visualizer='viewer') oak.start(blocking=True) diff --git a/depthai_sdk/examples/IMUComponent/imu_rerun.py b/depthai_sdk/examples/IMUComponent/imu_rerun.py deleted file mode 100644 index f0ca9c694..000000000 --- a/depthai_sdk/examples/IMUComponent/imu_rerun.py +++ /dev/null @@ -1,30 +0,0 @@ -from depthai_sdk import OakCamera -from depthai_sdk.classes.packets import IMUPacket -import rerun as rr -import subprocess -import depthai as dai - -def callback(packet: IMUPacket): - for d in packet.data: - gyro: dai.IMUReportGyroscope = d.gyroscope - accel: dai.IMUReportAccelerometer = d.acceleroMeter - mag: dai.IMUReportMagneticField = d.magneticField - rot: dai.IMUReportRotationVectorWAcc = d.rotationVector - print(accel.x, accel.y, accel.z) - rr.log_scalar('world/accel_x', accel.x, color=(255,0,0)) - rr.log_scalar('world/accel_y', accel.y, color=(0,255,0)) - rr.log_scalar('world/accel_z', accel.z, color=(0,0,255)) - - -with OakCamera() as oak: - subprocess.Popen(["rerun", "--memory-limit", "200MB"]) - rr.init("Rerun ", spawn=False) - rr.connect() - - - imu = oak.create_imu() - imu.config_imu(report_rate=10, batch_report_threshold=2) - print(oak.device.getConnectedIMU()) - oak.callback(imu, callback=callback) - oak.start(blocking=True) - diff --git a/depthai_sdk/examples/NNComponent/age-gender.py b/depthai_sdk/examples/NNComponent/age-gender.py index d1715933f..241f878b8 100644 --- a/depthai_sdk/examples/NNComponent/age-gender.py +++ b/depthai_sdk/examples/NNComponent/age-gender.py @@ -14,7 +14,7 @@ def callback(packet: TwoStagePacket): gender_str = "Woman" if gender[0] > gender[1] else "Man" visualizer.add_text(f'{gender_str}\nAge: {age}', - bbox=(*det.top_left, *det.bottom_right), + bbox=packet.bbox.get_relative_bbox(det.bbox), position=TextPosition.BOTTOM_RIGHT) frame = visualizer.draw(packet.frame) diff --git a/depthai_sdk/examples/NNComponent/custom_decode.py b/depthai_sdk/examples/NNComponent/custom_decode.py index f627d950e..dcec18a71 100644 --- a/depthai_sdk/examples/NNComponent/custom_decode.py +++ b/depthai_sdk/examples/NNComponent/custom_decode.py @@ -1,13 +1,11 @@ import blobconverter -import cv2 import numpy as np -from depthai import NNData - +import depthai as dai from depthai_sdk import OakCamera -from depthai_sdk.classes import Detections, DetectionPacket +from depthai_sdk.classes import Detections -def decode(nn_data: NNData) -> Detections: +def decode(nn_data: dai.NNData) -> Detections: """ Custom decode function for the NN component. Decode function has to accept NNData argument. 
The return type should preferably be a class that inherits from depthai_sdk.classes.GenericNNOutput, @@ -18,29 +16,27 @@ def decode(nn_data: NNData) -> Detections: layer = nn_data.getFirstLayerFp16() results = np.array(layer).reshape((1, 1, -1, 7)) dets = Detections(nn_data) - for result in results[0][0]: - if result[2] > 0.5: + if result[2] > 0.3: label = int(result[1]) conf = result[2] bbox = result[3:] - dets.add(label, conf, bbox) + det = dai.ImgDetection() + det.confidence = conf + det.label = label + det.xmin = bbox[0] + det.ymin = bbox[1] + det.xmax = bbox[2] + det.ymax = bbox[3] + dets.detections.append(det) return dets - -def callback(packet: DetectionPacket): - visualizer = packet.visualizer - frame = packet.frame - frame = visualizer.draw(frame) - cv2.imshow('Custom decode function', frame) - - with OakCamera() as oak: color = oak.create_camera('color') - nn_path = blobconverter.from_zoo(name='person-detection-0200', version='2021.4') + nn_path = blobconverter.from_zoo(name='person-detection-0200', version='2021.4', shaves=6) nn = oak.create_nn(nn_path, color, decode_fn=decode) - oak.visualize(nn, callback=callback) + oak.visualize(nn) oak.start(blocking=True) diff --git a/depthai_sdk/examples/NNComponent/deeplabv3_person.py b/depthai_sdk/examples/NNComponent/deeplabv3_person.py deleted file mode 100644 index e641227c5..000000000 --- a/depthai_sdk/examples/NNComponent/deeplabv3_person.py +++ /dev/null @@ -1,11 +0,0 @@ -from depthai_sdk import OakCamera - - -with OakCamera() as oak: - color = oak.create_camera('color', resolution='1080p') - - nn = oak.create_nn('deeplabv3_person', color) - nn.config_nn(resize_mode='letterbox') # Options: 'letterbox', 'crop', 'stretch' - - visualizer = oak.visualize([nn, nn.out.passthrough], fps=True) - oak.start(blocking=True) diff --git a/depthai_sdk/examples/NNComponent/emotion-recognition.py b/depthai_sdk/examples/NNComponent/emotion-recognition.py index 3d44a9f82..43e6bf3e8 100644 --- a/depthai_sdk/examples/NNComponent/emotion-recognition.py +++ b/depthai_sdk/examples/NNComponent/emotion-recognition.py @@ -16,7 +16,7 @@ def callback(packet: TwoStagePacket): emotion_name = emotions[np.argmax(emotion_results)] visualizer.add_text(emotion_name, - bbox=(*det.top_left, *det.bottom_right), + bbox=packet.bbox.get_relative_bbox(det.bbox), position=TextPosition.BOTTOM_RIGHT) visualizer.draw(packet.frame) diff --git a/depthai_sdk/examples/NNComponent/nn_component.py b/depthai_sdk/examples/NNComponent/nn_component.py index d50c28cc6..3bc89fc26 100644 --- a/depthai_sdk/examples/NNComponent/nn_component.py +++ b/depthai_sdk/examples/NNComponent/nn_component.py @@ -4,7 +4,8 @@ color = oak.create_camera('color') # List of models that are supported out-of-the-box by the SDK: # https://docs.luxonis.com/projects/sdk/en/latest/features/ai_models/#sdk-supported-models - nn = oak.create_nn('yolov7tiny_coco_640x352', color) + nn = oak.create_nn('yolov5n_coco_416x416', color) + nn.config_nn(resize_mode='stretch') oak.visualize([nn.out.main], fps=True) oak.visualize(nn.out.passthrough) oak.start(blocking=True) diff --git a/depthai_sdk/examples/NNComponent/spatial_detection.py b/depthai_sdk/examples/NNComponent/spatial_detection.py index bce3ef57d..646963748 100644 --- a/depthai_sdk/examples/NNComponent/spatial_detection.py +++ b/depthai_sdk/examples/NNComponent/spatial_detection.py @@ -15,6 +15,6 @@ calc_algo=dai.SpatialLocationCalculatorAlgorithm.AVERAGE ) - oak.visualize([nn.out.main], fps=True) - oak.visualize(nn.out.passthrough) + oak.visualize(nn.out.main, 
fps=True) + oak.visualize([nn.out.passthrough, nn.out.spatials]) oak.start(blocking=True) diff --git a/depthai_sdk/examples/PointcloudComponent/pointcloud.py b/depthai_sdk/examples/PointcloudComponent/pointcloud.py index fb741ef01..c746a11e3 100644 --- a/depthai_sdk/examples/PointcloudComponent/pointcloud.py +++ b/depthai_sdk/examples/PointcloudComponent/pointcloud.py @@ -1,23 +1,9 @@ -import cv2 from depthai_sdk import OakCamera -from depthai_sdk.classes.packets import PointcloudPacket, FramePacket -import rerun as rr -import subprocess -import time - -subprocess.Popen(["rerun", "--memory-limit", "200MB"]) -time.sleep(1) # Wait til rerun spins up -rr.init("Rerun ", spawn=False) -rr.connect() - -def callback(packet: PointcloudPacket): - colors = packet.color_frame.getCvFrame()[..., ::-1] # BGR to RGB - rr.log_image('Color Image', colors) - points = packet.points.reshape(-1, 3) - rr.log_points("Pointcloud", points, colors=colors.reshape(-1, 3)) - with OakCamera() as oak: - pcl = oak.create_pointcloud() - oak.callback(pcl, callback=callback) + color = oak.camera('color') + stereo = oak.create_stereo() + stereo.config_stereo(align=color) + pcl = oak.create_pointcloud(stereo=stereo, colorize=color) + oak.visualize(pcl, visualizer='depthai-viewer') oak.start(blocking=True) diff --git a/depthai_sdk/examples/mixed/api_interop.py b/depthai_sdk/examples/mixed/api_interop.py index f5ea3baed..ec8a5cc08 100644 --- a/depthai_sdk/examples/mixed/api_interop.py +++ b/depthai_sdk/examples/mixed/api_interop.py @@ -6,15 +6,12 @@ nn = oak.create_nn('mobilenet-ssd', color) oak.visualize([nn.out.passthrough, nn], fps=True) - # Build the pipeline, connect to the oak, update components. Place interop logic AFTER oak.build() - pipeline = oak.build() - nn.node.setNumInferenceThreads(2) # Configure components' nodes - features = pipeline.create(dai.node.FeatureTracker) # Create new pipeline nodes + features = oak.pipeline.create(dai.node.FeatureTracker) # Create new pipeline nodes color.node.video.link(features.inputImage) - out = pipeline.create(dai.node.XLinkOut) + out = oak.pipeline.create(dai.node.XLinkOut) out.setStreamName('features') features.outputFeatures.link(out.input) diff --git a/depthai_sdk/examples/mixed/collision_avoidance.py b/depthai_sdk/examples/mixed/collision_avoidance.py index 214fdcbdd..eb74b4d30 100644 --- a/depthai_sdk/examples/mixed/collision_avoidance.py +++ b/depthai_sdk/examples/mixed/collision_avoidance.py @@ -1,6 +1,6 @@ from depthai_sdk import OakCamera from depthai_sdk.visualize.configs import StereoColor -from depthai_sdk.classes.packets import DepthPacket +from depthai_sdk.classes.packets import DisparityDepthPacket import math import depthai as dai import cv2 @@ -11,7 +11,7 @@ slc_data = [] -def cb(packet: DepthPacket): +def cb(packet: DisparityDepthPacket): global slc_data fontType = cv2.FONT_HERSHEY_TRIPLEX @@ -56,8 +56,6 @@ def cb(packet: DepthPacket): oak.visualize([stereo], fps=True, callback=cb) - oak.build() - slc = oak.pipeline.create(dai.node.SpatialLocationCalculator) for x in range(15): for y in range(9): diff --git a/depthai_sdk/examples/mixed/packet_callback.py b/depthai_sdk/examples/mixed/packet_callback.py new file mode 100644 index 000000000..6d690d007 --- /dev/null +++ b/depthai_sdk/examples/mixed/packet_callback.py @@ -0,0 +1,45 @@ +from depthai_sdk import OakCamera +from depthai_sdk.classes.packets import FramePacket +from datetime import timedelta +from typing import Dict +import cv2 + +def cb_1(packet: FramePacket): + # Called from main thread, so we can call 
cv2.imshow + cv2.imshow('Color frames from cb', packet.frame) + +def cb_2(packets: Dict[str, FramePacket]): + print(packets) + # Sycned packets. + ts_color = packets['color'].get_timestamp() + ts_left = packets['left'].get_timestamp() + ts_imu = packets['imu'].get_timestamp() + print(f"---- New synced packets. Diff between color and left: {abs(ts_color-ts_left) / timedelta(milliseconds=1)} ms, color and IMU: {abs(ts_imu-ts_color) / timedelta(milliseconds=1)} ms") + + for name, packet in packets.items(): + print(f'Packet {name}, timestamp: {packet.get_timestamp()}, Seq number: {packet.get_sequence_num()}') + +with OakCamera() as oak: + color = oak.create_camera('color', fps=32) + left = oak.create_camera('left', fps=30) + right = oak.create_camera('right', fps=30) + imu = oak.create_imu() + + oak.callback( + color, # Outputs whose packets we want to receive via callback + callback=cb_1, # Callback function + main_thread=True # Whether to call the callback in the main thread. For OpenCV's imshow to work, it must be called in the main thread. + ) + + cb_handler = oak.callback( + [left, right, color, imu], + callback=cb_2, + main_thread=False # Will be called from a different thread, instead of putting packets into queue and waiting for main thread to pick it up. + ) + # Timestamp syncing all 3 streams. We selected (1000/30) / 2 as threshold_ms, because + # left/right are slower (30FPS), so threshold should be about 16ms. This means SDK will discard some + # color packets (2 per second), but we will have synced frames. + cb_handler.configure_syncing(threshold_ms=int((1000/30) / 2)) + + # oak.show_graph() + oak.start(blocking=True) \ No newline at end of file diff --git a/depthai_sdk/examples/mixed/packet_queue.py b/depthai_sdk/examples/mixed/packet_queue.py new file mode 100644 index 000000000..768967b07 --- /dev/null +++ b/depthai_sdk/examples/mixed/packet_queue.py @@ -0,0 +1,46 @@ +from queue import Empty +from depthai_sdk import OakCamera +from depthai_sdk.classes.packets import FramePacket +from datetime import timedelta +from typing import Dict +import cv2 + +with OakCamera() as oak: + color = oak.create_camera('color', fps=32) + left = oak.create_camera('left', fps=30) + right = oak.create_camera('right', fps=30) + imu = oak.create_imu() + + q1 = oak.queue(color, max_size=5).get_queue() + + # Timestamp syncing all 3 streams. We selected (1000/30) / 2 as threshold_ms, because + # left/right are slower (30FPS), so threshold should be about 16ms. This means SDK will discard some + # color packets (2 per second), but we will have synced frames. + q2 = oak.queue([left, right, color, imu], max_size=5).configure_syncing(threshold_ms=int((1000/30) / 2)).get_queue() + + # oak.show_graph() + oak.start() + + while oak.running(): + oak.poll() + + # This will block until a new packet arrives + p: FramePacket = q1.get(block=True) + cv2.imshow('Video from q1', p.frame) + + try: + packets: Dict[str, FramePacket] = q2.get(block=False) + + ts_color = packets[color].get_timestamp() + ts_left = packets[left].get_timestamp() + ts_imu = packets[imu].get_timestamp() + print(f"---- New synced packets. 
Diff between color and left: {abs(ts_color-ts_left) / timedelta(milliseconds=1)} ms, color and IMU: {abs(ts_imu-ts_color) / timedelta(milliseconds=1)} ms") + + for name, packet in packets.items(): + print(f'Packet {name}, timestamp: {packet.get_timestamp()}, Seq number: {packet.get_sequence_num()}') + if not hasattr(packet, 'frame'): + continue # IMUPacket doesn't have a frame + cv2.imshow(name, packet.frame) + except Empty: + # q2.get(block=False) will throw Empty exception if there are no new packets + pass \ No newline at end of file diff --git a/depthai_sdk/examples/mixed/speed_calculation.py b/depthai_sdk/examples/mixed/speed_calculation.py index 3380b0533..f029beb72 100644 --- a/depthai_sdk/examples/mixed/speed_calculation.py +++ b/depthai_sdk/examples/mixed/speed_calculation.py @@ -1,13 +1,17 @@ import cv2 from depthai_sdk import OakCamera +from depthai_sdk.classes.packets import TrackerPacket -def callback(packet): - for detection in packet.detections: - print(f'Speed: {detection.speed:.02f} m/s, {detection.speed_kmph:.02f} km/h, {detection.speed_mph:.02f} mph') +def callback(packet: TrackerPacket): + for obj_id, tracklets in packet.tracklets.items(): + if len(tracklets) != 0: + tracklet = tracklets[-1] + if tracklet.speed is not None: + print(f'Speed for object {obj_id}: {tracklet.speed:.02f} m/s, {tracklet.speed_kmph:.02f} km/h, {tracklet.speed_mph:.02f} mph') - frame = packet.visualizer.draw(packet.frame) + frame = packet.visualizer.draw(packet.decode()) cv2.imshow('Speed estimation', frame) diff --git a/depthai_sdk/examples/mixed/switch_between_models.py b/depthai_sdk/examples/mixed/switch_between_models.py index 8619ec892..9ae8332a1 100644 --- a/depthai_sdk/examples/mixed/switch_between_models.py +++ b/depthai_sdk/examples/mixed/switch_between_models.py @@ -3,7 +3,7 @@ import depthai as dai import cv2 -# We use callback, so we only have cv2 window for all models +# We use callback, so we only have cv2 window for both models def cb(packet: DetectionPacket): frame = packet.visualizer.draw(packet.frame) cv2.imshow('Frame', frame) @@ -32,13 +32,18 @@ def cb(packet: DetectionPacket): # We can have multiple models here, not just 2 object detection models nn1 = oak.create_nn('yolov6nr3_coco_640x352', input=script.outputs['out1']) + nn1.config_nn(resize_mode='stretch') # otherwise, BB mappings will be incorrect nn2 = oak.create_nn('mobilenet-ssd', input=script.outputs['out2']) + nn2.config_nn(resize_mode='stretch') # otherwise, BB mappings will be incorrect + # We will send "switch" message via XLinkIn xin = oak.pipeline.create(dai.node.XLinkIn) xin.setStreamName('switch') xin.out.link(script.inputs['switch']) + # We don't want syncing, we just want either of the model packets in the callback oak.visualize([nn1, nn2], fps=True, callback=cb) + oak.visualize([nn1.out.passthrough, nn2.out.passthrough], fps=True) # oak.show_graph() diff --git a/depthai_sdk/examples/mixed/sync_multiple_outputs.py b/depthai_sdk/examples/mixed/sync_multiple_outputs.py index a61c5f0d0..64197a018 100644 --- a/depthai_sdk/examples/mixed/sync_multiple_outputs.py +++ b/depthai_sdk/examples/mixed/sync_multiple_outputs.py @@ -3,15 +3,17 @@ from depthai_sdk import OakCamera with OakCamera() as oak: - color = oak.create_camera('color', encode='h264', name='color') - nn = oak.create_nn('mobilenet-ssd', color, name='mobilenet') - nn2 = oak.create_nn('face-detection-retail-0004', color, name='face-detection') - # oak.visualize([nn.out.main, nn.out.passthrough]) - # oak.visualize(nn.out.spatials, scale=1 / 2) + color = 
oak.create_camera('color', encode='h264') + nn = oak.create_nn('mobilenet-ssd', color) + nn2 = oak.create_nn('face-detection-retail-0004', color) + def cb(msgs: Dict): - print('synced!', msgs) + print('====== New synced packets! ======') + for name, packet in msgs.items(): + print(f"Packet '{name}' with timestamp:", packet.get_timestamp(), 'Seq number:', packet.get_sequence_num(), 'Object', packet) - oak.sync([color.out.encoded, nn.out.passthrough, nn.out.main, nn2.out.main], cb) + oak.callback([nn.out.passthrough, nn.out.encoded, nn2.out.encoded], cb) \ + .configure_syncing(enable_sync=True, threshold_ms=30) # oak.show_graph() oak.start(blocking=True) diff --git a/depthai_sdk/examples/recording/encode.py b/depthai_sdk/examples/recording/encode.py index bf098931e..44b235d12 100644 --- a/depthai_sdk/examples/recording/encode.py +++ b/depthai_sdk/examples/recording/encode.py @@ -9,7 +9,9 @@ nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo) # Sync & save all (encoded) streams - oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record', RecordType.VIDEO) + oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record', RecordType.VIDEO) \ + .configure_syncing(enable_sync=True, threshold_ms=50) + oak.visualize([color.out.encoded], fps=True) oak.start(blocking=True) diff --git a/depthai_sdk/examples/recording/encoder_preview.py b/depthai_sdk/examples/recording/encoder_preview.py index 52fa83ff0..f519b808f 100644 --- a/depthai_sdk/examples/recording/encoder_preview.py +++ b/depthai_sdk/examples/recording/encoder_preview.py @@ -3,18 +3,21 @@ from depthai_sdk import OakCamera from depthai_sdk.recorders.video_writers.av_writer import AvWriter -rec = AvWriter(Path('./'), 'color', 'mjpeg', fps=30) +fourcc = 'h264' # Can be 'mjpeg', 'h264', or 'hevc' +rec = AvWriter(Path('./'), 'color', fourcc=fourcc) def save_raw_mjpeg(packet): rec.write(packet.msg) - with OakCamera() as oak: - color = oak.create_camera('color', encode='MJPEG', fps=30) + color = oak.create_camera('color', encode=fourcc, fps=20) + + # Stream encoded video packets to host. For visualization, we decode them + # on the host side, and for callback we write encoded frames directly to disk. 
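+    # packet.msg holds the encoded dai.ImgFrame, so rec.write(packet.msg) stores the bitstream as-is;
+    # accessing packet.frame would lazily decode it on the host (see FramePacket.decode further below).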
+ oak.visualize(color.out.encoded, scale=2 / 3, fps=True) + oak.callback(color.out.encoded, callback=save_raw_mjpeg) - oak.visualize(color, scale=2 / 3, fps=True) - oak.callback(color, callback=save_raw_mjpeg) oak.start(blocking=True) rec.close() diff --git a/depthai_sdk/examples/recording/mcap_record_imu.py b/depthai_sdk/examples/recording/mcap_record_imu.py index ce865d8fd..084f108e0 100644 --- a/depthai_sdk/examples/recording/mcap_record_imu.py +++ b/depthai_sdk/examples/recording/mcap_record_imu.py @@ -8,6 +8,7 @@ imu = oak.create_imu() imu.config_imu(report_rate=500, batch_report_threshold=5) + # Note that for MCAP recording, user has to have ROS installed recorder = oak.record([imu, stereo.out.depth], './', RecordType.MCAP) oak.visualize([left, stereo]) diff --git a/depthai_sdk/examples/recording/record_all.py b/depthai_sdk/examples/recording/record_all.py new file mode 100644 index 000000000..3e5f7e9a2 --- /dev/null +++ b/depthai_sdk/examples/recording/record_all.py @@ -0,0 +1,22 @@ +from depthai_sdk import OakCamera, RecordType +from depthai_sdk.args_parser import ArgsParser +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('--recordStreams', action='store_true', help="Record frames to file") +parser.add_argument('--saveStreamsTo', type=str, help="Save frames to directory", default="./record") +args= ArgsParser.parseArgs(parser=parser) + +with OakCamera(args=args) as oak: + cams = oak.create_all_cameras() + left = oak.camera('left') + right = oak.camera('right') + if left is not None and right is not None: + stereo = oak.create_stereo(left=left, right=right) + oak.visualize(stereo) + # Sync & save all streams + if args["recordStreams"]: + oak.record(cams, args["saveStreamsTo"], RecordType.VIDEO_LOSSLESS).configure_syncing(True, 50) + oak.visualize(cams, fps=True) + + oak.start(blocking=True) diff --git a/depthai_sdk/examples/recording/rosbag_record.py b/depthai_sdk/examples/recording/rosbag_record.py index 995e84260..4f462dd59 100644 --- a/depthai_sdk/examples/recording/rosbag_record.py +++ b/depthai_sdk/examples/recording/rosbag_record.py @@ -1,15 +1,18 @@ from depthai_sdk import OakCamera, RecordType with OakCamera() as oak: + color = oak.create_camera('color', encode='jpeg', fps=30) left = oak.create_camera('left', resolution='800p', encode='jpeg', fps=30) right = oak.create_camera('right', resolution='800p', encode='jpeg', fps=30) stereo = oak.create_stereo(left=left, right=right) + stereo.config_stereo(align=color) imu = oak.create_imu() imu.config_imu(report_rate=400, batch_report_threshold=5) - # DB3 / ROSBAG - oak.record([left.out.encoded, right.out.encoded, stereo.out.depth, imu], 'records', record_type=RecordType.DB3) + # DB3 / ROSBAG. ROSBAG doesn't require having ROS installed, while DB3 does. 
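+    # (ROSBAG recording should produce a ROS1-style .bag file, while DB3 produces a ROS2 sqlite .db3 bag.)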
+ record_components = [left.out.encoded, color.out.encoded, right.out.encoded, stereo.out.depth, imu] + oak.record(record_components, 'record', record_type=RecordType.ROSBAG) - # Record left only - oak.visualize(left) + # Visualize only color stream + oak.visualize(color.out.encoded) oak.start(blocking=True) diff --git a/depthai_sdk/requirements.txt b/depthai_sdk/requirements.txt index 162165241..01580b840 100644 --- a/depthai_sdk/requirements.txt +++ b/depthai_sdk/requirements.txt @@ -10,3 +10,4 @@ marshmallow==3.17.0 xmltodict sentry-sdk==1.21.0 depthai-pipeline-graph==0.0.5 +ahrs==0.3.1 diff --git a/depthai_sdk/setup.py b/depthai_sdk/setup.py index 9bda36cb4..6891734b6 100644 --- a/depthai_sdk/setup.py +++ b/depthai_sdk/setup.py @@ -9,7 +9,7 @@ setup( name='depthai-sdk', - version='1.12.1', + version='1.13.0', description='This package provides an abstraction of the DepthAI API library.', long_description=io.open("README.md", encoding="utf-8").read(), long_description_content_type="text/markdown", diff --git a/depthai_sdk/src/depthai_sdk/__init__.py b/depthai_sdk/src/depthai_sdk/__init__.py index 9412b350b..516a1e620 100644 --- a/depthai_sdk/src/depthai_sdk/__init__.py +++ b/depthai_sdk/src/depthai_sdk/__init__.py @@ -3,15 +3,14 @@ from depthai_sdk.constants import CV2_HAS_GUI_SUPPORT from depthai_sdk.logger import set_logging_level from depthai_sdk.oak_camera import OakCamera -from depthai_sdk.oak_device import OakDevice from depthai_sdk.previews import * from depthai_sdk.record import * from depthai_sdk.replay import * from depthai_sdk.utils import * -from depthai_sdk.utils import _create_config, get_config_field +from depthai_sdk.utils import _create_config, get_config_field, _sentry_before_send from depthai_sdk.visualize import * -__version__ = '1.12.1' +__version__ = '1.13.0' def __import_sentry(sentry_dsn: str) -> None: @@ -23,21 +22,13 @@ def __import_sentry(sentry_dsn: str) -> None: traces_sample_rate=1.0, release=f'depthai_sdk@{__version__}', with_locals=False, + before_send=_sentry_before_send ) except: pass -config_exists = False -# Check if sentry is enabled -try: - sentry_status = get_config_field('sentry') - config_exists = True -except FileNotFoundError: - sentry_status = False - -if config_exists and sentry_status: - sentry_dsn = get_config_field('sentry_dsn') +sentry_dsn = get_config_field('sentry_dsn') +sentry_status = get_config_field('sentry') +if sentry_dsn and sentry_status: __import_sentry(sentry_dsn) -elif not config_exists: - _create_config() diff --git a/depthai_sdk/src/depthai_sdk/args_parser.py b/depthai_sdk/src/depthai_sdk/args_parser.py index e1e5e9958..96a190ab4 100644 --- a/depthai_sdk/src/depthai_sdk/args_parser.py +++ b/depthai_sdk/src/depthai_sdk/args_parser.py @@ -92,7 +92,8 @@ def parseArgs(parser: argparse.ArgumentParser = None) -> Dict[str, Any]: parser.add_argument("-monof", "--monoFps", type=float, help="Mono cam fps: max 60.0 for H:720 or H:800, max 120.0 for H:400. Default: %(default)s") parser.add_argument('-fps', '--fps', type=float, help='Camera FPS applied to all sensors') - + parser.add_argument('-defaultRes', '--defaultResolution', type=str, choices=[None, 'min', 'max'], + help="Default resolution preset for the cameras that don't have a specific resolution set. 
Default: %(default)s") # ColorCamera ISP values parser.add_argument('-isp', '--ispScale', type=_commaSeparated(None), help="Sets ColorCamera's ISP scale") parser.add_argument('-sharpness', '--sharpness', type=_checkRange(0, 4), diff --git a/depthai_sdk/src/depthai_sdk/classes/enum.py b/depthai_sdk/src/depthai_sdk/classes/enum.py index dffc3ada1..5c23cdbbf 100644 --- a/depthai_sdk/src/depthai_sdk/classes/enum.py +++ b/depthai_sdk/src/depthai_sdk/classes/enum.py @@ -1,6 +1,7 @@ from enum import IntEnum from typing import Union + class ResizeMode(IntEnum): """ If NN input frame is in different aspect ratio than what the model expect, we have 3 different @@ -10,7 +11,7 @@ class ResizeMode(IntEnum): LETTERBOX = 0 # Preserves full FOV by padding/letterboxing, but smaller frame means less features which might decrease NN accuracy STRETCH = 1 # Preserves full FOV, but frames are stretched to match the FOV, which might decrease NN accuracy CROP = 2 # Crops some FOV to match the required FOV, then scale. No potential NN accuracy decrease. - FULL_CROP = 3 # No scaling is done, cropping is applied and FOV can be reduced by a lot + FULL_CROP = 3 # No scaling is done, cropping is applied and FOV can be reduced by a lot # Parse string to ResizeMode @staticmethod @@ -30,4 +31,3 @@ def parse(mode: Union[str, 'ResizeMode']) -> 'ResizeMode': else: raise ValueError(f"Unknown resize mode {mode}! 'Options (case insensitive):" \ "STRETCH, CROP, LETTERBOX. Using default LETTERBOX mode.") - diff --git a/depthai_sdk/src/depthai_sdk/classes/nn_results.py b/depthai_sdk/src/depthai_sdk/classes/nn_results.py index 31282c98a..9dc8af9e1 100644 --- a/depthai_sdk/src/depthai_sdk/classes/nn_results.py +++ b/depthai_sdk/src/depthai_sdk/classes/nn_results.py @@ -5,10 +5,54 @@ These will be integrated into depthai-core, bonus points for on-device decoding of some popular models. """ from dataclasses import dataclass -from typing import List, Tuple, Any +from datetime import timedelta +from typing import List, Tuple, Any, Union, Optional +import depthai as dai import numpy as np -from depthai import NNData, ImgDetection + +from depthai_sdk.visualize.bbox import BoundingBox + + +@dataclass +class Detection: + # Original ImgDetection + img_detection: Union[None, dai.ImgDetection, dai.SpatialImgDetection] + label_str: str + confidence: float + color: Tuple[int, int, int] + bbox: BoundingBox + angle: Optional[int] + ts: Optional[timedelta] + + @property + def top_left(self) -> Tuple[float, float]: + return self.bbox.top_left() + + @property + def bottom_right(self) -> Tuple[float, float]: + return self.bbox.bottom_right() + + +@dataclass +class TrackingDetection(Detection): + tracklet: dai.Tracklet + filtered_2d: BoundingBox + filtered_3d: dai.Point3f + speed: Union[float, None] # m/s + + @property + def speed_kmph(self) -> float: + return self.speed * 3.6 + + @property + def speed_mph(self) -> float: + return self.speed * 2.236936 + + +@dataclass +class TwoStageDetection(Detection): + nn_data: dai.NNData class GenericNNOutput: @@ -16,9 +60,20 @@ class GenericNNOutput: Generic NN output, to be used for higher-level abstractions (eg. automatic visualization of results). 
""" - def __init__(self, nn_data: NNData): + def __init__(self, nn_data: Union[dai.NNData, dai.ImgDetections, dai.SpatialImgDetections]): self.nn_data = nn_data + def getTimestamp(self) -> timedelta: + return self.nn_data.getTimestamp() + + def getSequenceNum(self) -> int: + return self.nn_data.getSequenceNum() + + +@dataclass +class ExtendedImgDetection(dai.ImgDetection): + angle: int + # First we have Object detection results, which are already standarized with dai.ImgDetections @@ -28,25 +83,12 @@ class Detections(GenericNNOutput): Detection results containing bounding boxes, labels and confidences. Optionally can contain rotation angles. """ - def __init__(self, nn_data: NNData, is_rotated: bool = False): + def __init__(self, + nn_data: Union[dai.NNData, dai.ImgDetections, dai.SpatialImgDetections], + is_rotated: bool = False): GenericNNOutput.__init__(self, nn_data) - - self.detections = [] + self.detections: List[ExtendedImgDetection] = [] self.is_rotated = is_rotated - if is_rotated: - self.angles = [] - - def add(self, label: int, confidence: float, bbox: Tuple[float, ...], angle: int = 0) -> None: - det = ImgDetection() - det.label = label - det.confidence = confidence - det.xmin = bbox[0] - det.ymin = bbox[1] - det.xmax = bbox[2] - det.ymax = bbox[3] - self.detections.append(det) - if self.is_rotated: - self.angles.append(angle) @dataclass @@ -58,7 +100,7 @@ class SemanticSegmentation(GenericNNOutput): # In core, extend from NNData """ mask: List[np.ndarray] # 2D np.array for each class - def __init__(self, nn_data: NNData, mask: List[np.ndarray]): + def __init__(self, nn_data: dai.NNData, mask: List[np.ndarray]): super().__init__(nn_data) self.mask = mask @@ -72,7 +114,7 @@ class ImgLandmarks(GenericNNOutput): # In core, extend from NNData """ def __init__(self, - nn_data: NNData, + nn_data: dai.NNData, landmarks: List[List[Any]] = None, landmarks_indices: List[List[int]] = None, pairs: List[Tuple[int, int]] = None, @@ -93,6 +135,5 @@ class InstanceSegmentation(GenericNNOutput): masks: List[np.ndarray] # 2D np.array for each instance labels: List[int] # Class label for each instance - def __init__(self, nn_data: NNData, masks: List[np.ndarray], labels: List[int]): + def __init__(self, nn_data: dai.NNData, masks: List[np.ndarray], labels: List[int]): raise NotImplementedError('Instance segmentation not yet implemented') - super().__init__(nn_data) diff --git a/depthai_sdk/src/depthai_sdk/classes/output_config.py b/depthai_sdk/src/depthai_sdk/classes/output_config.py deleted file mode 100644 index 77a6144a9..000000000 --- a/depthai_sdk/src/depthai_sdk/classes/output_config.py +++ /dev/null @@ -1,211 +0,0 @@ -import os -from abc import abstractmethod -from pathlib import Path -from typing import Optional, Callable, List, Union - -import depthai as dai -from depthai_sdk.oak_outputs.syncing import SequenceNumSync -from depthai_sdk.oak_outputs.xout.xout_base import XoutBase -from depthai_sdk.oak_outputs.xout.xout_depth import XoutDepth -from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.record import Record -from depthai_sdk.recorders.video_recorder import VideoRecorder -from depthai_sdk.trigger_action.actions.abstract_action import Action -from depthai_sdk.trigger_action.actions.record_action import RecordAction -from depthai_sdk.trigger_action.trigger_action import TriggerAction -from depthai_sdk.trigger_action.triggers.abstract_trigger import Trigger -from depthai_sdk.visualize.visualizer import Visualizer - -def find_new_name(name: str, names: 
List[str]): - while True: - arr = name.split(' ') - num = arr[-1] - if num.isnumeric(): - arr[-1] = str(int(num) + 1) - name = " ".join(arr) - else: - name = f"{name} 2" - if name not in names: - return name - -class BaseConfig: - @abstractmethod - def setup(self, pipeline: dai.Pipeline, device: dai.Device, names: List[str]) -> List[XoutBase]: - raise NotImplementedError() - - -class OutputConfig(BaseConfig): - """ - Saves callbacks/visualizers until the device is fully initialized. I'll admit it's not the cleanest solution. - """ - - def __init__(self, output: Callable, - callback: Callable, - visualizer: Visualizer = None, - visualizer_enabled: bool = False, - record_path: Optional[str] = None): - self.output = output # Output of the component (a callback) - self.callback = callback # Callback that gets called after syncing - self.visualizer = visualizer - self.visualizer_enabled = visualizer_enabled - self.record_path = record_path - - def setup(self, pipeline: dai.Pipeline, device, names: List[str]) -> List[XoutBase]: - xoutbase: XoutBase = self.output(pipeline, device) - xoutbase.setup_base(self.callback) - - if xoutbase.name in names: # Stream name already exist, append a number to it - xoutbase.name = find_new_name(xoutbase.name, names) - names.append(xoutbase.name) - - recorder = None - if self.record_path: - recorder = VideoRecorder() - - if isinstance(xoutbase, XoutDepth): - raise NotImplementedError('Depth recording is not implemented yet.' - 'Please use OakCamera.record() instead.') - - recorder.update(Path(self.record_path), device, [xoutbase]) - - if self.visualizer: - xoutbase.setup_visualize(visualizer=self.visualizer, - visualizer_enabled=self.visualizer_enabled, - name=xoutbase.name) - - if self.record_path: - xoutbase.setup_recorder(recorder=recorder) - - return [xoutbase] - - -class RecordConfig(BaseConfig): - def __init__(self, outputs: List[Callable], rec: Record): - self.outputs = outputs - self.rec = rec - - def setup(self, pipeline: dai.Pipeline, device: dai.Device, _) -> List[XoutBase]: - xouts: List[XoutFrames] = [] - for output in self.outputs: - xoutbase: XoutFrames = output(pipeline, device) - xoutbase.setup_base(None) - xouts.append(xoutbase) - - self.rec.setup_base(None) - self.rec.start(device, xouts) - - return [self.rec] - - -class RosStreamConfig(BaseConfig): - outputs: List[Callable] - ros = None - - def __init__(self, outputs: List[Callable]): - self.outputs = outputs - - def setup(self, pipeline: dai.Pipeline, device, names: List[str]) -> List[XoutBase]: - xouts: List[XoutFrames] = [] - for output in self.outputs: - xoutbase: XoutFrames = output(pipeline, device) - xoutbase.setup_base(None) - xouts.append(xoutbase) - - envs = os.environ - if 'ROS_VERSION' not in envs: - raise Exception('ROS installation not found! Please install or source the ROS you would like to use.') - - version = envs['ROS_VERSION'] - if version == '1': - raise Exception('ROS1 publsihing is not yet supported!') - from depthai_sdk.integrations.ros.ros1_streaming import Ros1Streaming - self.ros = Ros1Streaming() - elif version == '2': - from depthai_sdk.integrations.ros.ros2_streaming import Ros2Streaming - self.ros = Ros2Streaming() - else: - raise Exception(f"ROS version '{version}' not recognized! 
Should be either '1' or '2'") - - self.ros.update(device, xouts) - return [self] - - def new_msg(self, name, msg): - self.ros.new_msg(name, msg) - - def check_queue(self, block): - pass # No queues - - def start_fps(self): - pass - - # def is_ros1(self) -> bool: - # try: - # import rospy - # return True - # except: - # return False - # - # def is_ros2(self): - # try: - # import rclpy - # return True - # except: - # return False - - -class SyncConfig(BaseConfig, SequenceNumSync): - def __init__(self, outputs: List[Callable], callback: Callable): - self.outputs = outputs - self.callback = callback - - SequenceNumSync.__init__(self, len(outputs)) - - self.packets = dict() - - def new_packet(self, packet): - # print('new packet', packet, packet.name, 'seq num',packet.imgFrame.getSequenceNum()) - synced = self.sync( - packet.msg.getSequenceNum(), - packet.name, - packet - ) - if synced: - self.callback(synced) - - def setup(self, pipeline: dai.Pipeline, device: dai.Device, _) -> List[XoutBase]: - xouts = [] - for output in self.outputs: - xoutbase: XoutBase = output(pipeline, device) - xoutbase.setup_base(self.new_packet) - xouts.append(xoutbase) - - return xouts - - -class TriggerActionConfig(BaseConfig): - def __init__(self, trigger: Trigger, action: Union[Callable, Action]): - self.trigger = trigger - self.action = Action(None, action) if isinstance(action, Callable) else action - - def setup(self, pipeline: dai.Pipeline, device, _) -> List[XoutBase]: - controller = TriggerAction(self.trigger, self.action) - - trigger_xout: XoutBase = self.trigger.input(pipeline, device) - trigger_xout.setup_base(controller.new_packet_trigger) - # without setting visualizer up, XoutNnResults.on_callback() won't work - trigger_xout.setup_visualize(visualizer=Visualizer(), name=trigger_xout.name, visualizer_enabled=False) - - if isinstance(self.action, Callable): - return [trigger_xout] - - action_xouts = [] - if self.action.inputs: - for output in self.action.inputs: - xout: XoutBase = output(pipeline, device) - xout.setup_base(controller.new_packet_action) - action_xouts.append(xout) - - if isinstance(self.action, RecordAction): - self.action.setup(device, action_xouts) # creates writers for VideoRecorder() - - return [trigger_xout] + action_xouts diff --git a/depthai_sdk/src/depthai_sdk/classes/packet_handlers.py b/depthai_sdk/src/depthai_sdk/classes/packet_handlers.py new file mode 100644 index 000000000..6b2a08ae4 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/classes/packet_handlers.py @@ -0,0 +1,346 @@ +import logging +import os +from abc import abstractmethod +from queue import Queue, Empty +from typing import Optional, Callable, List, Union, Dict + +import depthai as dai + +from depthai_sdk.classes.packets import BasePacket +from depthai_sdk.components.component import Component, ComponentOutput +from depthai_sdk.oak_outputs.fps import FPS +from depthai_sdk.oak_outputs.syncing import TimestampSync +from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, ReplayStream +from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames +from depthai_sdk.record import Record +from depthai_sdk.trigger_action.actions.abstract_action import Action +from depthai_sdk.trigger_action.actions.record_action import RecordAction +from depthai_sdk.trigger_action.trigger_action import TriggerAction +from depthai_sdk.trigger_action.triggers.abstract_trigger import Trigger +from depthai_sdk.visualize.visualizer import Visualizer + + +class BasePacketHandler: + def __init__(self, main_thread=False): + self.fps = FPS() 
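+        # When main_thread=True, packets are buffered in this queue and drained by _poll() on the main thread;
+        # otherwise new_packet() is called directly from a background (non-main) thread.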
+ self.queue = Queue(2) if main_thread else None + self.outputs: List[ComponentOutput] + self.sync = None + + self._packet_names = {} # Check for duplicate packet name, raise error if found (user error) + + @abstractmethod + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + raise NotImplementedError() + + def get_fps(self) -> float: + return self.fps.fps() + + def _new_packet_callback(self, packet: BasePacket): + """ + Callback from XoutBase. Don't override it. Does FPS counting and calls new_packet(). + """ + if self.sync is not None: + packet = self.sync.sync(packet.get_timestamp(), packet.name, packet) + if packet is None: + return + + self.fps.next_iter() + if self.queue: + if self.queue.full(): + self.queue.get() # Remove oldest packet + self.queue.put(packet) + else: + self.new_packet(packet) + + def configure_syncing(self, + enable_sync: bool = True, + threshold_ms: int = 17): + """ + If multiple outputs are used, then PacketHandler can do timestamp syncing of multiple packets + before calling new_packet(). + Args: + enable_sync: If True, then syncing is enabled. + threshold_ms: Maximum time difference between packets in milliseconds. + """ + if enable_sync: + if len(self.outputs) < 2: + logging.error('Syncing requires at least 2 outputs! Skipping syncing.') + return + self.sync = TimestampSync(len(self.outputs), threshold_ms) + + def _poll(self): + """ + Called from main thread. + """ + if self.queue: + try: + packet = self.queue.get_nowait() + self.new_packet(packet) + except Empty: + pass + + @abstractmethod + def new_packet(self, packet): + raise NotImplementedError() + + def close(self): + """ + Used as a cleanup method (eg. close recording), other classes can override it. + """ + pass + + def _save_outputs(self, output: Union[List, ComponentOutput, Component]): + if not isinstance(output, List): + output = [output] + + for i in range(len(output)): + if isinstance(output[i], Component): + # Select default (main) output of the component + output[i] = output[i].out.main + + self.outputs = output + + def _create_xout(self, + pipeline: dai.Pipeline, + xout: XoutBase, + xout_streams: Dict, + custom_callback: Callable = None, + custom_packet_postfix: str = None): + # Check for duplicate packet name, raise error if found (user error) + if custom_packet_postfix: + xout.set_packet_name_postfix(custom_packet_postfix) + + name = xout.get_packet_name() + if name in self._packet_names: + raise ValueError( + f'User specified duplicate packet name "{name}"! Please specify unique names (or leave empty) for each component output.') + self._packet_names[name] = True + + # Assign which callback to call when packet is prepared + xout.new_packet_callback = custom_callback or self._new_packet_callback + + for xstream in xout.xstreams(): + if xstream.name not in xout_streams: + xout_streams[xstream.name] = [] + if not isinstance(xstream, ReplayStream): + xlink = pipeline.createXLinkOut() + xlink.setStreamName(xstream.name) + xstream.stream.link(xlink.input) + xout_streams[xstream.name].append(xout.device_msg_callback) + + +class VisualizePacketHandler(BasePacketHandler): + def __init__(self, + outputs, + visualizer: Visualizer, + callback: Callable = None, + record_path: Optional[str] = None, + main_thread: bool = True, + ): + self._save_outputs(outputs) + + if 1 < len(self.outputs) and record_path is not None: + raise Exception('Recording multiple streams is not supported! 
' + 'Call oak.visualize(out, record_path="vid.mp4") for each stream separately') + + self.callback = callback # Callback that gets called after syncing + self.visualizer = visualizer + self.record_path = record_path + self.recorder = None + # Main thread: if opencv visualizer, then we need to poll it + super().__init__(main_thread) + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + for output in self.outputs: + xout: XoutBase = output(device) + self._create_xout(pipeline, xout, xout_streams) + + def new_packet(self, packet: BasePacket): + # Create visualizer objects for the visualizer. These objects will then be visualized + # by the selected visualizer + packet.prepare_visualizer_objects(self.visualizer) + + if self.callback: + # Add self.visualizer to packet attributes + packet.visualizer = self.visualizer + self.callback(packet) + else: + self.visualizer.show(packet) + + if self.recorder: + self.recorder.write(packet) + + def close(self): + self.visualizer.close() + + +class RecordPacketHandler(BasePacketHandler): + def __init__(self, outputs, recorder: Record): + self._save_outputs(outputs) + self.recorder = recorder + super().__init__() + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + xouts: List[XoutFrames] = [] + for output in self.outputs: + xout = output(device) + xouts.append(xout) + self._create_xout(pipeline, xout, xout_streams) + + self.recorder.start(device, xouts) + + def new_packet(self, packet: BasePacket): + self.recorder.write(packet) + + def close(self): + self.recorder.close() + + +class CallbackPacketHandler(BasePacketHandler): + def __init__(self, outputs, callback: Callable, main_thread=False): + self._save_outputs(outputs) + self.callback = callback + super().__init__(main_thread) + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + for output in self.outputs: + xout = output(device) + self._create_xout(pipeline, xout, xout_streams) + + def new_packet(self, packet): + self.callback(packet) + + +class QueuePacketHandler(BasePacketHandler): + def __init__(self, outputs, max_size: int): + super().__init__() + self._save_outputs(outputs) + self.queue = Queue(max_size) + + def get_queue(self) -> Queue: + return self.queue + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + for output in self.outputs: + xout = output(device) + self._create_xout(pipeline, xout, xout_streams) + + def configure_syncing(self, + enable_sync: bool = True, + threshold_ms: int = 17) -> 'QueuePacketHandler': + """ + If multiple outputs are used, then PacketHandler can do timestamp syncing of multiple packets + before calling new_packet(). + Args: + enable_sync: If True, then syncing is enabled. + threshold_ms: Maximum time difference between packets in milliseconds. + """ + super().configure_syncing(enable_sync, threshold_ms) + return self + + def new_packet(self, packet): + # It won't be called, we just added this function to satisfy the abstract class + pass + + +class RosPacketHandler(BasePacketHandler): + def __init__(self, outputs): + super().__init__() + self._save_outputs(outputs) + + envs = os.environ + if 'ROS_VERSION' not in envs: + raise Exception('ROS installation not found! 
Please install or source the ROS you would like to use.') + + version = envs['ROS_VERSION'] + if version == '1': + raise Exception('ROS1 publsihing is not yet supported!') + from depthai_sdk.integrations.ros.ros1_streaming import Ros1Streaming + self.ros = Ros1Streaming() + elif version == '2': + from depthai_sdk.integrations.ros.ros2_streaming import Ros2Streaming + self.ros = Ros2Streaming() + else: + raise Exception(f"ROS version '{version}' not recognized! Should be either '1' or '2'") + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + xouts = [] + for output in self.outputs: + xout = output(device) + self._create_xout(pipeline, xout, xout_streams) + xouts.append(xout) + + self.ros.update(device, xouts) + + def new_packet(self, packet): + # self.ros.new_msg(name, msg) + # TODO: implement + pass + + # def is_ros1(self) -> bool: + # try: + # import rospy + # return True + # except: + # return False + # + # def is_ros2(self): + # try: + # import rclpy + # return True + # except: + # return False + + +class TriggerActionPacketHandler(BasePacketHandler): + def __init__(self, trigger: Trigger, action: Union[Callable, Action]): + super().__init__() + self.trigger = trigger + self.action = Action(None, action) if isinstance(action, Callable) else action + self.controller = TriggerAction(self.trigger, self.action) + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + trigger_xout: XoutBase = self.trigger.input(device) + self._create_xout(pipeline=pipeline, + xout=trigger_xout, + xout_streams=xout_streams, + custom_callback=self.controller.new_packet_trigger, + custom_packet_postfix='trigger') + + if isinstance(self.action, Callable): + self._save_outputs([trigger_xout]) + return + + action_xouts = [] + if self.action.inputs: + for output in self.action.inputs: + xout: XoutBase = output(device) + xout.new_packet_callback = self.controller.new_packet_action + self._create_xout(pipeline=pipeline, + xout=xout, + xout_streams=xout_streams, + custom_callback=self.controller.new_packet_action, + custom_packet_postfix='action') + action_xouts.append(xout) + + if isinstance(self.action, RecordAction): + self.action.setup(device, action_xouts) # creates writers for VideoRecorder() + + self._save_outputs([trigger_xout] + action_xouts) + + def new_packet(self, packet): + pass + + +class StreamPacketHandler(BasePacketHandler): + """ + TODO. API: + oak.stream_rtsp([color, left, right], port=8888) + oak.stream_webrtc(color, port=8881) + + Creates a server and just sends forward + the frames. Doesn't use any queues. 
+ + """ + pass diff --git a/depthai_sdk/src/depthai_sdk/classes/packets.py b/depthai_sdk/src/depthai_sdk/classes/packets.py index 0fa36c904..931821b0f 100644 --- a/depthai_sdk/src/depthai_sdk/classes/packets.py +++ b/depthai_sdk/src/depthai_sdk/classes/packets.py @@ -1,58 +1,65 @@ -from typing import Tuple, List, Union, Optional +from abc import ABC, abstractmethod +from datetime import timedelta +from typing import Sequence, Tuple, List, Union, Optional, Dict, Callable import depthai as dai import numpy as np +from depthai_sdk.classes import ImgLandmarks, SemanticSegmentation +from depthai_sdk.classes.nn_results import Detection, TrackingDetection, TwoStageDetection +from depthai_sdk.visualize.bbox import BoundingBox +from depthai_sdk.visualize.configs import StereoColor, TextPosition +from depthai_sdk.visualize.visualizer import Visualizer + try: import cv2 except ImportError: cv2 = None -class _Detection: - # Original ImgDetection - img_detection: dai.ImgDetection - label_str: str - color: Tuple[int, int, int] - - # Normalized bounding box - top_left: Tuple[int, int] - bottom_right: Tuple[int, int] - - def centroid(self) -> Tuple[int, int]: - return ( - int((self.bottom_right[0] + self.top_left[0]) / 2), - int((self.bottom_right[1] + self.top_left[1]) / 2), - ) +class BasePacket(ABC): + """ + Base class for all packets. + """ - def get_bbox(self) -> Tuple[float, float, float, float]: - return self.img_detection.xmin, self.img_detection.ymin, self.img_detection.xmax, self.img_detection.ymax + def __init__(self, name: str): + self.name = name + def prepare_visualizer_objects(self, visualizer: 'Visualizer') -> None: + """ + Prepare visualizer objects (boxes, lines, text, etc.), so visualizer can draw them on the frame. -class _TrackingDetection(_Detection): - tracklet: dai.Tracklet - speed: float = 0.0 # m/s - speed_kmph: float = 0.0 # km/h - speed_mph: float = 0.0 # mph + Args: + visualizer: Visualizer object. + """ + pass + @abstractmethod + def get_timestamp(self) -> timedelta: + raise NotImplementedError() -class _TwoStageDetection(_Detection): - nn_data: dai.NNData + @abstractmethod + def get_sequence_num(self) -> int: + raise NotImplementedError() -class NNDataPacket: +class NNDataPacket(BasePacket): """ Contains only dai.NNData message """ - name: str # NNData stream name - msg: dai.NNData # Original depthai message def __init__(self, name: str, nn_data: dai.NNData): - self.name = name self.msg = nn_data + super().__init__(name) + + def get_timestamp(self) -> timedelta: + return self.msg.getTimestamp() + def get_sequence_num(self) -> int: + return self.msg.getTimestampDevice() -class FramePacket: + +class FramePacket(BasePacket): """ Contains only dai.ImgFrame message and cv2 frame, which is used by visualization logic. 
""" @@ -60,65 +67,268 @@ class FramePacket: def __init__(self, name: str, msg: dai.ImgFrame, - frame: Optional[np.ndarray], - visualizer: 'Visualizer' = None): - self.name = name + ): self.msg = msg - self.frame = frame + self._get_codec = None + self.__frame = None + super().__init__(name) + + @property + def frame(self): + if self.__frame is None: + self.__frame = self.decode() + return self.__frame - self.visualizer = visualizer + def get_timestamp(self) -> timedelta: + return self.msg.getTimestampDevice(dai.CameraExposureOffset.MIDDLE) + def get_sequence_num(self) -> int: + return self.msg.getSequenceNum() -class PointcloudPacket: + def set_decode_codec(self, get_codec: Callable): + self._get_codec = get_codec + + def decode(self) -> Optional[np.ndarray]: + if self._get_codec is None: + return self.msg.getCvFrame() if cv2 else None + + codec = self._get_codec() + if codec is None: + return self.msg.getCvFrame() if cv2 else None + + # PyAV decoding support H264, H265, JPEG and Lossless JPEG + enc_packets = codec.parse(self.msg.getData()) + if len(enc_packets) == 0: + return None + + frames = codec.decode(enc_packets[-1]) + if not frames: + return None + + return frames[0].to_ndarray(format='bgr24') + + def get_size(self) -> Tuple[int, int]: + return self.msg.getWidth(), self.msg.getHeight() + + +class DisparityPacket(FramePacket): def __init__(self, name: str, - points: np.ndarray, - depth_map: dai.ImgFrame, - color_frame: Optional[np.ndarray], - visualizer: 'Visualizer' = None): - self.name = name - self.points = points - self.depth_imgFrame = dai.ImgFrame - self.color_frame = color_frame - self.visualizer = visualizer + img: dai.ImgFrame, + multiplier: float, + disparity_map: Optional[np.ndarray] = None, + colorize: StereoColor = None, + colormap: int = None, + mono_frame: Optional[dai.ImgFrame] = None, + ): + """ + disparity_map might be filtered, eg. 
if WLS filter is enabled
+        """
+        super().__init__(name=name, msg=img)
+        self.mono_frame = mono_frame
+        self.disparity_map = disparity_map
+        self.multiplier = multiplier
+        self.colorize = colorize
+        self.colormap = colormap
+
+    def get_disparity(self) -> np.ndarray:
+        if self.disparity_map is not None:
+            return self.disparity_map
+        else:
+            return self.msg.getFrame()
+
+    def get_colorized_frame(self, visualizer) -> np.ndarray:
+        frame = self.get_disparity()
+        colorized_disp = frame * self.multiplier
+
+        try:
+            mono_frame = self.mono_frame.getCvFrame()
+        except AttributeError:
+            mono_frame = None
+
+        stereo_config = visualizer.config.stereo
+
+        colorize = self.colorize or stereo_config.colorize
+        if self.colormap is not None:
+            colormap = self.colormap
+        else:
+            colormap = stereo_config.colormap
+            colormap[0] = [0, 0, 0]  # Invalidate pixels 0 to be black
+
+        if mono_frame is not None and colorized_disp.ndim == 2 and mono_frame.ndim == 3:
+            colorized_disp = colorized_disp[..., np.newaxis]
+
+        if colorize == StereoColor.GRAY:
+            pass
+        elif colorize == StereoColor.RGB:
+            colorized_disp = cv2.applyColorMap(colorized_disp.astype(np.uint8), colormap)
+        elif colorize == StereoColor.RGBD:
+            colorized_disp = cv2.applyColorMap(
+                (colorized_disp + mono_frame * 0.5).astype(np.uint8), colormap
+            )
+        return colorized_disp


 class DepthPacket(FramePacket):
-    mono_frame: dai.ImgFrame
+    def __init__(self, name: str,
+                 msg: dai.ImgFrame):
+        super().__init__(name, msg)
+        self.depth = msg.getFrame()
+
+
+class DisparityDepthPacket(DisparityPacket):
     def __init__(self,
                  name: str,
                  img_frame: dai.ImgFrame,
-                 mono_frame: Optional[dai.ImgFrame],
-                 depth_map: Optional[np.ndarray] = None,
-                 visualizer: 'Visualizer' = None):
-        super().__init__(name=name,
-                         msg=img_frame,
-                         frame=img_frame.getCvFrame() if cv2 else None,
-                         visualizer=visualizer)
+                 colorize: StereoColor = None,
+                 colormap: int = None,
+                 mono_frame: Optional[dai.ImgFrame] = None,
+                 disp_scale_factor=255 / 95,
+                 ):
+        # DepthPacket.__init__(self, name=name, msg=img_frame)
+        super().__init__(
+            name=name,
+            img=img_frame,
+            disparity_map=None,
+            multiplier=255 / 95,
+            colorize=colorize,
+            colormap=colormap,
+            mono_frame=mono_frame,
+        )
+        self.disp_scale_factor = disp_scale_factor

-        if mono_frame is not None:
-            self.mono_frame = mono_frame
+    def get_disparity(self) -> np.ndarray:
+        with np.errstate(divide='ignore'):
+            disparity = self.disp_scale_factor / self.msg.getFrame()
+        disparity[disparity == np.inf] = 0
+        return disparity

+    # def get_colorized_frame(self, visualizer) -> np.ndarray:
+    #     Convert depth to disparity for nicer visualization
+
+
+class PointcloudPacket(BasePacket):
+    def __init__(self,
+                 name: str,
+                 points: np.ndarray,
+                 depth_map: dai.ImgFrame,
+                 colorize_frame: Optional[dai.ImgFrame]):
+        super().__init__(name=name)
+        self.points = points
+        self.colorize_frame = colorize_frame.getCvFrame() if colorize_frame is not None else None
         self.depth_map = depth_map

-class SpatialBbMappingPacket(FramePacket):
+    def get_sequence_num(self) -> int:
+        return self.depth_map.getSequenceNum()
+
+    def get_timestamp(self) -> timedelta:
+        return self.depth_map.getTimestampDevice()
+
+
+class SpatialBbMappingPacket(DisparityDepthPacket):
     """
     Output from Spatial Detection nodes - depth frame + bounding box mappings. Inherits FramePacket.
""" - spatials: dai.SpatialImgDetections def __init__(self, name: str, msg: dai.ImgFrame, spatials: dai.SpatialImgDetections, - visualizer: 'Visualizer' = None): + disp_scale_factor: float): super().__init__(name=name, - msg=msg, - frame=msg.getFrame() if cv2 else None, - visualizer=visualizer) + img_frame=msg, + disp_scale_factor=disp_scale_factor) self.spatials = spatials + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + # Add detections to packet + for detection in self.spatials.detections: + br = detection.boundingBoxMapping.roi.bottomRight() + tl = detection.boundingBoxMapping.roi.topLeft() + bbox = BoundingBox([tl.x, tl.y, br.x, br.y]) + # Add detections to visualizer + vis.add_bbox( + bbox=bbox, + thickness=3, + color=(0, 0, 0) + ) + vis.add_bbox( + bbox=bbox, + thickness=1, + color=(255, 255, 255) + ) + + +class NnOutputPacket(FramePacket): + """ + NN result + image frame. Inherits FramePacket. + """ + + def __init__(self, + name: str, + msg: dai.ImgFrame, + nn_data: dai.NNData, + bbox: BoundingBox + ): + super().__init__(name=name, + msg=msg) + self.nn_data = nn_data + self.bbox = bbox + + +class ImgLandmarksPacket(NnOutputPacket): + """ + Output from Landmarks Estimation nodes - image frame + landmarks. Inherits NnOutputPacket. + """ + + def __init__(self, + name: str, + msg: dai.ImgFrame, + nn_data: dai.NNData, + landmarks: ImgLandmarks, + bbox: BoundingBox): + super().__init__(name=name, + msg=msg, + nn_data=nn_data, + bbox=bbox) + self.landmarks = landmarks + + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + all_landmarks = self.landmarks.landmarks + all_landmarks_indices = self.landmarks.landmarks_indices + colors = self.landmarks.colors + w, h = self.get_size() + for landmarks, indices in zip(all_landmarks, all_landmarks_indices): + for i, landmark in enumerate(landmarks): + # Map normalized coordinates to frame coordinates + l = [(int(point[0] * w), int(point[1] * h)) for point in landmark] + idx = indices[i] + + vis.add_line(pt1=tuple(l[0]), pt2=tuple(l[1]), color=colors[idx], thickness=4) + vis.add_circle(coords=tuple(l[0]), radius=8, color=colors[idx], thickness=-1) + vis.add_circle(coords=tuple(l[1]), radius=8, color=colors[idx], thickness=-1) + + +class SemanticSegmentationPacket(NnOutputPacket): + """ + Output from Semantic Segmentation nodes - image frame + segmentation mask. Inherits NnOutputPacket. 
+ """ + + def __init__(self, + name: str, + msg: dai.ImgFrame, + nn_data: dai.NNData, + segmentation: SemanticSegmentation, + bbox: BoundingBox): + super().__init__(name=name, + msg=msg, + nn_data=nn_data, + bbox=bbox) + self.segmentation = segmentation + + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + raise NotImplementedError('Semantic segmentation visualization is not implemented yet!') + class DetectionPacket(FramePacket): """ @@ -128,26 +338,44 @@ class DetectionPacket(FramePacket): def __init__(self, name: str, msg: dai.ImgFrame, - img_detections: Union[dai.ImgDetections, dai.SpatialImgDetections], - visualizer: 'Visualizer' = None): + dai_msg: Union[dai.ImgDetections, dai.SpatialImgDetections, dai.NNData], + bbox: BoundingBox, + ): + super().__init__(name=name, - msg=msg, - frame=msg.getCvFrame() if cv2 else None, - visualizer=visualizer) - self.img_detections = img_detections - self.detections = [] + msg=msg) + + self.img_detections = dai_msg + self.bbox = bbox + self.detections: List[Detection] = [] def _is_spatial_detection(self) -> bool: return isinstance(self.img_detections, dai.SpatialImgDetections) - def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, color) -> None: - det = _Detection() - det.img_detection = img_det - det.label_str = txt - det.color = color - det.top_left = (bbox[0], bbox[1]) - det.bottom_right = (bbox[2], bbox[3]) - self.detections.append(det) + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + # Add detections to packet + for detection in self.detections: + # Add detections to visualizer + vis.add_bbox( + bbox=self.bbox.get_relative_bbox(detection.bbox), + # label=detection.label_str, + color=detection.color, + ) + vis.add_text( + f'{detection.label_str} {100 * detection.confidence:.0f}%', + bbox=self.bbox.get_relative_bbox(detection.bbox), + position=TextPosition.TOP_LEFT, + ) + # Spatial coordinates + if type(detection.img_detection) == dai.SpatialImgDetection: + x_meters = detection.img_detection.spatialCoordinates.x / 1000 + y_meters = detection.img_detection.spatialCoordinates.y / 1000 + z_meters = detection.img_detection.spatialCoordinates.z / 1000 + vis.add_text( + f'X: {x_meters:.2f}m\nY: {y_meters:.2f}m\nZ: {z_meters:.2f}m', + bbox=self.bbox.get_relative_bbox(detection.bbox), + position=TextPosition.BOTTOM_LEFT, + ) class TrackerPacket(FramePacket): @@ -159,32 +387,62 @@ def __init__(self, name: str, msg: dai.ImgFrame, tracklets: dai.Tracklets, - visualizer: 'Visualizer' = None): + bbox: BoundingBox, + ): super().__init__(name=name, - msg=msg, - frame=msg.getCvFrame() if cv2 else None, - visualizer=visualizer) - self.detections: List[_TrackingDetection] = [] - self.daiTracklets = tracklets + msg=msg) - def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, color): - det = _TrackingDetection() - det.img_detection = img_det - det.label_str = txt - det.color = color - det.top_left = (bbox[0], bbox[1]) - det.bottom_right = (bbox[2], bbox[3]) - self.detections.append(det) + # int: object_id, list: TrackingDetection + self.tracklets: Dict[int, List[TrackingDetection]] = {} + self.daiTracklets = tracklets + self.bbox = bbox def _is_spatial_detection(self) -> bool: coords = self.daiTracklets.tracklets[0].spatialCoordinates return coords.x != 0.0 or coords.y != 0.0 or coords.z != 0.0 - def _get_spatials(self, det: dai.ImgDetection) -> dai.Point3f: - # Not the cleanest solution, but oh well - for t in self.daiTracklets.tracklets: - if t.srcImgDetection == det: - return 
t.spatialCoordinates + def prepare_visualizer_objects(self, visualizer: Visualizer) -> None: + tracking_config = visualizer.config.tracking + for obj_id, tracking_dets in self.tracklets.items(): + tracking_det = tracking_dets[-1] # Get the last detection + bb = tracking_det.filtered_2d or tracking_det.bbox + visualizer.add_bbox( + bbox=self.bbox.get_relative_bbox(bb), + label=f"[{obj_id}] {tracking_det.label_str}", + color=tracking_det.color, + ) + visualizer.add_text( + f'{tracking_det.label_str} {100 * tracking_det.confidence:.0f}%', + bbox=self.bbox.get_relative_bbox(bb), + position=TextPosition.TOP_LEFT, + ) + if visualizer.config.tracking.show_speed and \ + tracking_det.speed is not None: + visualizer.add_text( + text=f"{tracking_det.speed:.2f} m/s", + color=tracking_det.color, + bbox=self.bbox.get_relative_bbox(bb), + position=TextPosition.BOTTOM_RIGHT, + ) + w, h = self.get_size() + tracklet_length = 0 + for i in reversed(range(len(tracking_dets) - 1)): + p1 = self.bbox.get_relative_bbox(tracking_dets[i].bbox).get_centroid().denormalize((h, w)) + p2 = self.bbox.get_relative_bbox(tracking_dets[i + 1].bbox).get_centroid().denormalize((h, w)) + + if tracking_config.max_length != -1: + tracklet_length += np.linalg.norm(np.array(p1) - np.array(p2)) + if tracking_config.max_length < tracklet_length: + break + + thickness = tracking_config.line_thickness + if tracking_config.fading_tails: + thickness = max(1, int(np.ceil(thickness * i / len(tracking_dets)))) + + visualizer.add_line(pt1=p1, pt2=p2, + color=tracking_dets[i].color, + thickness=thickness + ) class TwoStagePacket(DetectionPacket): @@ -197,18 +455,19 @@ def __init__(self, name: str, img_detections: dai.ImgDetections, nn_data: List[dai.NNData], labels: List[int], - visualizer: 'Visualizer' = None): + bbox: BoundingBox + ): super().__init__(name=name, msg=msg, - img_detections=img_detections, - visualizer=visualizer) - self.frame = self.msg.getCvFrame() if cv2 else None + dai_msg=img_detections, + bbox=bbox + ) self.nnData = nn_data self.labels = labels self._cntr = 0 def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, color): - det = _TwoStageDetection() + det = TwoStageDetection() det.img_detection = img_det det.color = color det.top_left = (bbox[0], bbox[1]) @@ -222,27 +481,57 @@ def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, self.detections.append(det) -class IMUPacket: - def __init__(self, data: List[dai.IMUData]): - self.data = data +class IMUPacket(BasePacket): + def __init__(self, name, packet: dai.IMUPacket, rotation=None): + self.packet = packet + super().__init__(name) + + self.acceleroMeter = packet.acceleroMeter + self.gyroscope = packet.gyroscope + self.magneticField = packet.magneticField + self.rotationVector = rotation if rotation is not None else packet.rotationVector + + # Check which reports are available + self.available_reports: Dict[str, dai.IMUReport] = {} + for i, val in enumerate([self.acceleroMeter, self.gyroscope, self.magneticField, self.rotationVector]): + if (i == 3 and rotation) or val.getTimestampDevice() != timedelta(0): + self.available_reports[val.__class__.__name__] = val + + def get_imu_vals(self) -> Tuple[Sequence, Sequence, Sequence, Sequence]: + """ + Returns imu values in a tuple. 
Returns in format (accelerometer_values, gyroscope_values, quaternion, magnetometer_values) + """ + return ( + [self.acceleroMeter.x, self.acceleroMeter.y, self.acceleroMeter.z], + [self.gyroscope.x, self.gyroscope.y, self.gyroscope.z], + [self.rotationVector.i, self.rotationVector.j, self.rotationVector.k, self.rotationVector.real], + [self.magneticField.x, self.magneticField.y, self.magneticField.z] + ) def __str__(self): - packet_details = [] - - for imu_data in self.data: - # TODO print more details if needed - accelerometer_str = 'Accelerometer [m/s^2]: (x: %.2f, y: %.2f, z: %.2f)' % ( - imu_data.acceleroMeter.x, - imu_data.acceleroMeter.y, - imu_data.acceleroMeter.z - ) + accelerometer_str = 'Accelerometer [m/s^2]: (x: %.2f, y: %.2f, z: %.2f)' % ( + self.packet.acceleroMeter.x, + self.packet.acceleroMeter.y, + self.packet.acceleroMeter.z + ) - gyroscope_str = 'Gyroscope [rad/s]: (x: %.2f, y: %.2f, z: %.2f)' % ( - imu_data.gyroscope.x, - imu_data.gyroscope.y, - imu_data.gyroscope.z - ) + gyroscope_str = 'Gyroscope [rad/s]: (x: %.2f, y: %.2f, z: %.2f)' % ( + self.packet.gyroscope.x, + self.packet.gyroscope.y, + self.packet.gyroscope.z + ) + + return f'IMU Packet: {accelerometer_str} {gyroscope_str}' + + def _get_imu_report(self) -> dai.IMUReport: + """ + Get the first available IMU report + """ + for name, val in self.available_reports.items(): + return val - packet_details.append(f'{accelerometer_str}, {gyroscope_str})') + def get_timestamp(self) -> timedelta: + return self._get_imu_report().getTimestampDevice() - return f'IMU Packet: {packet_details}' + def get_sequence_num(self) -> int: + return self._get_imu_report().getSequenceNum() diff --git a/depthai_sdk/src/depthai_sdk/components/camera_component.py b/depthai_sdk/src/depthai_sdk/components/camera_component.py index d304bfcae..34ae1534a 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_component.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_component.py @@ -2,27 +2,27 @@ from typing import Dict from depthai_sdk.classes.enum import ResizeMode +from depthai_sdk.components.camera_control import CameraControl from depthai_sdk.components.camera_helper import * -from depthai_sdk.components.component import Component -from depthai_sdk.components.parser import parse_resolution, parse_encode, parse_camera_socket +from depthai_sdk.components.component import Component, ComponentOutput +from depthai_sdk.components.parser import parse_resolution, parse_encode, encoder_profile_to_fourcc from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout, ReplayStream from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.oak_outputs.xout.xout_h26x import XoutH26x -from depthai_sdk.oak_outputs.xout.xout_mjpeg import XoutMjpeg from depthai_sdk.replay import Replay -from depthai_sdk.components.camera_control import CameraControl +from depthai_sdk.types import Resolution class CameraComponent(Component): def __init__(self, device: dai.Device, pipeline: dai.Pipeline, - source: Union[str, dai.CameraBoardSocket], + source: dai.CameraBoardSocket, resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, + sensor_type: Optional[dai.CameraSensorType] = None, rotation: Optional[int] = None, replay: Optional[Replay] = None, name: Optional[str] = None, @@ -39,14 +39,19 @@ def __init__(self, resolution (optional): Camera resolution, 
eg. '800p' or '4k' fps (float, optional): Camera FPS encode: Encode streams before sending them to the host. Either True (use default), or mjpeg/h264/h265 + sensor_type: To force color/mono/tof camera rotation (int, optional): Rotate the camera by 90, 180, 270 degrees replay (Replay object): Replay object to use for mocking the camera name (str, optional): Name of the output stream args (Dict): Use user defined arguments when constructing the pipeline """ super().__init__() + # _replay should be set before .out, as it's used in .out + self._replay: Optional[Replay] = replay self.out = self.Out(self) + self._pipeline = pipeline + self._device = device self.node: Optional[Union[dai.node.ColorCamera, dai.node.MonoCamera, dai.node.XLinkIn]] = None self.encoder: Optional[dai.node.VideoEncoder] = None @@ -55,8 +60,13 @@ def __init__(self, self.stream_size: Optional[Tuple[int, int]] = None # Output size self._source = str(source) + if self._source.startswith('CameraBoardSocket.'): + self._source = self._source[len('CameraBoardSocket.'):] + + self._socket = source self._replay: Optional[Replay] = replay self._args: Dict = args + self.name = name if rotation not in [None, 0, 90, 180, 270]: @@ -66,8 +76,14 @@ def __init__(self, self._preview_num_frames_pool = 4 if self.is_replay(): - if source.casefold() not in list(map(lambda x: x.casefold(), self._replay.getStreams())): + stream_name = None + for name, stream in self._replay.streams.items(): + if stream.get_socket() == self._socket: + stream_name = name + break + if stream_name is None: raise Exception(f"{source} stream was not found in specified depthai-recording!") + self._source = stream_name res = self._replay.getShape(self._source) # print('resolution', res) # resize = getResize(res, width=1200) @@ -91,44 +107,22 @@ def __init__(self, # Livestreaming, not replay else: node_type: dai.node = None - if isinstance(source, str): - source = source.upper() - # When sensors can be either color or mono (eg. AR0234), we allow specifying it - if "," in source: # For sensors that support multiple - parts = source.split(',') - source = parts[0] - if parts[1] in ["C", "COLOR"]: - node_type = dai.node.ColorCamera - elif parts[1] in ["M", "MONO"]: - node_type = dai.node.MonoCamera - else: - raise Exception( - "Please specify sensor type with c/color or m/mono after the ','" - " - eg. 
`cam = oak.create_camera('cama,c')`" - ) - elif source in ["COLOR", "RGB"]: - for features in device.getConnectedCameraFeatures(): - if dai.CameraSensorType.COLOR in features.supportedTypes: - source = features.socket - break - if not isinstance(source, dai.CameraBoardSocket): - raise ValueError("Couldn't find a color camera!") - - socket = parse_camera_socket(source) - sensor = [f for f in device.getConnectedCameraFeatures() if f.socket == socket][0] - - if node_type is None: # User specified camera type - type = sensor.supportedTypes[0] - if type == dai.CameraSensorType.COLOR: - node_type = dai.node.ColorCamera - elif type == dai.CameraSensorType.MONO: - node_type = dai.node.MonoCamera - else: - raise Exception(f"{sensor} doesn't support either COLOR or MONO ") + sensors = [f for f in device.getConnectedCameraFeatures() if f.socket == source] + if len(sensors) == 0: + raise Exception(f"No camera found on user-specified socket {source}") + sensor = sensors[0] + + sensor_type = sensor_type or sensor.supportedTypes[0] + if sensor_type == dai.CameraSensorType.COLOR: + node_type = dai.node.ColorCamera + elif sensor_type == dai.CameraSensorType.MONO: + node_type = dai.node.MonoCamera + else: + raise Exception(f"{sensor} doesn't support either COLOR or MONO ") # Create the node, and set the socket self.node = pipeline.create(node_type) - self.node.setBoardSocket(socket) + self.node.setBoardSocket(source) self._resolution_forced: bool = resolution is not None if resolution: @@ -147,13 +141,22 @@ def __init__(self, if not self._resolution_forced: # Find the closest resolution sensor = [f for f in device.getConnectedCameraFeatures() if f.socket == self.node.getBoardSocket()][0] sensor_type = dai.CameraSensorType.COLOR if dai.node.ColorCamera else dai.CameraSensorType.MONO - res = getClosesResolution(sensor, sensor_type, width=1300) + targetWidthRes = 1300 + targetWidthIsp = targetWidthRes + if self._args["defaultResolution"] == "min": + targetWidthRes = 0 + targetWidthIsp = 1300 # Still keep the same target for the ISP + elif self._args["defaultResolution"] == "max": + targetWidthRes = 1000000 # Some big number + targetWidthIsp = targetWidthRes + res = getClosesResolution(sensor, sensor_type, width=targetWidthRes) self.node.setResolution(res) - scale = getClosestIspScale(self.node.getIspSize(), width=1300, videoEncoder=(self.encoder is not None)) + scale = getClosestIspScale(self.node.getIspSize(), width=targetWidthIsp, + videoEncoder=(encode is not None)) self.node.setIspScale(*scale) curr_size = self.node.getVideoSize() - closest = getClosestVideoSize(*curr_size) + closest = getClosestVideoSize(*curr_size, videoEncoder=encode) self.node.setVideoSize(*closest) self.node.setVideoNumFramesPool(2) # We will increase it later if we are streaming to host @@ -197,7 +200,6 @@ def __init__(self, if self._args: self._config_camera_args(self._args) - # Runtime camera control self.control = CameraControl() self._control_xlink_in = None @@ -205,7 +207,8 @@ def __init__(self, self._control_xlink_in = pipeline.create(dai.node.XLinkIn) self._control_xlink_in.setStreamName(f"{self.node.id}_inputControl") self._control_xlink_in.out.link(self.node.inputControl) - self._control_xlink_in.setMaxDataSize(1) # CameraControl message doesn't use any additional data (only metadata) + # CameraControl message doesn't use any additional data (only metadata) + self._control_xlink_in.setMaxDataSize(1) def on_pipeline_started(self, device: dai.Device): if self._control_xlink_in is not None: @@ -223,23 +226,21 @@ def 
_create_rotation_manip(self, pipeline: dai.Pipeline, rotation: int): rot_manip.setMaxOutputFrameSize(w * h * 3) return rot_manip - # Should be mono/color camera agnostic. Also call this from __init__ if args is enabled def config_camera(self, # preview: Union[None, str, Tuple[int, int]] = None, size: Union[None, Tuple[int, int], str] = None, resize_mode: ResizeMode = ResizeMode.CROP, fps: Optional[float] = None, - resolution: Optional[Union[ - str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None + resolution: Optional[Resolution] = None ) -> None: """ Configure resolution, scale, FPS, etc. """ - - # TODO - if fps: self.set_fps(fps) - if resolution: self._set_resolution(resolution) + # TODO: Should be mono/color camera agnostic. Also call this from __init__ if args is enabled + if fps: + self.set_fps(fps) + if resolution: + self._set_resolution(resolution) if size: from .parser import parse_size @@ -255,7 +256,6 @@ def config_camera(self, raise ValueError("Currently only ResizeMode.CROP is supported mode for specifying size!") else: # TODO: Use ImageManip to set mono frame size - raise NotImplementedError("Not yet implemented") def _config_camera_args(self, args: Dict): @@ -294,17 +294,16 @@ def control_with_nn(self, detection_component: 'NNComponent', auto_focus=True, a :param detection_component: NNComponent that will be used to control the camera :param auto_focus: Enable auto focus to the object :param auto_exposure: Enable auto exposure to the object - :param auto_white_balance: auto white balance to the object """ if not auto_focus and not auto_exposure: - logging.error( - 'Attempted to control camera with NN, but both Auto-Focus and Auto-Exposure were disabled! Attempt ignored.' - ) + logging.error('Attempted to control camera with NN, ' + 'but both Auto-Focus and Auto-Exposure were disabled! Attempt ignored.') return + if 'NNComponent' not in str(type(detection_component)): raise ValueError('nn_component must be an instance of NNComponent!') - if not detection_component._is_detector(): + if not detection_component.is_detector(): raise ValueError('nn_component must be a object detection model (YOLO/MobileNetSSD based)!') from depthai_sdk.components.control_camera_with_nn import control_camera_with_nn @@ -315,7 +314,7 @@ def control_with_nn(self, detection_component: 'NNComponent', auto_focus=True, a nn_output=detection_component.node.out, resize_mode=detection_component._ar_resize_mode, resolution=self.node.getResolution(), - nn_size = detection_component._size, + nn_size=detection_component._size, af=auto_focus, ae=auto_exposure, debug=debug @@ -338,17 +337,14 @@ def config_color_camera(self, chroma_denoise: Optional[int] = None, ) -> None: if not self.is_color(): - logging.info( - 'Attempted to configure ColorCamera, but this component doesn\'t have it. Config attempt ignored.' - ) + logging.info('Attempted to configure ColorCamera, ' + 'but this component doesn\'t have it. Config attempt ignored.') return if self.is_replay(): logging.info('Tried configuring ColorCamera, but replaying is enabled. 
Config attempt ignored.') return - self.node: dai.node.ColorCamera - if interleaved is not None: self.node.setInterleaved(interleaved) if color_order: if isinstance(color_order, str): @@ -377,7 +373,12 @@ def config_color_camera(self, def _set_resolution(self, resolution): if not self.is_replay(): - self.node.setResolution(parse_resolution(type(self.node), resolution)) + if isinstance(resolution, str) and resolution.lower() in ['max', 'maximum']: + sensor = [f for f in self._device.getConnectedCameraFeatures() if f.socket == self._socket][0] + resolution = get_max_resolution(type(self.node), sensor) + else: + resolution = parse_resolution(type(self.node), resolution) + self.node.setResolution(resolution) # TODO: support potentially downscaling depthai-recording def is_replay(self) -> bool: @@ -437,17 +438,19 @@ def config_encoder_mjpeg(self, if lossless is not None: self.encoder.setLossless(lossless) - def get_stream_xout(self) -> StreamXout: - if self.is_replay(): - return ReplayStream(self._source) + def get_stream_xout(self, fourcc: Optional[str] = None) -> StreamXout: + if self.encoder is not None and fourcc is not None: + return StreamXout(self.encoder.bitstream, name=self.name or self._source + '_bitstream') + elif self.is_replay(): + return ReplayStream(self.name or self._source) elif self.is_mono(): - return StreamXout(self.node.id, self.stream, name=self.name) + return StreamXout(self.stream, name=self.name or self._source + '_mono') else: # ColorCamera self.node.setVideoNumFramesPool(self._num_frames_pool) self.node.setPreviewNumFramesPool(self._preview_num_frames_pool) # node.video instead of preview (self.stream) was used to reduce bandwidth # consumption by 2 (3bytes/pixel vs 1.5bytes/pixel) - return StreamXout(self.node.id, self.node.video, name=self.name) + return StreamXout(self.node.video, name=self.name or self._source + '_video') def set_num_frames_pool(self, num_frames: int, preview_num_frames: Optional[int] = None): """ @@ -461,61 +464,32 @@ def set_num_frames_pool(self, num_frames: int, preview_num_frames: Optional[int] if preview_num_frames is not None: self._preview_num_frames_pool = preview_num_frames + def get_fourcc(self) -> Optional[str]: + if self.encoder is None: + return None + return encoder_profile_to_fourcc(self._encoder_profile) + """ Available outputs (to the host) of this component """ class Out: + class CameraOut(ComponentOutput): + def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: + return XoutFrames(self._comp.get_stream_xout(fourcc), fourcc).set_comp_out(self) + + class ReplayOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(ReplayStream(self._comp._source)).set_comp_out(self) + + class EncodedOut(CameraOut): + def __call__(self, device: dai.Device) -> XoutBase: + return super().__call__(device, fourcc=self._comp.get_fourcc()) + + def __init__(self, camera_component: 'CameraComponent'): - self._comp = camera_component - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - Default output. Uses either camera(), replay(), or encoded() depending on the component settings. - """ - if self._comp.encoder: - return self.encoded(pipeline, device) - elif self._comp.is_replay(): - return self.replay(pipeline, device) - else: - return self.camera(pipeline, device) - - def camera(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutFrames: - """ - Streams camera output to the OAK camera. Produces FramePacket. 
- """ - out = XoutFrames(self._comp.get_stream_xout(), self._comp.get_fps()) - out.name = self._comp._source - return self._comp._create_xout(pipeline, out) - - def replay(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - If depthai-recording was used, it won't stream anything, but it will instead use frames that were sent to the OAK. - Produces FramePacket. - """ - out = XoutFrames(ReplayStream(self._comp._source), self._comp.get_fps()) - return self._comp._create_xout(pipeline, out) - - def encoded(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - If encoding was enabled, it will stream bitstream from VideoEncoder node to the host. - Produces FramePacket. - """ - if self._comp._encoder_profile == dai.VideoEncoderProperties.Profile.MJPEG: - out = XoutMjpeg( - frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream, name=self._comp.name), - color=self._comp.is_color(), - lossless=self._comp.encoder.getLossless(), - fps=self._comp.encoder.getFrameRate(), - frame_shape=self._comp.stream_size - ) - else: - out = XoutH26x( - frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream, name=self._comp.name), - color=self._comp.is_color(), - profile=self._comp._encoder_profile, - fps=self._comp.encoder.getFrameRate(), - frame_shape=self._comp.stream_size - ) - out.name = self._comp._source - return self._comp._create_xout(pipeline, out) + self.replay = self.ReplayOut(camera_component) + self.camera = self.CameraOut(camera_component) + self.encoded = self.EncodedOut(camera_component) + + self.main = self.replay if camera_component.is_replay() else self.camera diff --git a/depthai_sdk/src/depthai_sdk/components/camera_control.py b/depthai_sdk/src/depthai_sdk/components/camera_control.py index a5c8107ea..fb871c62d 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_control.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_control.py @@ -1,6 +1,7 @@ -import depthai as dai -from itertools import cycle import logging +from itertools import cycle + +import depthai as dai logger = logging.getLogger(__name__) @@ -18,17 +19,22 @@ 'chroma_denoise': (0, 4) } + def clamp(value, min_value, max_value): return max(min(value, max_value), min_value) + class CameraControl: def __init__(self): self.queue = None - self._cycle_awb_mode = cycle([item for name, item in vars(dai.CameraControl.AutoWhiteBalanceMode).items() if name.isupper()]) - self._cycle_ab_mode = cycle([item for name, item in vars(dai.CameraControl.AntiBandingMode).items() if name.isupper()]) + self._cycle_awb_mode = cycle( + [item for name, item in vars(dai.CameraControl.AutoWhiteBalanceMode).items() if name.isupper()]) + self._cycle_ab_mode = cycle( + [item for name, item in vars(dai.CameraControl.AntiBandingMode).items() if name.isupper()]) # self._cycle_effect_mode = cycle([item for name, item in vars(dai.CameraControl.EffectMode).items() if name.isupper()]) - self._cycle_af_mode = cycle([item for name, item in vars(dai.CameraControl.AutoFocusMode).items() if name.isupper()]) + self._cycle_af_mode = cycle( + [item for name, item in vars(dai.CameraControl.AutoFocusMode).items() if name.isupper()]) self._current_vals = { 'exposure_time': 20000, @@ -76,7 +82,9 @@ def exposure_time_up(self, step=500): logger.error(f'Exposure time cannot be greater than {LIMITS["exposure"][1]}') return self._current_vals['exposure_time'] += step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + 
{'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + def exposure_time_down(self, step=500): """ Decrease exposure time by step. @@ -87,7 +95,8 @@ def exposure_time_down(self, step=500): logger.error(f'Exposure time cannot be less than {LIMITS["exposure"][0]}') return self._current_vals['exposure_time'] -= step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + {'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) def sensitivity_up(self, step=50): """ @@ -99,7 +108,8 @@ def sensitivity_up(self, step=50): logger.error(f'Sensitivity cannot be greater than {LIMITS["gain"][1]}') return self._current_vals['sensitivity'] += step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + {'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) def sensitivity_down(self, step=50): """ @@ -111,7 +121,8 @@ def sensitivity_down(self, step=50): logger.error(f'Sensitivity cannot be less than {LIMITS["gain"][0]}') return self._current_vals['sensitivity'] -= step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + {'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) def focus_up(self, step=3): """ @@ -297,7 +308,8 @@ def send_controls(self, controls: dict = None): if controls.get('white-balance', None) is not None: if controls['white-balance'].get('mode', None) is not None: if isinstance(controls["focus"]["mode"], str): - controls["white-balance"]["mode"] = getattr(dai.CameraControl.AutoFocusMode, controls["white-balance"]["mode"]) + controls["white-balance"]["mode"] = getattr(dai.CameraControl.AutoFocusMode, + controls["white-balance"]["mode"]) logger.info(f'Setting white balance mode to {controls["white-balance"]["mode"]}.') ctrl.setAutoWhiteBalanceMode(controls["white-balance"]["mode"]) if controls['white-balance'].get('lock', None) is not None: diff --git a/depthai_sdk/src/depthai_sdk/components/camera_helper.py b/depthai_sdk/src/depthai_sdk/components/camera_helper.py index 2fc802319..1f4215084 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_helper.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_helper.py @@ -1,19 +1,17 @@ import math -import depthai as dai -from typing import * -import numpy as np -import cv2 +from typing import List, Tuple, Dict, Any, Optional, Union +import depthai as dai -monoResolutions: Dict[dai.MonoCameraProperties.SensorResolution, Tuple[int,int]] = { - dai.MonoCameraProperties.SensorResolution.THE_1200_P: (1920, 1200), # Monochrome AR0234 - dai.MonoCameraProperties.SensorResolution.THE_800_P: (1280, 800), # OV9282 +monoResolutions: Dict[dai.MonoCameraProperties.SensorResolution, Tuple[int, int]] = { + dai.MonoCameraProperties.SensorResolution.THE_1200_P: (1920, 1200), # Monochrome AR0234 + dai.MonoCameraProperties.SensorResolution.THE_800_P: (1280, 800), # OV9282 dai.MonoCameraProperties.SensorResolution.THE_720_P: (1280, 720), - dai.MonoCameraProperties.SensorResolution.THE_480_P: (640, 480), # OV7251 + dai.MonoCameraProperties.SensorResolution.THE_480_P: (640, 480), # OV7251 dai.MonoCameraProperties.SensorResolution.THE_400_P: (640, 400), } -colorResolutions: 
Dict[dai.ColorCameraProperties.SensorResolution, Tuple[int,int]] = { +colorResolutions: Dict[dai.ColorCameraProperties.SensorResolution, Tuple[int, int]] = { dai.ColorCameraProperties.SensorResolution.THE_5312X6000: (5312, 6000), # IMX582 cropped dai.ColorCameraProperties.SensorResolution.THE_13_MP: (4208, 3120), # AR214 dai.ColorCameraProperties.SensorResolution.THE_12_MP: (4056, 3040), # IMX378, IMX477, IMX577 @@ -27,10 +25,11 @@ dai.ColorCameraProperties.SensorResolution.THE_720_P: (1280, 720), } -sensorResolutions: Dict[Any, Tuple[int,int]] = [] +sensorResolutions: Dict[Any, Tuple[int, int]] = [] sensorResolutions.extend(monoResolutions) sensorResolutions.extend(colorResolutions) + def availableIspScales() -> List[Tuple[int, Tuple[int, int]]]: """ Calculates all supported @@ -47,19 +46,17 @@ def availableIspScales() -> List[Tuple[int, Tuple[int, int]]]: lst.sort(reverse=True) return lst -def getClosestVideoSize(width: int, height: int, videoEncoder: bool=False) -> Tuple[int, int]: + +def getClosestVideoSize(width: int, height: int, videoEncoder: bool = False) -> Tuple[int, int]: """ For colorCamera.video output """ - while True: - if width % 2 == 0: # YUV420/NV12 width needs to be an even number to be convertible to BGR on host using cv2 - if not videoEncoder or width % 32 == 0: # VideoEncoder HW limitation - width must be divisible by 32 - break - width -= 1 - while True: - if height % 2 == 0: # YUV420/NV12 height needs to be an even number to be convertible to BGR on host using cv2 - break - height -= 1 + width_divider = 32 if videoEncoder else 2 + width = (width // width_divider) * width_divider + + height_divider = 8 if videoEncoder else 2 + height = (height // height_divider) * height_divider + return (width, height) @@ -82,10 +79,10 @@ def getClosestIspScale(camResolution: Tuple[int, int], """ if width and height: raise ValueError("You have to specify EITHER width OR height to calculate desired ISP scaling options!") - if not width and not height: + if width is None and height is None: raise ValueError("You have to provide width or height calculate desired ISP scaling options!") - minError = 99999 + minError = 999999 ispScale: List[int] = None for ratio, (n, d) in availableIspScales(): newW = int((camResolution[0] * n - 1) / d + 1) @@ -102,7 +99,7 @@ def getClosestIspScale(camResolution: Tuple[int, int], if newW % 2 != 0 or newH % 2 != 0: continue - err = abs((newW - width) if width else (newH - height)) + err = abs((newW - width) if width is not None else (newH - height)) if err < minError: ispScale = [n, d, n, d] minError = err @@ -166,11 +163,28 @@ def setCameraControl(control: dai.CameraControl, # TODO: Add contrast, exposure compensation, brightness, manual exposure, and saturation -def get_sensor_resolution(type: dai.CameraSensorType, width: int, height: int) -> Tuple[Union[dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution], Tuple[int,int]]: - def get_res(resolutions: Dict[Any, Tuple[int,int]]): +def get_max_resolution(node: dai.node, sensor: dai.CameraFeatures) -> Union[ + dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution]: + max_res = None + max_num = 0 + for conf in sensor.configs: + if node == dai.node.ColorCamera and conf.type != dai.CameraSensorType.COLOR: + continue + if node == dai.node.MonoCamera and conf.type != dai.CameraSensorType.MONO: + continue + (res, size) = get_sensor_resolution(conf.type, conf.width, conf.height) + if size[0] * size[1] > max_num: + max_num = size[0] * size[1] + 
max_res = res + return max_res + + +def get_sensor_resolution(type: dai.CameraSensorType, width: int, height: int) -> Tuple[ + Union[dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution], Tuple[int, int]]: + def get_res(resolutions: Dict[Any, Tuple[int, int]]): for res, (w, h) in resolutions.items(): if width == w and height == h: - return (res, (w,h)) + return (res, (w, h)) if type == dai.CameraSensorType.COLOR: return get_res(colorResolutions) @@ -179,11 +193,12 @@ def get_res(resolutions: Dict[Any, Tuple[int,int]]): else: raise Exception('Camera sensor type unknown!', type) + def get_resolution_size( resolution: Union[ dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]) -> Tuple[int,int]: + ]) -> Tuple[int, int]: if resolution in colorResolutions: return colorResolutions[resolution] elif resolution in monoResolutions: @@ -191,18 +206,19 @@ def get_resolution_size( else: raise Exception('Camera sensor resolution unknown!', resolution) + def getClosesResolution(sensor: dai.CameraFeatures, type: dai.CameraSensorType, width: Optional[int] = None, height: Optional[int] = None, ): if width and height: raise ValueError("You have to specify EITHER width OR height to calculate desired ISP scaling options!") - if not width and not height: + if width is None and height is None: raise ValueError("You have to provide width or height calculate desired ISP scaling options!") - minError = 99999 + minError = 999999 closestRes = None - desired, i = (width, 0) if width else (height, 1) + desired, i = (width, 0) if width is not None else (height, 1) resolutions = [get_sensor_resolution(type, conf.width, conf.height) for conf in sensor.configs if conf.type == type] @@ -215,11 +231,11 @@ def getClosesResolution(sensor: dai.CameraFeatures, def getResize(size: Tuple[int, int], - width: Optional[int] = None, - height: Optional[int] = None) -> Tuple[int, int]: + width: Optional[int] = None, + height: Optional[int] = None) -> Tuple[int, int]: if width and height: raise ValueError("You have to specify EITHER width OR height to calculate desired ISP scaling options!") - if not width and not height: + if width is None and height is None: raise ValueError("You have to provide width or height calculate desired ISP scaling options!") if width: diff --git a/depthai_sdk/src/depthai_sdk/components/component.py b/depthai_sdk/src/depthai_sdk/components/component.py index f49e4bb76..972e74f70 100644 --- a/depthai_sdk/src/depthai_sdk/components/component.py +++ b/depthai_sdk/src/depthai_sdk/components/component.py @@ -1,24 +1,12 @@ -from abc import ABC, abstractmethod -from typing import Optional - +from abc import ABC import depthai as dai -from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, ReplayStream - class Component(ABC): """ SDK component is used as an abstraction to the current DepthAI API node or group of nodes. """ - def forced_openvino_version(self) -> Optional[dai.OpenVINO.Version]: - """ - Checks whether the component forces a specific OpenVINO version. Only used by NNComponent (which overrides this - method). This function is called after Camera has been configured and right before we connect to the OAK camera. - @return: Forced OpenVINO version (optional). - """ - return None - def on_pipeline_started(self, device: dai.Device) -> None: """ This function gets called after the pipeline has been started. It is called from the main thread. 
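For reference, the refactored getClosestVideoSize() in camera_helper.py above now simply rounds each dimension down to the nearest valid multiple (width divisible by 32 and height by 8 when a VideoEncoder is attached, otherwise just even). A minimal standalone sketch of that rounding rule, using a hypothetical helper name and the divisor values taken from the diff above:

def round_video_size(width: int, height: int, video_encoder: bool = False):
    # VideoEncoder HW needs width % 32 == 0; NV12/YUV420 only needs even dimensions.
    width_divider = 32 if video_encoder else 2
    # VideoEncoder prefers height % 8 == 0; otherwise even is enough.
    height_divider = 8 if video_encoder else 2
    return (width // width_divider) * width_divider, (height // height_divider) * height_divider

print(round_video_size(1921, 1081))                      # -> (1920, 1080)
print(round_video_size(1902, 1074, video_encoder=True))  # -> (1888, 1072)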
@@ -26,23 +14,61 @@ def on_pipeline_started(self, device: dai.Device) -> None: """ pass - def _stream_name_ok(self, pipeline: dai.Pipeline, name: str) -> bool: - # Check if there's already an XLinkOut stream with this name - for node in pipeline.getAllNodes(): - if isinstance(node, dai.node.XLinkOut) and node.getStreamName() == name: - return False - return True - def _create_xout(self, pipeline: dai.Pipeline, xout: XoutBase) -> XoutBase: - for xstream in xout.xstreams(): - if not self._stream_name_ok(pipeline, xstream.name): - continue + # So users can use: + # packets: Dict[Packet] = q.get() + # depthPacket = packets['depth'] + # depthPacket = packets[stereoComp] + def __str__(self): + return self.out.main.__str__() + + def __hash__(self): + return self.__str__().__hash__() + + def __eq__(self, other): + if isinstance(other, Component): + return str(self) == str(other) + elif isinstance(other, str): + return str(self) == other + else: + return False + +class ComponentOutput(ABC): + """ + Output of a component + """ + def __init__(self, component: Component): + """ + If user hasn't specified component's output name, we will + generate one in Xout class + """ + self.name = None + self._comp = component + + def set_name(self, name: str) -> 'ComponentOutput': + """ + Name component's output, which will be used for packet names. If not specified, it + will be generated automatically after pipeline is started (after `oak.start()`) by + combining all Xout Stream names (eg. "6_out;3_out"). + """ + self.name = name + return self + + # So users can use: + # packets: Dict[Packet] = q.get() + # depthPacket = packets['depth'] + # depthPacket = packets[stereoComp.out.depth] + def __str__(self): + return self.name - if isinstance(xstream, ReplayStream): - continue + def __hash__(self): + return self.__str__().__hash__() - xlink = pipeline.createXLinkOut() - xlink.setStreamName(xstream.name) - xstream.stream.link(xlink.input) + def __eq__(self, other): + if isinstance(other, ComponentOutput): + return str(self) == str(other) + elif isinstance(other, str): + return str(self) == other + else: + return False - return xout diff --git a/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py b/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py index f804f4974..2ec76015b 100644 --- a/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py +++ b/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py @@ -1,9 +1,12 @@ +from pathlib import Path +from string import Template +from typing import Union, Tuple + import depthai as dai + from depthai_sdk.classes.enum import ResizeMode -from typing import Union, Tuple from depthai_sdk.components.camera_helper import get_resolution_size -from pathlib import Path -from string import Template + def control_camera_with_nn( pipeline: dai.Pipeline, @@ -15,7 +18,7 @@ def control_camera_with_nn( af: bool, ae: bool, debug: bool - ): +): sensor_resolution = get_resolution_size(resolution) # width / height (old ar) sensor_ar = sensor_resolution[0] / sensor_resolution[1] @@ -34,10 +37,9 @@ def control_camera_with_nn( init = f"xmin = 0; ymin = {-cropping}; xmax = 1; ymax = {1 + cropping}" else: init = f"xmin = {cropping}; ymin = 0; xmax = {1 - cropping}; ymax = 1" - else: # Stretch + else: # Stretch init = f"xmin=0; ymin=0; xmax=1; ymax=1" - resize_str = f"new_xmin=xmin+width*det.xmin; new_ymin=ymin+height*det.ymin; new_xmax=xmin+width*det.xmax; new_ymax=ymin+height*det.ymax;" denormalize = f"startx=int(new_xmin*{sensor_resolution[0]}); 
starty=int(new_ymin*{sensor_resolution[1]}); new_width=int((new_xmax-new_xmin)*{sensor_resolution[0]}); new_height=int((new_ymax-new_ymin)*{sensor_resolution[1]});" control_str = '' @@ -46,7 +48,6 @@ def control_camera_with_nn( if af: control_str += f"control.setAutoFocusRegion(startx, starty, new_width, new_height);" - script_node = pipeline.create(dai.node.Script) script_node.setProcessor(dai.ProcessorType.LEON_CSS) # More stable diff --git a/depthai_sdk/src/depthai_sdk/components/imu_component.py b/depthai_sdk/src/depthai_sdk/components/imu_component.py index 34451f703..ec3a63398 100644 --- a/depthai_sdk/src/depthai_sdk/components/imu_component.py +++ b/depthai_sdk/src/depthai_sdk/components/imu_component.py @@ -2,7 +2,7 @@ import depthai as dai -from depthai_sdk.components.component import Component, XoutBase +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_imu import XoutIMU @@ -14,9 +14,15 @@ def __init__(self, self.out = self.Out(self) super().__init__() + + self.imu_name: str = device.getConnectedIMU() self.node = pipeline.createIMU() + self.fps = 100 self.config_imu() # Default settings, component won't work without them + def get_imu_name(self) -> str: + return self.imu_name + def config_imu(self, sensors: List[dai.IMUSensor] = None, report_rate: int = 100, @@ -43,18 +49,13 @@ def config_imu(self, self.node.setMaxBatchReports(maxBatchReports=max_batch_reports) self.node.enableFirmwareUpdate(enable_firmware_update) + self.fps = report_rate + class Out: + class ImuOut(ComponentOutput): + def __call__(self, device: dai.Device): + return XoutIMU(StreamXout(self._comp.node.out, name='imu'), self._comp.fps).set_comp_out(self) + def __init__(self, imu_component: 'IMUComponent'): - self._comp = imu_component - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - Default output. Uses either camera(), replay(), or encoded() depending on the component settings. 
- """ - return self.text(pipeline, device) - - def text(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - out = self._comp.node.out - out = StreamXout(self._comp.node.id, out) - imu_out = XoutIMU(out) - return self._comp._create_xout(pipeline, imu_out) + self.main = self.ImuOut(imu_component) + self.text = self.main diff --git a/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py b/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py index 183bea93c..f3dcafe45 100644 --- a/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py +++ b/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py @@ -2,9 +2,10 @@ from pathlib import Path from string import Template from typing import Tuple, Optional, List -from depthai_sdk.classes.enum import ResizeMode + import depthai as dai +from depthai_sdk.classes.enum import ResizeMode from depthai_sdk.types import GenericNeuralNetwork @@ -45,7 +46,7 @@ def __init__(self, self.init = f"xmin = 0; ymin = {-cropping}; xmax = 1; ymax = {1 + cropping}" else: self.init = f"xmin = {cropping}; ymin = 0; xmax = {1 - cropping}; ymax = 1" - else: # Stretch + else: # Stretch self.init = f"xmin=0; ymin=0; xmax=1; ymax=1" self.script: dai.node.Script = pipeline.create(dai.node.Script) @@ -55,7 +56,7 @@ def __init__(self, detection_node.out.link(self.script.inputs['detections']) high_res_frames.link(self.script.inputs['frames']) - self.configure() # User might later call this again with different parameters + self.configure() # User might later call this again with different parameters self.manip: dai.node.ImageManip = pipeline.create(dai.node.ImageManip) self.manip.initialConfig.setResize(size) @@ -66,13 +67,18 @@ def __init__(self, self.script.outputs['manip_img'].link(self.manip.inputImage) self.out: dai.Node.Output = self.manip.out + self.whitelist_labels: Optional[List[int]] = None + self.scale_bb: Optional[Tuple[int, int]] = None + def configure(self, debug: bool = False, whitelist_labels: Optional[List[int]] = None, scale_bb: Optional[Tuple[int, int]] = None) -> None: """ Args: - config (MultiStageConfig, optional): Configuration object. Defaults to None. + debug (bool, optional): Enable debug mode. Defaults to False. + whitelist_labels (Optional[List[int]], optional): List of labels to keep. Defaults to None. + scale_bb (Optional[Tuple[int, int]], optional): Scale bounding box. Defaults to None. 
""" # Used later for visualization self.whitelist_labels = whitelist_labels diff --git a/depthai_sdk/src/depthai_sdk/components/nn_component.py b/depthai_sdk/src/depthai_sdk/components/nn_component.py index 5126d9f46..6449f9841 100644 --- a/depthai_sdk/src/depthai_sdk/components/nn_component.py +++ b/depthai_sdk/src/depthai_sdk/components/nn_component.py @@ -5,6 +5,9 @@ from pathlib import Path from typing import Callable, Union, List, Dict +from depthai_sdk.types import NNNode +from depthai_sdk.visualize.bbox import BoundingBox + try: import blobconverter except ImportError: @@ -12,17 +15,17 @@ from depthai_sdk.classes.nn_config import Config from depthai_sdk.components.camera_component import CameraComponent -from depthai_sdk.components.component import Component +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.integrations.roboflow import RoboflowIntegration from depthai_sdk.components.multi_stage_nn import MultiStageNN from depthai_sdk.components.nn_helper import * from depthai_sdk.classes.enum import ResizeMode from depthai_sdk.components.parser import * from depthai_sdk.components.stereo_component import StereoComponent +from depthai_sdk.visualize.visualizer_helper import depth_to_disp_factor from depthai_sdk.oak_outputs.xout.xout_base import StreamXout, XoutBase from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames from depthai_sdk.oak_outputs.xout.xout_nn import XoutTwoStage, XoutNnResults, XoutSpatialBbMappings, XoutNnData -from depthai_sdk.oak_outputs.xout.xout_nn_encoded import XoutNnMjpeg, XoutNnH26x from depthai_sdk.oak_outputs.xout.xout_tracker import XoutTracker from depthai_sdk.replay import Replay @@ -36,10 +39,9 @@ def __init__(self, nn_type: Optional[str] = None, # Either 'yolo' or 'mobilenet' decode_fn: Optional[Callable] = None, tracker: bool = False, # Enable object tracker - only for Object detection models - spatial: Union[None, bool, StereoComponent] = None, + spatial: Optional[StereoComponent] = None, replay: Optional[Replay] = None, args: Dict = None, # User defined args - name: Optional[str] = None ) -> None: """ Neural Network component abstracts: @@ -56,32 +58,27 @@ def __init__(self, spatial (bool, default False): Enable getting Spatial coordinates (XYZ), only for Obj detectors. 
Yolo/SSD use on-device spatial calc, others on-host (gen2-calc-spatials-on-host) replay (Replay object): Replay args (Any, optional): Use user defined arguments when constructing the pipeline - name (str, optional): Name of the output stream """ super().__init__() - self.name = name self.out = self.Out(self) self.triggers = defaultdict(list) - self.node: Optional[ - dai.node.NeuralNetwork, - dai.node.MobileNetDetectionNetwork, - dai.node.MobileNetSpatialDetectionNetwork, - dai.node.YoloDetectionNetwork, - dai.node.YoloSpatialDetectionNetwork] = None + self.node: Optional[NNNode] = None # ImageManip used to resize the input to match the expected NN input size self.image_manip: Optional[dai.node.ImageManip] = None self.x_in: Optional[dai.node.XLinkIn] = None # Used for multi-stage pipeline + # Tracker: self.tracker = pipeline.createObjectTracker() if tracker else None - self.apply_tracking_filter = False + self.apply_tracking_filter = True # Enable by default + self.calculate_speed = True self.forget_after_n_frames = None - self.calculate_speed = False # Private properties self._ar_resize_mode: ResizeMode = ResizeMode.LETTERBOX # Default - self._input: Union[CameraComponent, 'NNComponent', dai.Node.Output] = input # Input to the NNComponent node passed on initialization + # Input to the NNComponent node passed on initialization + self._input: Union[CameraComponent, 'NNComponent', dai.Node.Output] = input self._stream_input: dai.Node.Output # Node Output that will be used as the input for this NNComponent self._blob: Optional[dai.OpenVINO.Blob] = None @@ -97,7 +94,7 @@ def __init__(self, self._input_queue = Optional[None] # Input queue for multi-stage pipeline - self._spatial: Optional[Union[bool, StereoComponent]] = spatial + self._spatial: Optional[StereoComponent] = spatial self._replay: Optional[Replay] = replay # Replay module # For visualizer @@ -141,11 +138,8 @@ def __init__(self, # Creates ImageManip node that resizes the input to match the expected NN input size. # DepthAI uses CHW (Planar) channel layout and BGR color order convention. self.image_manip = pipeline.createImageManip() - self.image_manip.setFrameType(dai.RawImgFrame.Type.BGR888p) - self.image_manip.setMaxOutputFrameSize(self._size[0] * self._size[1] * 3) - self.image_manip.inputImage.setBlocking(False) - self.image_manip.inputImage.setQueueSize(2) - self._ar_resize_mode = ResizeMode.LETTERBOX # Default + # Configures ImageManip node. Letterbox by default + self._change_resize_mode(ResizeMode.LETTERBOX) if isinstance(self._input, CameraComponent): self._stream_input = self._input.stream @@ -157,15 +151,15 @@ def __init__(self, self._stream_input.link(self.image_manip.inputImage) # Link ImageManip output to NN node self.image_manip.out.link(self.node.input) - elif self._is_multi_stage(): + elif self.is_multi_stage(): # Here, ImageManip will only crop the high-res frame to correct aspect ratio # (without resizing!) and it also acts as a buffer (by default, its pool size is set to 20). self.image_manip = pipeline.createImageManip() self.image_manip.setNumFramesPool(20) self._input._stream_input.link(self.image_manip.inputImage) - frame_full_size = self._input._input.stream_size + frame_full_size = self._get_input_frame_size() - if self._input._is_detector(): + if self._input.is_detector(): self.image_manip.setMaxOutputFrameSize(frame_full_size[0] * frame_full_size[1] * 3) # Create script node, get HQ frames from input. 
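Since `spatial` is now typed as `Optional[StereoComponent]` rather than also accepting a bool, the caller is expected to pass an explicit stereo component. A short usage sketch, assuming `OakCamera.create_nn` forwards the `spatial` argument to this NNComponent as in the SDK examples (treat it as an illustration, not a tested snippet):

from depthai_sdk import OakCamera

with OakCamera() as oak:
    color = oak.create_camera('color')
    stereo = oak.create_stereo()  # depth source used for spatial (XYZ) coordinates
    nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo)
    oak.visualize(nn, fps=True)
    oak.start(blocking=True)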
@@ -208,23 +202,15 @@ def __init__(self, ) if self._spatial: - if isinstance(self._spatial, bool): # Create new StereoComponent - self._spatial = StereoComponent(device, pipeline, args=self._args, replay=self._replay) - if isinstance(self._spatial, StereoComponent): - self._stereo_node: dai.node.StereoDepth = self._spatial.node - self._spatial.depth.link(self.node.inputDepth) - self._spatial.config_stereo(align=self._input) + self._stereo_node: dai.node.StereoDepth = self._spatial.node + self._spatial.depth.link(self.node.inputDepth) + self._spatial.config_stereo(align=self._input) # Configure Spatial Detection Network if self._args: - if self._is_spatial(): + if self.is_spatial(): self._config_spatials_args(self._args) - def forced_openvino_version(self) -> Optional[dai.OpenVINO.Version]: - # TODO: remove this once 2.23 is released, and just reset the ImageManip. - self._change_resize_mode(self._ar_resize_mode) - return None - def get_name(self): model = self._config.get('model', None) if model is not None: @@ -274,9 +260,9 @@ def _parse_node_type(self, nn_type: str) -> None: self._node_type = dai.node.NeuralNetwork if nn_type: if nn_type.upper() == 'YOLO': - self._node_type = dai.node.YoloSpatialDetectionNetwork if self._is_spatial() else dai.node.YoloDetectionNetwork + self._node_type = dai.node.YoloSpatialDetectionNetwork if self.is_spatial() else dai.node.YoloDetectionNetwork elif nn_type.upper() == 'MOBILENET': - self._node_type = dai.node.MobileNetSpatialDetectionNetwork if self._is_spatial() else dai.node.MobileNetDetectionNetwork + self._node_type = dai.node.MobileNetSpatialDetectionNetwork if self.is_spatial() else dai.node.MobileNetDetectionNetwork def _config_spatials_args(self, args): if not isinstance(args, Dict): @@ -353,24 +339,27 @@ def _blob_from_config(self, model: Dict, version: Union[None, str, dai.OpenVINO. Gets the blob from the config file. """ if isinstance(version, dai.OpenVINO.Version): - vals = str(version).split('_') - version = f"{vals[1]}.{vals[2]}" + version = str(version) + if isinstance(version, str): + if version.startswith('VERSION_'): + version = version[8:] + if '_' in version: + vals = version.split('_') + version = f'{vals[0]}.{vals[1]}' if 'model_name' in model: # Use blobconverter to download the model zoo_type = model.get("zoo", 'intel') return blobconverter.from_zoo(model['model_name'], zoo_type=zoo_type, shaves=6, # TODO: Calculate ideal shave amount - version=version - ) + version=version) if 'xml' in model and 'bin' in model: return blobconverter.from_openvino(xml=model['xml'], bin=model['bin'], data_type="FP16", # Myriad X shaves=6, # TODO: Calculate ideal shave amount - version=version - ) + version=version) raise ValueError("Specified `model` values in json config files are incorrect!") @@ -381,14 +370,17 @@ def _change_resize_mode(self, mode: ResizeMode) -> None: Args: mode (ResizeMode): Resize mode to use """ - if self._is_multi_stage(): + if self.is_multi_stage(): return # We need high-res frames for multi-stage NN, so we can crop them later self._ar_resize_mode = mode - # TODO: uncomment this when depthai 2.21.3 is released. In some cases (eg. - # setting first crop, then letterbox), the last config isn't used. 
- # self.image_manip.initialConfig.set(dai.RawImageManipConfig()) + # Reset ImageManip node config + self.image_manip.initialConfig.set(dai.RawImageManipConfig()) + self.image_manip.setFrameType(dai.RawImgFrame.Type.BGR888p) + self.image_manip.setMaxOutputFrameSize(self._size[0] * self._size[1] * 3) + self.image_manip.inputImage.setBlocking(False) + self.image_manip.inputImage.setQueueSize(2) if self._ar_resize_mode == ResizeMode.CROP: self.image_manip.initialConfig.setResize(self._size) @@ -417,7 +409,7 @@ def config_multistage_nn(self, scale_bb (Tuple[int, int], optional): Scale detection bounding boxes (x, y) before cropping the frame. In %. num_frame_pool (int, optional): Number of frames to pool for inference. If None, will use the default value. """ - if not self._is_multi_stage(): + if not self.is_multi_stage(): logging.warning("Input to this model was not a NNComponent, so 2-stage NN inferencing isn't possible!" "This configuration attempt will be ignored.") return @@ -456,7 +448,7 @@ def config_tracker(self, apply_tracking_filter: Optional[bool] = None, forget_after_n_frames: Optional[int] = None, calculate_speed: Optional[bool] = None - ): + ) -> None: """ Configure Object Tracker node (if it's enabled). @@ -503,11 +495,11 @@ def config_tracker(self, if calculate_speed is not None: self.calculate_speed = calculate_speed - def config_yolo_from_metadata(self, metadata: Dict): + def config_yolo_from_metadata(self, metadata: Dict) -> None: """ Configures (Spatial) Yolo Detection Network node with a dictionary. Calls config_yolo(). """ - return self.config_yolo( + self.config_yolo( num_classes=metadata['classes'], coordinate_size=metadata['coordinates'], anchors=metadata['anchors'], @@ -527,7 +519,7 @@ def config_yolo(self, """ Configures (Spatial) Yolo Detection Network node. """ - if not self._is_yolo(): + if not self.is_yolo(): logging.warning('This is not a YOLO detection network! This configuration attempt will be ignored.') return @@ -545,7 +537,8 @@ def config_yolo(self, def config_nn(self, conf_threshold: Optional[float] = None, - resize_mode: Union[ResizeMode, str] = None): + resize_mode: Union[ResizeMode, str] = None + ) -> None: """ Configures the Detection Network node. @@ -555,17 +548,17 @@ def config_nn(self, """ if resize_mode: self._ar_resize_mode = ResizeMode.parse(resize_mode) - # TODO: After 2.23 is released, uncomment this - # self._change_resize_mode(self._ar_resize_mode) + self._change_resize_mode(self._ar_resize_mode) - if conf_threshold is not None and self._is_detector(): + if conf_threshold is not None and self.is_detector(): self.node.setConfidenceThreshold(conf_threshold) def config_spatial(self, bb_scale_factor: Optional[float] = None, lower_threshold: Optional[int] = None, upper_threshold: Optional[int] = None, - calc_algo: Optional[dai.SpatialLocationCalculatorAlgorithm] = None): + calc_algo: Optional[dai.SpatialLocationCalculatorAlgorithm] = None + ) -> None: """ Configures the Spatial Detection Network node. @@ -575,7 +568,7 @@ def config_spatial(self, upper_threshold (int, optional): Specifies upper threshold in depth units (millimeter by default) for depth values which will used to calculate spatial data calc_algo (dai.SpatialLocationCalculatorAlgorithm, optional): Specifies spatial location calculator algorithm: Average/Min/Max """ - if not self._is_spatial(): + if not self.is_spatial(): logging.warning('This is not a Spatial Detection network! 
This configuration attempt will be ignored.') return @@ -588,247 +581,218 @@ def config_spatial(self, if calc_algo: self.node.setSpatialCalculationAlgorithm(calc_algo) - def _update_config(self): + def _update_config(self) -> None: if self.node is None or self._config is None: return nn_config = self._config.get("nn_config", {}) meta = nn_config.get('NN_specific_metadata', None) - if self._is_yolo() and meta: + if self.is_yolo() and meta: self.config_yolo_from_metadata(metadata=meta) self.config_nn(conf_threshold=nn_config.get('conf_threshold', None)) + def _get_camera_comp(self) -> CameraComponent: + if self.is_multi_stage(): + return self._input._get_camera_comp() + return self._input + + def _get_input_frame_size(self) -> Tuple[int, int]: + # TODO: if user passes node output as the NN input (eg. examples/mixed/switch_between_models.py), + # this function will fail + return self._get_camera_comp().stream_size + + # + def get_bbox(self) -> BoundingBox: + if self.is_multi_stage(): + return self._input.get_bbox() + else: + try: + stream_size = self._get_input_frame_size() + old_ar = stream_size[0] / stream_size[1] + new_ar = self._size[0] / self._size[1] + return BoundingBox().resize_to_aspect_ratio(old_ar, new_ar, self._ar_resize_mode) + except (AttributeError, ZeroDivisionError, ValueError): + return BoundingBox() + """ Available outputs (to the host) of this component """ class Out: - def __init__(self, nn_component: 'NNComponent'): - self._comp = nn_component - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: + class MainOut(ComponentOutput): """ Default output. Streams NN results and high-res frames that were downscaled and used for inferencing. Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). """ - if self._comp._is_multi_stage(): - input_nn = self._comp._input - if input_nn._input.encoder: - return self.encoded(pipeline=pipeline, device=device) - elif self._comp._input.encoder: - return self.encoded(pipeline=pipeline, device=device) - - if self._comp._is_multi_stage(): - det_nn_out = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.out, - name=self._comp._input.name) - second_nn_out = StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - - out = XoutTwoStage(det_nn=self._comp._input, - second_nn=self._comp, - frames=self._comp._input._input.get_stream_xout(), - det_out=det_nn_out, - second_nn_out=second_nn_out, - device=device, - input_queue_name="input_queue" if self._comp.x_in else None) - else: - det_nn_out = StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - input_stream = self._comp._stream_input - out = XoutNnResults(det_nn=self._comp, - frames=StreamXout(id=input_stream.getParent().id, - out=input_stream, - name=self._comp.name), - nn_results=det_nn_out) - return self._comp._create_xout(pipeline, out) - - def passthrough(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - Default output. Streams NN results and passthrough frames (frames used for inferencing) - Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). 
- """ - if self._comp._is_multi_stage(): - det_nn_out = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.out, - name=self._comp._input.name) - frames = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.passthrough, - name=self._comp.name) - second_nn_out = StreamXout(self._comp.node.id, self._comp.node.out, name=self._comp.name) - - out = XoutTwoStage(det_nn=self._comp._input, - second_nn=self._comp, - frames=frames, - det_out=det_nn_out, - second_nn_out=second_nn_out, + def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: + if self._comp.is_multi_stage(): + det_nn_out = StreamXout(out=self._comp._input.node.out) + second_nn_out = StreamXout(out=self._comp.node.out) + + return XoutTwoStage(det_nn=self._comp._input, + second_nn=self._comp, + frames=self._comp._input._input.get_stream_xout(), + det_out=det_nn_out, + second_nn_out=second_nn_out, + device=device, + input_queue_name="input_queue" if self._comp.x_in else None, + bbox=self._comp.get_bbox()).set_fourcc(fourcc).set_comp_out(self) + else: + # TODO: refactor. This is a bit hacky, as we want to support passing node output as the input + # to the NNComponent. In such case, we don't have access to VideoEnc (inside CameraComponent) + det_nn_out = StreamXout(out=self._comp.node.out) + input_stream = self._comp._stream_input + if fourcc is None: + frame_stream = StreamXout(out=input_stream) + else: + frame_stream = self._comp._get_camera_comp().get_stream_xout(fourcc) + return XoutNnResults(det_nn=self._comp, + frames=frame_stream, + nn_results=det_nn_out, + bbox=self._comp.get_bbox()).set_fourcc(fourcc).set_comp_out(self) + + class PassThroughOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + """ + Default output. Streams NN results and passthrough frames (frames used for inferencing) + Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). + """ + if self._comp.is_multi_stage(): + return XoutTwoStage(det_nn=self._comp._input, + second_nn=self._comp, + frames=StreamXout(out=self._comp._input.node.passthrough), + det_out=StreamXout(out=self._comp._input.node.out), + second_nn_out=StreamXout(self._comp.node.out), + device=device, + input_queue_name="input_queue" if self._comp.x_in else None, + bbox=self._comp.get_bbox()).set_comp_out(self) + else: + return XoutNnResults(det_nn=self._comp, + frames=StreamXout(out=self._comp.node.passthrough), + nn_results=StreamXout(out=self._comp.node.out), + bbox=BoundingBox() + ).set_comp_out(self) + + class ImgManipOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(out=self._comp.image_manip.out)).set_comp_out(self) + + class InputOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(out=self._comp._stream_input)).set_comp_out(self) + + class SpatialOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutSpatialBbMappings: + """ + Streams depth and bounding box mappings (``SpatialDetectionNework.boundingBoxMapping``). Produces SpatialBbMappingPacket. 
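Minimal usage sketch (model name illustrative; assumes OakCamera.create_stereo() builds the stereo pair when none is given):

    from depthai_sdk import OakCamera

    with OakCamera() as oak:
        color = oak.create_camera('color')
        stereo = oak.create_stereo()
        # Spatial detector; depth + bounding-box mappings are exposed via out.spatials
        nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo)
        oak.visualize(nn.out.spatials)
        oak.start(blocking=True)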
+ """ + if not self._comp.is_spatial(): + raise Exception('SDK tried to output spatial data (depth + bounding box mappings),' + 'but this is not a Spatial Detection network!') + + return XoutSpatialBbMappings( + device=device, + stereo=self._comp._stereo_node, + frames=StreamXout(out=self._comp.node.passthroughDepth), + configs=StreamXout(out=self._comp.node.out), + dispScaleFactor=depth_to_disp_factor(device, self._comp._stereo_node), + bbox=self._comp.get_bbox() + ).set_comp_out(self) + + class TwoStageOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutFrames: + """ + Streams 2. stage cropped frames to the host. Produces FramePacket. + """ + if not self._comp.is_multi_stage(): + raise Exception( + 'SDK tried to output TwoStage crop frames, but this is not a Two-Stage NN component!') + + return XoutFrames(frames=StreamXout(out=self._comp._multi_stage_nn.manip.out)).set_comp_out(self) + + class TrackerOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutTracker: + """ + Streams ObjectTracker tracklets and high-res frames that were downscaled and used for inferencing. Produces TrackerPacket. + """ + if not self._comp.is_tracker(): + raise Exception('Tracker was not enabled! Enable with cam.create_nn("[model]", tracker=True)!') + + self._comp.node.passthrough.link(self._comp.tracker.inputDetectionFrame) + self._comp.node.out.link(self._comp.tracker.inputDetections) + + # TODO: add support for full frame tracking + self._comp.node.passthrough.link(self._comp.tracker.inputTrackerFrame) + + return XoutTracker(det_nn=self._comp, + frames=self._comp._input.get_stream_xout(), # CameraComponent device=device, - input_queue_name="input_queue" if self._comp.x_in else None) - else: - det_nn_out = StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - frames = StreamXout(id=self._comp.node.id, out=self._comp.node.passthrough, name=self._comp.name) - - out = XoutNnResults(det_nn=self._comp, - frames=frames, - nn_results=det_nn_out) - - return self._comp._create_xout(pipeline, out) + tracklets=StreamXout(self._comp.tracker.out), + bbox=self._comp.get_bbox(), + apply_kalman=self._comp.apply_tracking_filter, + forget_after_n_frames=self._comp.forget_after_n_frames, + calculate_speed=self._comp.calculate_speed, + ).set_comp_out(self) + + class EncodedOut(MainOut): + def __call__(self, device: dai.Device) -> XoutNnResults: + """ + Streams NN results and encoded frames (frames used for inferencing) + Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). 
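Usage sketch, assuming the input camera was created with an encoder (model name illustrative):

    from depthai_sdk import OakCamera

    with OakCamera() as oak:
        color = oak.create_camera('color', encode='mjpeg')
        nn = oak.create_nn('mobilenet-ssd', color)
        # Streams encoded frames together with the NN results
        oak.visualize(nn.out.encoded)
        oak.start(blocking=True)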
+ """ + # A bit hacky, maybe we can remove this alltogether + return super().__call__(device, fourcc=self._comp._get_camera_comp().get_fourcc()) + + class NnDataOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutNnData: + node_output = self._comp.node.out if \ + type(self._comp.node) == dai.node.NeuralNetwork else \ + self._comp.node.outNetwork + + return XoutNnData(xout=StreamXout(node_output)).set_comp_out(self) - def image_manip(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - out = XoutFrames(frames=StreamXout(id=self._comp.image_manip.id, - out=self._comp.image_manip.out, - name=self._comp.name)) - return self._comp._create_xout(pipeline, out) - - def input(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - out = XoutFrames(frames=StreamXout(id=self._comp._input.node.id, - out=self._comp._stream_input, - name=self._comp.name)) - return self._comp._create_xout(pipeline, out) - - def spatials(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutSpatialBbMappings: - """ - Streams depth and bounding box mappings (``SpatialDetectionNework.boundingBoxMapping``). Produces SpatialBbMappingPacket. - """ - if not self._comp._is_spatial(): - raise Exception('SDK tried to output spatial data (depth + bounding box mappings),' - 'but this is not a Spatial Detection network!') - - out = XoutSpatialBbMappings( - device=device, - stereo=self._comp._stereo_node, - frames=StreamXout(id=self._comp.node.id, out=self._comp.node.passthroughDepth, name=self._comp.name), - configs=StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - ) - - return self._comp._create_xout(pipeline, out) - - def twostage_crops(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutFrames: - """ - Streams 2. stage cropped frames to the host. Produces FramePacket. - """ - if not self._comp._is_multi_stage(): - raise Exception('SDK tried to output TwoStage crop frames, but this is not a Two-Stage NN component!') - - out = XoutFrames(frames=StreamXout(id=self._comp._multi_stage_nn.manip.id, - out=self._comp._multi_stage_nn.manip.out, - name=self._comp.name)) - - return self._comp._create_xout(pipeline, out) - - def tracker(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutTracker: - """ - Streams ObjectTracker tracklets and high-res frames that were downscaled and used for inferencing. Produces TrackerPacket. - """ - if not self._comp._is_tracker(): - raise Exception('Tracker was not enabled! Enable with cam.create_nn("[model]", tracker=True)!') - - self._comp.node.passthrough.link(self._comp.tracker.inputDetectionFrame) - self._comp.node.out.link(self._comp.tracker.inputDetections) - - # TODO: add support for full frame tracking - self._comp.node.passthrough.link(self._comp.tracker.inputTrackerFrame) - - out = XoutTracker(det_nn=self._comp, - frames=self._comp._input.get_stream_xout(), # CameraComponent - device=device, - tracklets=StreamXout(self._comp.tracker.id, self._comp.tracker.out), - apply_kalman=self._comp.apply_tracking_filter, - forget_after_n_frames=self._comp.forget_after_n_frames, - calculate_speed=self._comp.calculate_speed) - - return self._comp._create_xout(pipeline, out) - - def encoded(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutNnResults: - """ - Streams NN results and encoded frames (frames used for inferencing) - Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). 
- """ - if self._comp._is_multi_stage(): - input_nn = self._comp._input - - if input_nn._input.encoder is None: - raise Exception('Encoder not enabled for the input') - - det_nn_out = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.out, - name=self._comp._input.name) - frames = StreamXout(id=input_nn._input.encoder.id, - out=input_nn._input.encoder.bitstream, - name=self._comp.name) - second_nn_out = StreamXout(self._comp.node.id, self._comp.node.out, name=self._comp.name) - - out = XoutTwoStage(det_nn=self._comp._input, - second_nn=self._comp, - frames=frames, - det_out=det_nn_out, - second_nn_out=second_nn_out, - device=device, - input_queue_name="input_queue" if self._comp.x_in else None) - - return self._comp._create_xout(pipeline, out) - - if self._comp._input.encoder is None: - raise Exception('Encoder not enabled for the input') - - if self._comp._input._encoder_profile == dai.VideoEncoderProperties.Profile.MJPEG: - out = XoutNnMjpeg( - det_nn=self._comp, - frames=StreamXout(self._comp._input.encoder.id, self._comp._input.encoder.bitstream), - nn_results=StreamXout(self._comp.node.id, self._comp.node.out), - color=self._comp._input.is_color(), - lossless=self._comp._input.encoder.getLossless(), - fps=self._comp._input.encoder.getFrameRate(), - frame_shape=self._comp._input.stream_size - ) - else: - out = XoutNnH26x( - det_nn=self._comp, - frames=StreamXout(self._comp._input.node.id, self._comp._input.encoder.bitstream), - nn_results=StreamXout(self._comp.node.id, self._comp.node.out), - color=self._comp._input.is_color(), - profile=self._comp._input._encoder_profile, - fps=self._comp._input.encoder.getFrameRate(), - frame_shape=self._comp._input.stream_size - ) - - return self._comp._create_xout(pipeline, out) - - def nn_data(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutNnData: - if type(self._comp.node) == dai.node.NeuralNetwork: - out = XoutNnData(xout=StreamXout(self._comp.node.id, self._comp.node.out)) - else: - out = XoutNnData(xout=StreamXout(self._comp.node.id, self._comp.node.outNetwork)) - return self._comp._create_xout(pipeline, out) + def __init__(self, nn_component: 'NNComponent'): + self.main = self.MainOut(nn_component) + self.passthrough = self.PassThroughOut(nn_component) + self.image_manip = self.ImgManipOut(nn_component) + self.input = self.InputOut(nn_component) + self.spatials = self.SpatialOut(nn_component) + self.twostage_crops = self.TwoStageOut(nn_component) + self.tracker = self.TrackerOut(nn_component) + self.encoded = self.EncodedOut(nn_component) + self.nn_data = self.NnDataOut(nn_component) # Checks - def _is_spatial(self) -> bool: + def is_spatial(self) -> bool: return self._spatial is not None # todo fix if spatial is bool and equals to False - def _is_tracker(self) -> bool: + def is_tracker(self) -> bool: # Currently, only object detectors are supported - return self._is_detector() and self.tracker is not None + return self.is_detector() and self.tracker is not None - def _is_yolo(self) -> bool: + def is_yolo(self) -> bool: return ( self._node_type == dai.node.YoloDetectionNetwork or self._node_type == dai.node.YoloSpatialDetectionNetwork ) - def _is_mobile_net(self) -> bool: + def is_mobile_net(self) -> bool: return ( self._node_type == dai.node.MobileNetDetectionNetwork or self._node_type == dai.node.MobileNetSpatialDetectionNetwork ) - def _is_detector(self) -> bool: + def is_detector(self) -> bool: """ Currently these 2 object detectors are supported """ - return self._is_yolo() or self._is_mobile_net() + return 
self.is_yolo() or self.is_mobile_net() - def _is_multi_stage(self): + def is_multi_stage(self): if not isinstance(self._input, type(self)): return False diff --git a/depthai_sdk/src/depthai_sdk/components/nn_helper.py b/depthai_sdk/src/depthai_sdk/components/nn_helper.py index 5b82be46e..ef2ebbb18 100644 --- a/depthai_sdk/src/depthai_sdk/components/nn_helper.py +++ b/depthai_sdk/src/depthai_sdk/components/nn_helper.py @@ -1,9 +1,9 @@ import importlib -from pathlib import Path import os -from typing import Dict, Union, Optional, Tuple +from pathlib import Path +from typing import Dict, Union + import requests -import depthai as dai BLOBS_PATH = Path.home() / Path('.cache/blobs') diff --git a/depthai_sdk/src/depthai_sdk/components/parser.py b/depthai_sdk/src/depthai_sdk/components/parser.py index ed41296f6..fc1c2f018 100644 --- a/depthai_sdk/src/depthai_sdk/components/parser.py +++ b/depthai_sdk/src/depthai_sdk/components/parser.py @@ -30,6 +30,22 @@ def rgb_resolution(resolution: Union[ return dai.ColorCameraProperties.SensorResolution.THE_1080_P +def encoder_profile_to_fourcc(profile: dai.VideoEncoderProperties.Profile) -> str: + """ + Converts encoder profile to fourcc string + """ + if profile == dai.VideoEncoderProperties.Profile.MJPEG: + return 'mjpeg' + elif profile == dai.VideoEncoderProperties.Profile.H265_MAIN: + return 'hevc' + elif profile in [dai.VideoEncoderProperties.Profile.H264_BASELINE, + dai.VideoEncoderProperties.Profile.H264_HIGH, + dai.VideoEncoderProperties.Profile.H264_MAIN + ]: + return 'h264' + raise ValueError(f'Unknown encoder profile: {profile}') + + def mono_resolution(resolution: Union[ None, str, dai.MonoCameraProperties.SensorResolution]) -> dai.MonoCameraProperties.SensorResolution: """ @@ -70,6 +86,14 @@ def parse_bool(value: str) -> bool: raise ValueError(f"Couldn't parse '{value}' to bool!") +def get_first_color_cam(device: dai.Device) -> dai.CameraBoardSocket: + for cam in device.getConnectedCameraFeatures(): + if cam.supportedTypes[0] == dai.CameraSensorType.COLOR: + return cam.socket + # Default + return None + + def parse_camera_socket(value: Union[str, dai.CameraBoardSocket]) -> dai.CameraBoardSocket: if isinstance(value, dai.CameraBoardSocket): return value @@ -94,6 +118,7 @@ def parse_camera_socket(value: Union[str, dai.CameraBoardSocket]) -> dai.CameraB else: raise ValueError(f"Camera socket name '{value}' not supported!") + def parse_usb_speed(speed: Union[None, str, dai.UsbSpeed]) -> Optional[dai.UsbSpeed]: if speed is None: return None diff --git a/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py b/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py index 8ed847272..7e806ffc7 100644 --- a/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py +++ b/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py @@ -1,12 +1,9 @@ -import logging -import warnings -from typing import Optional, Union, Any, Dict, Tuple +from typing import Optional, Union, Any -import cv2 import depthai as dai -import numpy as np + from depthai_sdk.components.camera_component import CameraComponent -from depthai_sdk.components.component import Component +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.components.stereo_component import StereoComponent from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout from depthai_sdk.oak_outputs.xout.xout_pointcloud import XoutPointcloud @@ -21,34 +18,23 @@ def __init__(self, stereo: Union[None, StereoComponent, dai.node.StereoDepth, 
dai.Node.Output] = None, colorize: Optional[CameraComponent] = None, replay: Optional[Replay] = None, - args: Any = None, - name: Optional[str] = None): + args: Any = None): """ Args: pipeline (dai.Pipeline): DepthAI pipeline replay (Replay object, optional): Replay args (Any, optional): Use user defined arguments when constructing the pipeline - name (str, optional): Name of the output stream """ super().__init__() self.out = self.Out(self) self.stereo_depth_node: dai.node.StereoDepth - self.depth: dai.Node.Output # Depth node output + self.depth: dai.Node.Output # Depth node output self.colorize_comp: Optional[CameraComponent] = colorize - self.name = name - self._replay: Optional[Replay] = replay - # Colorization aspect - if colorize is None: - self.colorize_comp = CameraComponent(device, pipeline, source='color', replay=replay, args=args) - - if isinstance(self.colorize_comp, CameraComponent): - self.colorize_comp.config_color_camera(isp_scale=(2,5)) - # Depth aspect if stereo is None: stereo = StereoComponent(device, pipeline, replay=replay, args=args) @@ -62,8 +48,8 @@ def __init__(self, config.postProcessing.spatialFilter.enable = True config.postProcessing.spatialFilter.holeFillingRadius = 2 config.postProcessing.spatialFilter.numIterations = 1 - config.postProcessing.thresholdFilter.minRange = 400 # 40cm - config.postProcessing.thresholdFilter.maxRange = 20000 # 20m + config.postProcessing.thresholdFilter.minRange = 400 # 40cm + config.postProcessing.thresholdFilter.maxRange = 20000 # 20m config.postProcessing.decimationFilter.decimationFactor = 2 config.postProcessing.decimationFilter.decimationMode = dai.RawStereoDepthConfig.PostProcessing.DecimationFilter.DecimationMode.NON_ZERO_MEDIAN stereo.node.initialConfig.set(config) @@ -82,31 +68,22 @@ def __init__(self, self.stereo_depth_node = stereo.getParent() self.depth = stereo - - def config_postprocessing(self, - ) -> None: + def config_postprocessing(self) -> None: """ Configures postprocessing options. 
- - Args: """ - pass + raise NotImplementedError("config_postprocessing() not yet implemented") class Out: + class PointcloudOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + colorize = None + if self._comp.colorize_comp is not None: + colorize = StreamXout(self._comp.colorize_comp.stream, name="Color") + return XoutPointcloud(device, + StreamXout(self._comp.depth), + color_frames=colorize).set_comp_out(self) + def __init__(self, component: 'PointcloudComponent'): - self._comp = component - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - return self.pointcloud(pipeline, device) - - def pointcloud(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - colorize = None - if self._comp.colorize_comp is not None: - colorize = StreamXout(self._comp.colorize_comp.node.id, self._comp.colorize_comp.stream, name="Color") - - out = XoutPointcloud(device, - StreamXout(self._comp.stereo_depth_node.id, self._comp.depth, name=self._comp.name), - color_frames=colorize, - fps=30 - ) - return self._comp._create_xout(pipeline, out) + self.pointcloud = self.PointcloudOut(component) + self.main = self.pointcloud diff --git a/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py b/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py index 7e3cac42f..56903452a 100644 --- a/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py +++ b/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py @@ -1,5 +1,6 @@ -import numpy as np import depthai as dai +import numpy as np + def create_xyz(device: dai.Device, width: int, height: int): calibData = device.readCalibration() @@ -29,4 +30,3 @@ def create_xyz(device: dai.Device, width: int, height: int): xyz = np.stack([x_coord, y_coord], axis=-1) return np.pad(xyz, ((0, 0), (0, 0), (0, 1)), "constant", constant_values=1.0) - diff --git a/depthai_sdk/src/depthai_sdk/components/stereo_component.py b/depthai_sdk/src/depthai_sdk/components/stereo_component.py index 7367c34b5..2b9d7de72 100644 --- a/depthai_sdk/src/depthai_sdk/components/stereo_component.py +++ b/depthai_sdk/src/depthai_sdk/components/stereo_component.py @@ -7,17 +7,15 @@ import depthai as dai import numpy as np -from depthai_sdk.components.camera_component import CameraComponent +from depthai_sdk.components.camera_component import CameraComponent, ComponentOutput from depthai_sdk.components.component import Component -from depthai_sdk.components.parser import parse_median_filter, parse_encode +from depthai_sdk.components.parser import parse_median_filter, parse_encode, encoder_profile_to_fourcc from depthai_sdk.components.stereo_control import StereoControl from depthai_sdk.components.undistort import _get_mesh from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout -from depthai_sdk.oak_outputs.xout.xout_depth import XoutDepth +from depthai_sdk.oak_outputs.xout.xout_depth import XoutDisparityDepth from depthai_sdk.oak_outputs.xout.xout_disparity import XoutDisparity from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.oak_outputs.xout.xout_h26x import XoutH26x -from depthai_sdk.oak_outputs.xout.xout_mjpeg import XoutMjpeg from depthai_sdk.replay import Replay from depthai_sdk.visualize.configs import StereoColor from depthai_sdk.visualize.visualizer_helper import depth_to_disp_factor @@ -45,25 +43,20 @@ def disparity(self) -> dai.Node.Output: def __init__(self, device: dai.Device, pipeline: dai.Pipeline, - resolution: Union[None, str, dai.MonoCameraProperties.SensorResolution] = 
None, - fps: Optional[float] = None, - left: Union[None, CameraComponent, dai.node.MonoCamera] = None, # Left mono camera - right: Union[None, CameraComponent, dai.node.MonoCamera] = None, # Right mono camera + left: Union[CameraComponent, dai.node.MonoCamera], # Left stereo camera + right: Union[CameraComponent, dai.node.MonoCamera], # Right stereo camera replay: Optional[Replay] = None, args: Any = None, - name: Optional[str] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None): """ Args: - pipeline (dai.Pipeline): DepthAI pipeline - resolution (str/SensorResolution): If monochrome cameras aren't already passed, create them and set specified resolution - fps (float): If monochrome cameras aren't already passed, create them and set specified FPS - left (None / dai.None.Output / CameraComponent): Left mono camera source. Will get handled by Camera object. - right (None / dai.None.Output / CameraComponent): Right mono camera source. Will get handled by Camera object. - replay (Replay object, optional): Replay - args (Any, optional): Use user defined arguments when constructing the pipeline - name (str, optional): Name of the output stream - encode (str/bool/Profile, optional): Encode the output stream + device (dai.Device): DepthAI device. + pipeline (dai.Pipeline): DepthAI pipeline. + left (dai.None.Output / CameraComponent): Left mono camera source. Will get handled by Camera object. + right (dai.None.Output / CameraComponent): Right mono camera source. Will get handled by Camera object. + replay (Replay object, optional): Replay object to use for playback. + args (Any, optional): Use user defined arguments when constructing the pipeline. + encode (str/bool/Profile, optional): Encode the output stream. """ super().__init__() self.out = self.Out(self) @@ -78,10 +71,7 @@ def __init__(self, self._device = device self._replay: Optional[Replay] = replay - self._resolution: Optional[Union[str, dai.MonoCameraProperties.SensorResolution]] = resolution - self._fps: Optional[float] = fps self._args: Dict = args - self.name = name self.left = left self.right = right @@ -120,25 +110,6 @@ def __init__(self, if len(device.getCameraSensorNames()) == 1: raise Exception('OAK-1 camera does not have Stereo camera pair!') - # If not specified, default to 400P resolution for faster processing - self._resolution = self._resolution or dai.MonoCameraProperties.SensorResolution.THE_400_P - - # Always use 1200p for OAK-D-LR and OAK-D-SR - if self._device.getDeviceName() == 'OAK-D-LR': - self._resolution = dai.MonoCameraProperties.SensorResolution.THE_1200_P - - if not self.left: # Should never happen - self.left = CameraComponent(device, pipeline, 'left', self._resolution, self._fps, replay=self._replay) - if not self.right: - self.right = CameraComponent(device, pipeline, 'right', self._resolution, self._fps, - replay=self._replay) - - # AR0234 outputs 1200p, so we need to resize it to 800p on RVC2 - if self._device.getDeviceName() == 'OAK-D-LR': - if isinstance(self.left, CameraComponent) and isinstance(self.right, CameraComponent): - self.left.config_color_camera(isp_scale=(2, 3)) - self.right.config_color_camera(isp_scale=(2, 3)) - if self._get_ir_drivers(): laser = self._args.get('irDotBrightness', None) laser = laser if laser is not None else 800 @@ -158,6 +129,38 @@ def __init__(self, self._left_stream = self._get_output_stream(self.left) self._right_stream = self._get_output_stream(self.right) + # Check whether input stereo pairs are larger than 1280 pixels in width 
(limitation of the RVC2/RVC3). + # If that's the case, create ImageManip to downscale the streams. + downscale_manips = [] + if isinstance(self.left, CameraComponent): + # Check whether input size width is larger than 1280 + w, h = self.left.stream_size + if w > 1280: + manip = pipeline.create(dai.node.ImageManip) + new_h = int(h * (1280 / w)) + manip.setResize(1280, new_h) + logging.info(f'Input frame size to stereo component was {w}x{h}, added downscalling to 1280x{new_h}') + manip.setMaxOutputFrameSize(1280 * new_h) + # Stereo works on GRAY8 frames + manip.setFrameType(dai.ImgFrame.Type.GRAY8) + self._left_stream.link(manip.inputImage) + self._left_stream = manip.out + downscale_manips.append(manip) + if isinstance(self.right, CameraComponent): + # Check whether input size width is larger than 1280 + w, h = self.right.stream_size + if w > 1280: + manip = pipeline.create(dai.node.ImageManip) + new_h = int(h * (1280 / w)) + manip.setResize(1280, new_h) + logging.info(f'Input frame size to stereo component was {w}x{h}, added downscalling to 1280x{new_h}') + manip.setMaxOutputFrameSize(1280 * new_h) + # Stereo works on GRAY8 frames + manip.setFrameType(dai.ImgFrame.Type.GRAY8) + self._right_stream.link(manip.inputImage) + self._right_stream = manip.out + downscale_manips.append(manip) + if self._replay: # Replay self._replay.initStereoDepth(self.node, left_name=self.left._source, right_name=self.right._source) else: @@ -176,14 +179,14 @@ def __init__(self, self.node.setRectifyEdgeFillColor(0) if self._undistortion_offset is not None: - calibData = self._replay._calibData if self._replay else device.readCalibration() + calib_data = self._replay._calibData if self._replay else device.readCalibration() w_frame, h_frame = self._get_stream_size(self.left) - mapX_left, mapY_left, mapX_right, mapY_right = self._get_maps(w_frame, h_frame, calibData) + mapX_left, mapY_left, mapX_right, mapY_right = self._get_maps(w_frame, h_frame, calib_data) mesh_l = _get_mesh(mapX_left, mapY_left) mesh_r = _get_mesh(mapX_right, mapY_right) - meshLeft = list(mesh_l.tobytes()) - meshRight = list(mesh_r.tobytes()) - self.node.loadMeshData(meshLeft, meshRight) + mesh_left = list(mesh_l.tobytes()) + mesh_right = list(mesh_r.tobytes()) + self.node.loadMeshData(mesh_left, mesh_right) if self._args: self._config_stereo_args(self._args) @@ -192,8 +195,8 @@ def __init__(self, self._control_xlink_in = pipeline.create(dai.node.XLinkIn) self._control_xlink_in.setStreamName(f"{self.node.id}_inputControl") self._control_xlink_in.out.link(self.node.inputConfig) - self._control_xlink_in.setMaxDataSize( - 1) # CameraControl message doesn't use any additional data (only metadata) + # CameraControl message doesn't use any additional data (only metadata) + self._control_xlink_in.setMaxDataSize(1) def on_pipeline_started(self, device: dai.Device): if self._control_xlink_in is not None: @@ -261,7 +264,7 @@ def config_stereo(self, if confidence is not None: self.node.initialConfig.setConfidenceThreshold(confidence) if align is not None: self._align_component = align - self.node.setDepthAlign(align.node.getBoardSocket()) + self.node.setDepthAlign(align._socket) if median is not None: self.node.setMedianFilter(parse_median_filter(median)) if extended is not None: self.node.initialConfig.setExtendedDisparity(extended) if subpixel is not None: self.node.initialConfig.setSubpixel(subpixel) @@ -389,12 +392,12 @@ def _get_disparity_factor(self, device: dai.Device) -> float: calib = device.readCalibration() baseline = 
calib.getBaselineDistance(useSpecTranslation=True) * 10 # mm intrinsics = calib.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, self.right.getResolutionSize()) - focalLength = intrinsics[0][0] + focal_length = intrinsics[0][0] disp_levels = self.node.getMaxDisparity() / 95 - return baseline * focalLength * disp_levels + return baseline * focal_length * disp_levels def _get_maps(self, width: int, height: int, calib: dai.CalibrationHandler): - imageSize = (width, height) + image_size = (width, height) M1 = np.array(calib.getCameraIntrinsics(calib.getStereoLeftCameraId(), width, height)) M2 = np.array(calib.getCameraIntrinsics(calib.getStereoRightCameraId(), width, height)) d1 = np.array(calib.getDistortionCoefficients(calib.getStereoLeftCameraId())) @@ -413,98 +416,78 @@ def _get_maps(self, width: int, height: int, calib: dai.CalibrationHandler): M2[0][0] += self._undistortion_offset M2[1][1] += self._undistortion_offset - mapX_l, mapY_l = cv2.initUndistortRectifyMap(M1, d1, R1, M2, imageSize, cv2.CV_32FC1) - mapX_r, mapY_r = cv2.initUndistortRectifyMap(M2, d2, R2, M2, imageSize, cv2.CV_32FC1) + mapX_l, mapY_l = cv2.initUndistortRectifyMap(M1, d1, R1, M2, image_size, cv2.CV_32FC1) + mapX_r, mapY_r = cv2.initUndistortRectifyMap(M2, d2, R2, M2, image_size, cv2.CV_32FC1) return mapX_l, mapY_l, mapX_r, mapY_r + def get_fourcc(self) -> Optional[str]: + if self.encoder is None: + return None + return encoder_profile_to_fourcc(self._encoderProfile) + """ Available outputs (to the host) of this component """ + def _mono_frames(self): + """ + Create mono frames output if WLS filter is enabled or colorize is set to RGBD + """ + mono_frames = None + if self.wls_config['enabled'] or self._colorize == StereoColor.RGBD: + mono_frames = StreamXout(self._right_stream) + return mono_frames + class Out: + class DepthOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutDisparityDepth( + device=device, + frames=StreamXout(self._comp.depth), + dispScaleFactor=depth_to_disp_factor(device, self._comp.node), + mono_frames=self._comp._mono_frames(), + colorize=self._comp._colorize, + colormap=self._comp._postprocess_colormap, + ir_settings=self._comp.ir_settings + ).set_comp_out(self) + + class DisparityOut(ComponentOutput): + def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: + return XoutDisparity( + device=device, + frames=StreamXout(self._comp.encoder.bitstream) if fourcc else + StreamXout(self._comp.disparity), + disp_factor=255.0 / self._comp.node.getMaxDisparity(), + mono_frames=self._comp._mono_frames(), + colorize=self._comp._colorize, + colormap=self._comp._postprocess_colormap, + wls_config=self._comp.wls_config, + ir_settings=self._comp.ir_settings, + ).set_comp_out(self) + + class RectifiedLeftOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(self._comp.node.rectifiedLeft, 'Rectified left')).set_comp_out(self) + + class RectifiedRightOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(self._comp.node.rectifiedRight, 'Rectified right')).set_comp_out(self) + + class EncodedOut(DisparityOut): + def __call__(self, device: dai.Device) -> XoutBase: + if not self._comp.encoder: + raise RuntimeError('Encoder not enabled, cannot output encoded frames') + if self._comp.wls_config['enabled']: + warnings.warn('WLS filter is enabled, but cannot be applied to encoded frames.') + + return super().__call__(device, 
fourcc=self._comp.get_fourcc()) + def __init__(self, stereo_component: 'StereoComponent'): self._comp = stereo_component - def _mono_frames(self): - """ - Create mono frames output if WLS filter is enabled or colorize is set to RGBD - """ - mono_frames = None - if self._comp.wls_config['enabled'] or self._comp._colorize == StereoColor.RGBD: - mono_frames = StreamXout(self._comp.node.id, self._comp._right_stream, name=self._comp.name) - return mono_frames - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - # By default, we want to show disparity - return self.depth(pipeline, device) - - def disparity(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - - out = XoutDisparity( - device=device, - frames=StreamXout(self._comp.node.id, self._comp.disparity, name=self._comp.name), - disp_factor=255.0 / self._comp.node.getMaxDisparity(), - fps=fps, - mono_frames=self._mono_frames(), - colorize=self._comp._colorize, - colormap=self._comp._postprocess_colormap, - wls_config=self._comp.wls_config, - ir_settings=self._comp.ir_settings, - ) - - return self._comp._create_xout(pipeline, out) - - def rectified_left(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - out = XoutFrames( - frames=StreamXout(self._comp.node.id, self._comp.node.rectifiedLeft), - fps=fps) - out.name = 'Rectified left' - return self._comp._create_xout(pipeline, out) - - def rectified_right(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - out = XoutFrames( - frames=StreamXout(self._comp.node.id, self._comp.node.rectifiedRight), - fps=fps) - out.name = 'Rectified right' - return self._comp._create_xout(pipeline, out) - - def depth(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - - out = XoutDepth( - device=device, - frames=StreamXout(self._comp.node.id, self._comp.depth, name=self._comp.name), - dispScaleFactor=depth_to_disp_factor(device, self._comp.node), - fps=fps, - mono_frames=self._mono_frames(), - colorize=self._comp._colorize, - colormap=self._comp._postprocess_colormap, - wls_config=self._comp.wls_config, - ir_settings=self._comp.ir_settings - ) - return self._comp._create_xout(pipeline, out) - - def encoded(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - if not self._comp.encoder: - raise RuntimeError('Encoder not enabled, cannot output encoded frames') - - if self._comp.wls_config['enabled']: - warnings.warn('WLS filter is enabled, but cannot be applied to encoded frames.') - - if self._comp._encoderProfile == dai.VideoEncoderProperties.Profile.MJPEG: - out = XoutMjpeg(frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream), - color=self._comp.colormap is not None, - lossless=self._comp.encoder.getLossless(), - fps=self._comp.encoder.getFrameRate(), - frame_shape=(1200, 800)) - else: - out = XoutH26x(frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream), - color=self._comp.colormap is not None, - profile=self._comp._encoderProfile, - fps=self._comp.encoder.getFrameRate(), - frame_shape=(1200, 800)) - - return self._comp._create_xout(pipeline, out) + self.depth = self.DepthOut(stereo_component) + 
self.rectified_left = self.RectifiedLeftOut(stereo_component) + self.rectified_right = self.RectifiedRightOut(stereo_component) + self.disparity = self.DisparityOut(stereo_component) + self.encoded = self.EncodedOut(stereo_component) + self.main = self.depth diff --git a/depthai_sdk/src/depthai_sdk/components/stereo_control.py b/depthai_sdk/src/depthai_sdk/components/stereo_control.py index 372493d66..cf52e688b 100644 --- a/depthai_sdk/src/depthai_sdk/components/stereo_control.py +++ b/depthai_sdk/src/depthai_sdk/components/stereo_control.py @@ -14,9 +14,11 @@ 'illumination_led': (0, 1500), } + def clamp(value, min_value, max_value): return max(min(value, max_value), min_value) + class StereoControl: def __init__(self, device: dai.Device): self.queue = None diff --git a/depthai_sdk/src/depthai_sdk/components/undistort.py b/depthai_sdk/src/depthai_sdk/components/undistort.py index 56a2c2618..53d302d90 100644 --- a/depthai_sdk/src/depthai_sdk/components/undistort.py +++ b/depthai_sdk/src/depthai_sdk/components/undistort.py @@ -1,33 +1,33 @@ import numpy as np + def _get_mesh(mapX: np.ndarray, mapY: np.ndarray): - meshCellSize = 16 + mesh_cell_size = 16 mesh0 = [] - # print(mapX.shape) # Creates subsampled mesh which will be loaded on to device to undistort the image for y in range(mapX.shape[0] + 1): # iterating over height of the image - if y % meshCellSize == 0: - rowLeft = [] + if y % mesh_cell_size == 0: + row_left = [] for x in range(mapX.shape[1] + 1): # iterating over width of the image - if x % meshCellSize == 0: + if x % mesh_cell_size == 0: if y == mapX.shape[0] and x == mapX.shape[1]: - rowLeft.append(mapY[y - 1, x - 1]) - rowLeft.append(mapX[y - 1, x - 1]) + row_left.append(mapY[y - 1, x - 1]) + row_left.append(mapX[y - 1, x - 1]) elif y == mapX.shape[0]: - rowLeft.append(mapY[y - 1, x]) - rowLeft.append(mapX[y - 1, x]) + row_left.append(mapY[y - 1, x]) + row_left.append(mapX[y - 1, x]) elif x == mapX.shape[1]: - rowLeft.append(mapY[y, x - 1]) - rowLeft.append(mapX[y, x - 1]) + row_left.append(mapY[y, x - 1]) + row_left.append(mapX[y, x - 1]) else: - rowLeft.append(mapY[y, x]) - rowLeft.append(mapX[y, x]) - if (mapX.shape[1] % meshCellSize) % 2 != 0: - rowLeft.append(0) - rowLeft.append(0) + row_left.append(mapY[y, x]) + row_left.append(mapX[y, x]) + if (mapX.shape[1] % mesh_cell_size) % 2 != 0: + row_left.append(0) + row_left.append(0) - mesh0.append(rowLeft) + mesh0.append(row_left) mesh0 = np.array(mesh0) # mesh = list(map(tuple, mesh0)) - return mesh0 \ No newline at end of file + return mesh0 diff --git a/depthai_sdk/src/depthai_sdk/integrations/roboflow.py b/depthai_sdk/src/depthai_sdk/integrations/roboflow.py index 1bc6fe60f..40a6e3919 100644 --- a/depthai_sdk/src/depthai_sdk/integrations/roboflow.py +++ b/depthai_sdk/src/depthai_sdk/integrations/roboflow.py @@ -1,7 +1,7 @@ import json import logging from pathlib import Path -from typing import Dict +from typing import Dict, Optional from zipfile import ZipFile import depthai as dai @@ -19,7 +19,7 @@ def __init__(self, config: Dict): self.config = config - def _file_with_ext(self, folder: Path, ext: str) -> Path: + def _file_with_ext(self, folder: Path, ext: str) -> Optional[Path]: files = list(folder.glob(f"*{ext}")) if 0 == len(files): return None @@ -33,9 +33,9 @@ def device_update(self, device: dai.Device) -> Path: name = self.config['model'].replace('/', '_') # '/' isn't valid folder name model_folder = ROBOFLOW_MODELS / name - jsonFile = self._file_with_ext(model_folder, '.json') - if jsonFile: - return jsonFile + 
json_file = self._file_with_ext(model_folder, '.json') + if json_file: + return json_file json_res = response.json() if "error" in json_res: @@ -52,18 +52,18 @@ def device_update(self, device: dai.Device) -> Path: logging.info(f"Downloading '{ret['name']}' model from Roboflow server") - zipFileReq = requests.get(ret['model']) - zipFileReq.raise_for_status() + zip_file_req = requests.get(ret['model']) + zip_file_req.raise_for_status() (ROBOFLOW_MODELS / name).mkdir(parents=True, exist_ok=True) - zipFilePath = str(ROBOFLOW_MODELS / 'roboflow.zip') + zip_file_path = str(ROBOFLOW_MODELS / 'roboflow.zip') # Download the .zip where our model is - with open(zipFilePath, 'wb') as f: - f.write(zipFileReq.content) + with open(zip_file_path, 'wb') as f: + f.write(zip_file_req.content) - logging.info(f"Downloaded the model to {zipFilePath}") + logging.info(f"Downloaded the model to {zip_file_path}") - with ZipFile(zipFilePath, 'r') as zObject: # Extract the zip + with ZipFile(zip_file_path, 'r') as zObject: # Extract the zip zObject.extractall(str(ROBOFLOW_MODELS / name)) # Rename bin/xml files @@ -72,21 +72,20 @@ def device_update(self, device: dai.Device) -> Path: # Rename bin/xml paths inside the json new_json_name = str(model_folder / (name + ".json")) - jsonConf = json.load(self._file_with_ext(model_folder, ".json").open()) - jsonConf['model']['xml'] = name + ".xml" - jsonConf['model']['bin'] = name + ".bin" + json_conf = json.load(self._file_with_ext(model_folder, ".json").open()) + json_conf['model']['xml'] = name + ".xml" + json_conf['model']['bin'] = name + ".bin" # For some reason, Roboflow server provides incorrect json file, so we have to edit it - if 'output_format' in jsonConf: - jsonConf['nn_config']['output_format'] = jsonConf['output_format'] - del jsonConf['output_format'] - if 'NN_family' in jsonConf: - jsonConf['nn_config']['NN_family'] = jsonConf['NN_family'] - del jsonConf['NN_family'] + if 'output_format' in json_conf: + json_conf['nn_config']['output_format'] = json_conf['output_format'] + del json_conf['output_format'] + if 'NN_family' in json_conf: + json_conf['nn_config']['NN_family'] = json_conf['NN_family'] + del json_conf['NN_family'] # Overwrite the json file, and rename it - self._file_with_ext(model_folder, ".json").rename(new_json_name).open("w").write(json.dumps(jsonConf)) - - Path(zipFilePath).unlink() # Delete .zip + self._file_with_ext(model_folder, ".json").rename(new_json_name).open("w").write(json.dumps(json_conf)) + Path(zip_file_path).unlink() # Delete .zip return Path(new_json_name) diff --git a/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py b/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py index f809dd9b9..6688b5f01 100644 --- a/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py +++ b/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py @@ -1,7 +1,8 @@ from enum import Enum from typing import List -import numpy as np + import depthai as dai +import numpy as np class ImuSyncMethod(Enum): diff --git a/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py b/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py index e05909495..f8d4a1a97 100644 --- a/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py +++ b/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py @@ -1,8 +1,10 @@ import logging +from queue import Queue from threading import Thread from typing import Dict, Any -from queue import Queue + import rclpy + from depthai_sdk.integrations.ros.ros_base 
import RosBase diff --git a/depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/config.json b/depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/config.json similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/config.json rename to depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/config.json diff --git a/depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/handler.py b/depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/handler.py similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/handler.py rename to depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/handler.py diff --git a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/config.json b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/config.json similarity index 66% rename from depthai_sdk/src/depthai_sdk/nn_models/openpose2/config.json rename to depthai_sdk/src/depthai_sdk/nn_models/_openpose2/config.json index c5867bf80..e3c99219a 100644 --- a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/config.json +++ b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/config.json @@ -1,8 +1,10 @@ { + "model":{ + "model_name": "openpose2" + }, "nn_config": { "output_format" : "raw" }, - "openvino_version": "2020_4", "handler": "handler.py", "version": 1 } diff --git a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/handler.py b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/handler.py similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/openpose2/handler.py rename to depthai_sdk/src/depthai_sdk/nn_models/_openpose2/handler.py diff --git a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/model.yml b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/model.yml similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/openpose2/model.yml rename to depthai_sdk/src/depthai_sdk/nn_models/_openpose2/model.yml diff --git a/depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/config.json b/depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/config.json similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/config.json rename to depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/config.json diff --git a/depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/handler.py b/depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/handler.py similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/handler.py rename to depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/handler.py diff --git a/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json b/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json index 969eb5bce..781101bdd 100644 --- a/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json +++ b/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json @@ -22,6 +22,7 @@ "confidence_threshold" : 0.5 } }, + "openvino_version": "2021.4", "mappings": { "labels": diff --git a/depthai_sdk/src/depthai_sdk/oak_camera.py b/depthai_sdk/src/depthai_sdk/oak_camera.py index 562f3aebc..d1e9fab51 100644 --- a/depthai_sdk/src/depthai_sdk/oak_camera.py +++ b/depthai_sdk/src/depthai_sdk/oak_camera.py @@ -1,4 +1,3 @@ -import copy import logging import time import warnings @@ -6,6 +5,7 @@ from typing import Dict, Any, Optional, List, Union, Callable from depthai_sdk import CV2_HAS_GUI_SUPPORT +from depthai_sdk.types import Resolution from 
depthai_sdk.visualize.visualizer import Visualizer try: @@ -17,19 +17,33 @@ from depthai_sdk.trigger_action.actions.abstract_action import Action from depthai_sdk.args_parser import ArgsParser -from depthai_sdk.classes.output_config import BaseConfig, RecordConfig, OutputConfig, SyncConfig, RosStreamConfig, TriggerActionConfig +from depthai_sdk.classes.packet_handlers import ( + BasePacketHandler, + QueuePacketHandler, + RosPacketHandler, + TriggerActionPacketHandler, + RecordPacketHandler, + CallbackPacketHandler, + VisualizePacketHandler +) +# RecordConfig, OutputConfig, SyncConfig, RosStreamConfig, TriggerActionConfig from depthai_sdk.components.camera_component import CameraComponent -from depthai_sdk.components.component import Component +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.components.imu_component import IMUComponent from depthai_sdk.components.nn_component import NNComponent -from depthai_sdk.components.parser import parse_usb_speed, parse_camera_socket +from depthai_sdk.components.parser import ( + parse_usb_speed, + parse_camera_socket, + get_first_color_cam, + parse_open_vino_version +) from depthai_sdk.components.stereo_component import StereoComponent from depthai_sdk.components.pointcloud_component import PointcloudComponent -from depthai_sdk.oak_device import OakDevice from depthai_sdk.record import RecordType, Record from depthai_sdk.replay import Replay from depthai_sdk.trigger_action.triggers.abstract_trigger import Trigger -from depthai_sdk.utils import configPipeline, report_crash_dump +from depthai_sdk.utils import report_crash_dump + class UsbWarning(UserWarning): @@ -49,7 +63,7 @@ class OakCamera: def __init__(self, device: Optional[str] = None, usb_speed: Union[None, str, dai.UsbSpeed] = None, # Auto by default - replay: Optional[str] = None, + replay: Union[None, str, Path] = None, rotation: int = 0, config: dai.Device.Config = None, args: Union[bool, Dict] = True @@ -70,20 +84,26 @@ def __init__(self, self.pipeline = dai.Pipeline() self._args: Optional[Dict[str, Any]] = None # User defined arguments - self._oak = OakDevice() + self._pipeine_graph = None if args: if isinstance(args, bool): self._args = ArgsParser.parseArgs() - # Set up the OakCamera - if self._args.get('recording', None): - replay = self._args.get('recording', None) - if self._args.get('deviceId', None): - device = self._args.get('deviceId', None) - if self._args.get('usbSpeed', None): - usb_speed = parse_usb_speed(self._args.get('usbSpeed', None)) else: # Already parsed self._args = args + # Set up the OakCamera + if self._args.get('recording', None): + replay = self._args.get('recording', None) + if self._args.get('deviceId', None): + device = self._args.get('deviceId', None) + if self._args.get('usbSpeed', None): + usb_speed = parse_usb_speed(self._args.get('usbSpeed', None)) + + self.config_pipeline( + xlink_chunk=self._args.get('xlinkChunkSize', None), + tuning_blob=self._args.get('cameraTuning', None), + openvino_version=self._args.get('openvinoVersion', None), + ) if config is None: config = dai.Device.Config() @@ -101,23 +121,22 @@ def __init__(self, self._polling = [] self._components: List[Component] = [] # List of components - self._out_templates: List[BaseConfig] = [] + self._packet_handlers: List[BasePacketHandler] = [] self._rotation = rotation - if replay is not None: self.replay = Replay(replay) self.replay.initPipeline(self.pipeline) logging.info(f'Available streams from recording: {self.replay.getStreams()}') - + 
self._calibration = self._init_calibration() + def camera(self, source: Union[str, dai.CameraBoardSocket], resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, - name: Optional[str] = None, ) -> CameraComponent: """ Creates Camera component. This abstracts ColorCamera/MonoCamera nodes and supports mocking the camera when @@ -129,36 +148,83 @@ def camera(self, resolution (str/SensorResolution): Sensor resolution of the camera. fps (float): Sensor FPS encode (bool/str/Profile): Whether we want to enable video encoding (accessible via cameraComponent.out_encoded). If True, it will use MJPEG - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. """ - socket = source + sensor_type = None if isinstance(source, str): - socket = parse_camera_socket(source.split(",")[0]) + if "," in source: # For sensors that support multiple + parts = source.lower().split(',') + source = parts[0] + if parts[1] in ["c", "color"]: + sensor_type = dai.CameraSensorType.COLOR + elif parts[1] in ["m", "mono"]: + sensor_type = dai.CameraSensorType.MONO + else: + raise Exception( + "Please specify sensor type with c/color or m/mono after the ','" + " - eg. `cam = oak.create_camera('cama,c')`" + ) + + if source == 'left': + source = self._calibration.getStereoLeftCameraId() + elif source == 'right': + source = self._calibration.getStereoRightCameraId() + elif source in ['color', 'rgb']: + source = get_first_color_cam(self.device) + else: + source = parse_camera_socket(source) + + if source in [None, dai.CameraBoardSocket.AUTO]: + return None # There's no camera on this socket + for comp in self._components: - if isinstance(comp, CameraComponent) and comp.node.getBoardSocket() == socket: + if isinstance(comp, CameraComponent) and comp._socket == source: return comp - comp = CameraComponent(self._oak.device, + comp = CameraComponent(self.device, self.pipeline, source=source, resolution=resolution, fps=fps, encode=encode, + sensor_type=sensor_type, rotation=self._rotation, replay=self.replay, - name=name, args=self._args) self._components.append(comp) return comp + def _init_device(self, + config: dai.Device.Config, + device_str: Optional[str] = None, + ) -> None: + + """ + Connect to the OAK camera + """ + self.device = None + if device_str is not None: + device_info = dai.DeviceInfo(device_str) + else: + (found, device_info) = dai.Device.getFirstAvailableDevice() + if not found: + raise Exception("No OAK device found to connect to!") + + self.device = dai.Device( + config=config, + deviceInfo=device_info, + ) + + # TODO test with usb3 (SUPER speed) + if config.board.usb.maxSpeed != dai.UsbSpeed.HIGH and self.device.getUsbSpeed() == dai.UsbSpeed.HIGH: + warnings.warn("Device connected in USB2 mode! This might cause some issues. 
" + "In such case, please try using a (different) USB3 cable, " + "or force USB2 mode 'with OakCamera(usb_speed='usb2') as oak:'", UsbWarning) + def create_camera(self, source: Union[str, dai.CameraBoardSocket], - resolution: Optional[Union[ - str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + resolution: Optional[Resolution] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, - name: Optional[str] = None, ) -> CameraComponent: """ Deprecated, use camera() instead. @@ -172,14 +238,13 @@ def create_camera(self, resolution (str/SensorResolution): Sensor resolution of the camera. fps (float): Sensor FPS encode (bool/str/Profile): Whether we want to enable video encoding (accessible via cameraComponent.out_encoded). If True, it will use MJPEG - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. """ - return self.camera(source, resolution, fps, encode, name) + return self.camera(source, resolution, fps, encode) def all_cameras(self, resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> List[CameraComponent]: @@ -193,16 +258,19 @@ def all_cameras(self, """ components: List[CameraComponent] = [] # Loop over all available camera sensors - for cam_sensor in self._oak.device.getConnectedCameraFeatures(): - comp = CameraComponent(self._oak.device, + if self.replay: + sources = self.replay.getStreams() # TODO handle in case the stream is not from a camera + else: + sources = [cam_sensor.socket for cam_sensor in self.device.getConnectedCameraFeatures()] + for source in sources: + comp = CameraComponent(self.device, self.pipeline, - source=cam_sensor.socket, + source=source, resolution=resolution, fps=fps, encode=encode, rotation=self._rotation, replay=self.replay, - name=None, args=self._args) components.append(comp) @@ -211,9 +279,9 @@ def all_cameras(self, def create_all_cameras(self, resolution: Optional[Union[ - str, dai.ColorCameraProperties.SensorResolution, + str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> List[CameraComponent]: @@ -236,7 +304,6 @@ def create_nn(self, tracker: bool = False, # Enable object tracker - only for Object detection models spatial: Union[None, bool, StereoComponent] = None, decode_fn: Optional[Callable] = None, - name: Optional[str] = None ) -> NNComponent: """ Creates Neural Network component. @@ -248,9 +315,11 @@ def create_nn(self, tracker: Enable object tracker, if model is object detector (yolo/mobilenet) spatial: Calculate 3D spatial coordinates, if model is object detector (yolo/mobilenet) and depth stream is available decode_fn: Custom decoding function for the model's output - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. 
""" - comp = NNComponent(self._oak.device, + if spatial and type(spatial) == bool: + spatial = self.stereo() + + comp = NNComponent(self.device, self.pipeline, model=model, input=input, @@ -259,8 +328,7 @@ def create_nn(self, spatial=spatial, decode_fn=decode_fn, replay=self.replay, - args=self._args, - name=name) + args=self._args) self._components.append(comp) return comp @@ -269,7 +337,6 @@ def stereo(self, fps: Optional[float] = None, left: Union[None, dai.Node.Output, CameraComponent] = None, # Left mono camera right: Union[None, dai.Node.Output, CameraComponent] = None, # Right mono camera - name: Optional[str] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None ) -> StereoComponent: """ @@ -280,23 +347,22 @@ def stereo(self, fps (float): If monochrome cameras aren't already passed, create them and set specified FPS left (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. right (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. encode (bool/str/Profile): Whether we want to enable video encoding (accessible via StereoComponent.out.encoded). If True, it will use h264 codec. """ if left is None: - left = self.camera(source=dai.CameraBoardSocket.LEFT, resolution=resolution, fps=fps) + left = self.camera(source="left", resolution=resolution, fps=fps) if right is None: - right = self.camera(source=dai.CameraBoardSocket.RIGHT, resolution=resolution, fps=fps) + right = self.camera(source="right", resolution=resolution, fps=fps) + + if right is None or left is None: + return None - comp = StereoComponent(self._oak.device, + comp = StereoComponent(self.device, self.pipeline, - resolution=resolution, - fps=fps, left=left, right=right, replay=self.replay, args=self._args, - name=name, encode=encode) self._components.append(comp) return comp @@ -319,23 +385,21 @@ def create_stereo(self, fps (float): If monochrome cameras aren't already passed, create them and set specified FPS left (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. right (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. encode (bool/str/Profile): Whether we want to enable video encoding (accessible via StereoComponent.out.encoded). If True, it will use h264 codec. 
""" - return self.stereo(resolution, fps, left, right, name, encode) + return self.stereo(resolution, fps, left, right, encode) def create_imu(self) -> IMUComponent: """ Create IMU component """ - comp = IMUComponent(self._oak.device, self.pipeline) + comp = IMUComponent(self.device, self.pipeline) self._components.append(comp) return comp def create_pointcloud(self, stereo: Union[None, StereoComponent, dai.node.StereoDepth, dai.Node.Output] = None, colorize: Union[None, CameraComponent, dai.node.MonoCamera, dai.node.ColorCamera, dai.Node.Output, bool] = None, - name: Optional[str] = None, ) -> PointcloudComponent: if colorize is None: @@ -349,43 +413,16 @@ def create_pointcloud(self, colorize = component comp = PointcloudComponent( - self._oak.device, + self.device, self.pipeline, stereo=stereo, colorize=colorize, replay=self.replay, args=self._args, - name=name ) self._components.append(comp) return comp - def _init_device(self, - config: dai.Device.Config, - device_str: Optional[str] = None, - ) -> None: - - """ - Connect to the OAK camera - """ - if device_str is not None: - device_info = dai.DeviceInfo(device_str) - else: - (found, device_info) = dai.Device.getFirstAvailableDevice() - if not found: - raise Exception("No OAK device found to connect to!") - - self._oak.device = dai.Device( - config=config, - deviceInfo=device_info, - ) - - # TODO test with usb3 (SUPER speed) - if config.board.usb.maxSpeed != dai.UsbSpeed.HIGH and self._oak.device.getUsbSpeed() == dai.UsbSpeed.HIGH: - warnings.warn("Device connected in USB2 mode! This might cause some issues. " - "In such case, please try using a (different) USB3 cable, " - "or force USB2 mode 'with OakCamera(usbSpeed='usb2') as oak:'", UsbWarning) - def config_pipeline(self, xlink_chunk: Optional[int] = None, calib: Optional[dai.CalibrationHandler] = None, @@ -399,23 +436,38 @@ def config_pipeline(self, @param tuning_blob: Camera tuning blob @param openvino_version: Force specific OpenVINO version """ - configPipeline(self.pipeline, xlink_chunk, calib, tuning_blob, openvino_version) + if xlink_chunk is not None: + self.pipeline.setXLinkChunkSize(xlink_chunk) + if calib is not None: + self.pipeline.setCalibrationData(calib) + if tuning_blob is not None: + self.pipeline.setCameraTuningBlobPath(tuning_blob) + ov_version = parse_open_vino_version(openvino_version) + if ov_version is not None: + self.pipeline.setOpenVINOVersion(ov_version) def __enter__(self): return self def __exit__(self, exc_type, exc_value, tb): + self.close() + + def close(self): logging.info("Closing OAK camera") if self.replay: - logging.info("Closing replay") self.replay.close() - if self._oak.device is not None: - self._oak.device.close() - for out in self._out_templates: - if isinstance(out, RecordConfig): - out.rec.close() - self._oak.close() + for handler in self._packet_handlers: + handler.close() + + self.device.close() + + def _new_oak_msg(self, q_name: str, msg): + if self._stop: + return + if q_name in self._new_msg_callbacks: + for callback in self._new_msg_callbacks[q_name]: + callback(q_name, msg) def start(self, blocking=False): """ @@ -423,48 +475,51 @@ def start(self, blocking=False): Args: blocking (bool): Continuously loop and call oak.poll() until program exits """ - self.build() - - # Remove unused nodes. There's a better way though. - # self._pipeline. 
- # schema = self._pipeline.serializeToJson()['pipeline'] - # used_nodes = [] - # for conn in schema['connections']: - # print() - # used_nodes.append(conn["node1Id"]) - # used_nodes.append(conn["node2Id"]) - # - # for node in self._pipeline.getAllNodes(): - # if node.id not in used_nodes: - # print(f"Removed node {node} (id: {node.id}) from the pipeline as it hasn't been used!") - # self._pipeline.remove(node) - - self._oak.device.startPipeline(self.pipeline) - - self._oak.init_callbacks(self.pipeline) + self._new_msg_callbacks = {} + for node in self.pipeline.getAllNodes(): + if isinstance(node, dai.node.XLinkOut): + self._new_msg_callbacks[node.getStreamName()] = [] + + for handler in self._packet_handlers: + # Setup PacketHandlers. This will: + # - Initialize all submodules (eg. Recording, Trigger/Actions, Visualizer) + # - Create XLinkIn nodes for all components/streams + handler.setup(self.pipeline, self.device, self._new_msg_callbacks) + + # Upload the pipeline to the device and start it + self.device.startPipeline(self.pipeline) + + for xlink_name in self._new_msg_callbacks: + try: + self.device.getOutputQueue(xlink_name, maxSize=4, blocking=False).addCallback(self._new_oak_msg) + # TODO: make this nicer, have self._new_msg_callbacks know whether it's replay or not + except Exception as e: + if self.replay: + self.replay._add_callback(xlink_name, self._new_oak_msg) + else: + raise e + + # Append callbacks to be called from main thread + # self._polling.append() + if self._pipeine_graph is not None: + self._pipeine_graph.create_graph(self.pipeline.serializeToJson()['pipeline'], self.device) + logging.info('Pipeline graph process started') # Call on_pipeline_started() for each component for comp in self._components: - comp.on_pipeline_started(self._oak.device) - - # Start FPS counters - for xout in self._oak.oak_out_streams: # Start FPS counters - xout.start_fps() + comp.on_pipeline_started(self.device) if self.replay: - self.replay.createQueues(self._oak.device) + self.replay.createQueues(self.device) + self.replay.start() # Called from Replay module on each new frame sent to the device. - self.replay.start(self._oak.new_msg) # Check if callbacks (sync/non-sync are set) if blocking: # Constant loop: get messages, call callbacks while self.running(): - time.sleep(0.001) self.poll() - cv2.destroyAllWindows() - def running(self) -> bool: """ Check if camera is running. @@ -479,18 +534,18 @@ def poll(self) -> Optional[int]: Returns: key pressed from cv2.waitKey, or None if """ + # if self._stop: + # return if CV2_HAS_GUI_SUPPORT: key = cv2.waitKey(1) if key == ord('q'): self._stop = True return key else: + time.sleep(0.001) key = -1 # TODO: check if components have controls enabled and check whether key == `control` - - self._oak.check_sync() - if self.replay: if key == ord(' '): self.replay.toggle_pause() @@ -508,183 +563,133 @@ def poll(self) -> Optional[int]: return key - def build(self) -> dai.Pipeline: - """ - Connect to the device and build the pipeline based on previously provided configuration. Configure XLink queues, - upload the pipeline to the device. This function must only be called once! build() is also called by start(). 
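With the new start() above, every XLinkOut queue is wired to a single device callback and each packet handler is set up before the pipeline is uploaded, so `oak.build()` is gone. A non-blocking main loop therefore only needs running()/poll():

from depthai_sdk import OakCamera

with OakCamera() as oak:
    color = oak.camera('color')
    oak.visualize(color, fps=True)
    oak.start(blocking=False)   # start() uploads and starts the pipeline itself
    while oak.running():
        oak.poll()              # services main-thread handlers (e.g. OpenCV windows)
        # ... application logic here ...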
- Return: - Built dai.Pipeline - """ - if self._built: - return - self._built = True - - # First go through each component to check whether any is forcing an OpenVINO version - # TODO: check each component's SHAVE usage - for c in self._components: - ov = c.forced_openvino_version() - if ov: - if self.pipeline.getRequiredOpenVINOVersion() and self.pipeline.getRequiredOpenVINOVersion() != ov: - raise Exception( - 'Two components forced two different OpenVINO version!' - 'Please make sure that all your models are compiled using the same OpenVINO version.' - ) - self.pipeline.setOpenVINOVersion(ov) - - if self.pipeline.getRequiredOpenVINOVersion() is None: - # Force 2021.4 as it's better supported (blobconverter, compile tool) for now. - self.pipeline.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4) - - - # Create XLinkOuts based on visualizers/callbacks enabled - - # TODO: clean this up and potentially move elsewhere - names = [] - for out in self._out_templates: - xouts = out.setup(self.pipeline, self._oak.device, names) - self._oak.oak_out_streams.extend(xouts) - - # User-defined arguments - if self._args: - self.config_pipeline( - xlink_chunk=self._args.get('xlinkChunkSize', None), - tuning_blob=self._args.get('cameraTuning', None), - openvino_version=self._args.get('openvinoVersion', None), - ) - - return self.pipeline - - def _get_component_outputs(self, output: Union[List, Callable, Component]) -> List[Callable]: - if not isinstance(output, List): - output = [output] - - for i in range(len(output)): - if isinstance(output[i], Component): - # Select default (main) output of the component - output[i] = output[i].out.main - return output - - def sync(self, outputs: Union[Callable, List[Callable]], callback: Callable, visualize=False): - """ - Synchronize multiple components outputs forward them to the callback. - Args: - outputs: Component output(s) - callback: Where to send synced streams - visualize: Whether to draw on the frames (like with visualize()) - """ - if isinstance(outputs, Callable): - outputs = [outputs] # to list - - self._out_templates.append(SyncConfig(outputs, callback)) + def sync(self, outputs: Union[ComponentOutput, List[ComponentOutput]], callback: Callable, visualize=False): + raise DeprecationWarning('sync() is deprecated. Use callback() instead.') def record(self, - outputs: Union[Callable, List[Callable]], + outputs: Union[ComponentOutput, List[ComponentOutput]], path: str, - record_type: RecordType = RecordType.VIDEO): + record_type: RecordType = RecordType.VIDEO + ) -> RecordPacketHandler: """ Record component outputs. This handles syncing multiple streams (eg. left, right, color, depth) and saving them to the computer in desired format (raw, mp4, mcap, bag..). + Args: - outputs (Component/Component output): Component output(s) to be recorded - path: Folder path where to save these streams - record_type: Record type + outputs (Component/Component output): Component output(s) to be recorded. + path: Folder path where to save these streams. + record_type: Record type. """ - record = Record(Path(path).resolve(), record_type) - self._out_templates.append(RecordConfig(self._get_component_outputs(outputs), record)) - return record + handler = RecordPacketHandler(outputs, Record(Path(path).resolve(), record_type)) + self._packet_handlers.append(handler) + return handler def show_graph(self): """ - Shows DepthAI Pipeline graph, which can be useful when debugging. Builds the pipeline (oak.build()). + Shows DepthAI Pipeline graph, which can be useful when debugging. 
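record() above now returns a RecordPacketHandler instead of the old RecordConfig, and sync() only raises a DeprecationWarning. Usage stays close to the previous API; a sketch with an illustrative output path:

from depthai_sdk import OakCamera
from depthai_sdk.record import RecordType

with OakCamera() as oak:
    color = oak.camera('color', encode='h265')
    # Record the encoded stream; returns a RecordPacketHandler
    oak.record(color.out.encoded, './recordings', RecordType.VIDEO)
    oak.start(blocking=True)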
You must call this BEFORE calling the oak.start()! """ - self.build() - from depthai_pipeline_graph.pipeline_graph import \ - PipelineGraph - - p = PipelineGraph() - p.create_graph(self.pipeline.serializeToJson()['pipeline'], self.device) - self._polling.append(p.update) - logging.info('Process started') + from depthai_pipeline_graph.pipeline_graph import PipelineGraph + self._pipeine_graph = PipelineGraph() + self._polling.append(self._pipeine_graph.update) def visualize(self, - output: Union[List, Callable, Component], + output: Union[List, ComponentOutput, Component], record_path: Optional[str] = None, scale: float = None, fps=False, - callback: Callable = None): + callback: Callable = None, + visualizer: str = 'opencv' + ) -> Visualizer: """ Visualize component output(s). This handles output streaming (OAK->host), message syncing, and visualizing. + Args: - output (Component/Component output): Component output(s) to be visualized. If component is passed, SDK will visualize its default output (out()) - record_path: Path where to store the recording (visualization window name gets appended to that path), supported formats: mp4, avi - scale: Scale the output window by this factor - fps: Whether to show FPS on the output window - callback: Instead of showing the frame, pass the Packet to the callback function, where it can be displayed - """ - if record_path and isinstance(output, List): - if len(output) > 1: - raise ValueError('Recording visualizer is only supported for a single output.') - output = output[0] - - visualizer = Visualizer(scale, fps) - return self._callback(output, callback, visualizer, record_path) - - def _callback(self, - output: Union[List, Callable, Component], - callback: Callable, - visualizer: Visualizer = None, - record_path: Optional[str] = None): - if isinstance(output, List): - for element in output: - self._callback(element, callback, visualizer, record_path) - return visualizer - - if isinstance(output, Component): - output = output.out.main - - visualizer_enabled = visualizer is not None - if visualizer_enabled: - config = visualizer.config - visualizer = copy.deepcopy(visualizer) or Visualizer() - visualizer.config = config if config else visualizer.config - - self._out_templates.append(OutputConfig(output, callback, visualizer, visualizer_enabled, record_path)) - return visualizer - - def callback(self, output: Union[List, Callable, Component], callback: Callable, enable_visualizer: bool = False): + output (Component/Component output): Component output(s) to be visualized. If component is passed, SDK will visualize its default output (out()). + record_path: Path where to store the recording (visualization window name gets appended to that path), supported formats: mp4, avi. + scale: Scale the output window by this factor. + fps: Whether to show FPS on the output window. + callback: Instead of showing the frame, pass the Packet to the callback function, where it can be displayed. + visualizer: Which visualizer to use. Options: 'opencv', 'depthai-viewer', 'robothub'. 
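The new `visualizer` argument documented above selects the backend at call time; 'opencv' stays the default and runs on the main thread, while 'robothub' is not implemented yet. A short sketch of switching to the DepthAI viewer backend:

from depthai_sdk import OakCamera

with OakCamera() as oak:
    color = oak.camera('color')
    # 'opencv' (default) or 'depthai-viewer'; the viewer backend does not need the main thread
    oak.visualize(color, fps=True, visualizer='depthai-viewer')
    oak.start(blocking=True)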
+ """ + main_thread = False + visualizer = visualizer.lower() + if visualizer in ['opencv', 'cv2']: + from depthai_sdk.visualize.visualizers.opencv_visualizer import OpenCvVisualizer + vis = OpenCvVisualizer(scale, fps) + main_thread = True # OpenCV's imshow() requires to be called from the main thread + elif visualizer in ['depthai-viewer', 'depthai_viewer', 'viewer', 'depthai']: + from depthai_sdk.visualize.visualizers.viewer_visualizer import DepthaiViewerVisualizer + vis = DepthaiViewerVisualizer(scale, fps) + elif visualizer in ['robothub', 'rh']: + raise NotImplementedError('Robothub visualizer is not implemented yet') + else: + raise ValueError(f"Unknown visualizer: {visualizer}. Options: 'opencv'") + + handler = VisualizePacketHandler(output, + vis, + callback=callback, record_path=record_path, + main_thread=main_thread) + self._packet_handlers.append(handler) + + if main_thread: + self._polling.append(handler._poll) + + return vis + + def queue(self, output: Union[ComponentOutput, Component, List], max_size: int = 30) -> QueuePacketHandler: """ - Create a callback for the component output(s). This handles output streaming (OAK->Host) and message syncing. + Create a queue for the component output(s). This handles output streaming (OAK->Host) and message syncing. + Args: output: Component output(s) to be visualized. If component is passed, SDK will visualize its default output. - callback: Handler function to which the Packet will be sent. - enable_visualizer: Whether to enable visualizer for this output. + max_size: Maximum queue size for this output. """ - self._callback(output, callback, Visualizer() if enable_visualizer else None) + handler = QueuePacketHandler(output, max_size) + self._packet_handlers.append(handler) + return handler - def ros_stream(self, output: Union[List, Callable, Component]): - self._out_templates.append(RosStreamConfig(self._get_component_outputs(output))) - - def trigger_action(self, trigger: Trigger, action: Union[Action, Callable]): - self._out_templates.append(TriggerActionConfig(trigger, action)) - - def set_max_queue_size(self, size: int): + def callback(self, + output: Union[List, Callable, Component], + callback: Callable, + main_thread=False + ) -> CallbackPacketHandler: """ - Set maximum queue size for all outputs. This is the maximum number of frames that can be stored in the queue. + Create a callback for the component output(s). This handles output streaming (OAK->Host) and message syncing. + Args: - size: Maximum queue size for all outputs. + output: Component output(s) to be visualized. If component is passed, SDK will visualize its default output. + callback: Handler function to which the Packet will be sent. + main_thread: Whether to run the callback in the main thread. If False, it will call the callback in a separate thread, so some functions (eg. cv2.imshow) won't work. """ - self._oak.set_max_queue_size(size) + handler = CallbackPacketHandler(output, callback=callback, main_thread=main_thread) + if main_thread: + self._polling.append(handler._poll) + self._packet_handlers.append(handler) + return handler - @property - def device(self) -> dai.Device: + def ros_stream(self, output: Union[List, ComponentOutput, Component]) -> RosPacketHandler: """ - Returns dai.Device object. oak.built() has to be called before querying this property! + Publish component output(s) to ROS streams. 
""" - return self._oak.device + handler = RosPacketHandler(output) + self._packet_handlers.append(handler) + return handler + + def trigger_action(self, trigger: Trigger, action: Union[Action, Callable]) -> None: + self._packet_handlers.append(TriggerActionPacketHandler(trigger, action)) @property def sensors(self) -> List[dai.CameraBoardSocket]: """ Returns list of all sensors added to the pipeline. """ - return self._oak.image_sensors + return self.device.getConnectedCameraFeatures() + + def _init_calibration(self) -> dai.CalibrationHandler: + if self.replay: + calibration = self.pipeline.getCalibrationData() + else: + calibration = self.device.readCalibration() + if calibration is None: + logging.warning("No calibration data found on the device or in replay") + return calibration diff --git a/depthai_sdk/src/depthai_sdk/oak_device.py b/depthai_sdk/src/depthai_sdk/oak_device.py deleted file mode 100644 index 235d345ea..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_device.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import List, Optional - -import depthai as dai - -from depthai_sdk.oak_outputs.xout.xout_base import XoutBase - - -class OakDevice: - def __init__(self): - self.device: Optional[dai.Device] = None - # fpsHandlers: Dict[str, FPS] = dict() - self.oak_out_streams: List[XoutBase] = [] - self.max_queue_size = 4 - - @property - def image_sensors(self) -> List[dai.CameraBoardSocket]: - """ - Available imageSensors available on the camera - """ - return self.device.getConnectedCameras() - - @property - def info(self) -> dai.DeviceInfo: - return self.device.getDeviceInfo() - - def init_callbacks(self, pipeline: dai.Pipeline): - for node in pipeline.getAllNodes(): - if isinstance(node, dai.node.XLinkOut): - stream_name = node.getStreamName() - # self.fpsHandlers[name] = FPS() - self.device.getOutputQueue(stream_name, maxSize=self.max_queue_size, blocking=False).addCallback( - lambda name, msg: self.new_msg(name, msg) - ) - - def new_msg(self, name, msg): - for sync in self.oak_out_streams: - sync.new_msg(name, msg) - - def check_sync(self): - """ - Checks whether there are new synced messages, non-blocking. - """ - for sync in self.oak_out_streams: - sync.check_queue(block=False) # Don't block! 
- - def set_max_queue_size(self, size: int): - self.max_queue_size = size - - def close(self): - for stream in self.oak_out_streams: - stream.close() diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py b/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py index addcac847..6318e5cda 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py @@ -1,13 +1,14 @@ import threading -from typing import Dict, List, Any, Optional +from datetime import timedelta +from typing import Dict, Any, Optional class SequenceNumSync: """ - msgs = {seqNum: {name: message}} + self.msgs = {seqNum: {name: message}} Example: - msgs = { + self.msgs = { '1': { 'rgb': dai.Frame(), 'dets': dai.ImgDetections(), @@ -47,3 +48,44 @@ def sync(self, seq_num: int, name: str, msg) -> Optional[Dict]: return ret return None + + +class TimestampSync: + def __init__(self, stream_num: int, ms_threshold: int): + self.msgs: Dict[str, Any] = dict() + self.stream_num: int = stream_num + self.ms_threshold = ms_threshold + + def sync(self, timestamp, name: str, msg): + if name not in self.msgs: + self.msgs[name] = [] + + self.msgs[name].append((timestamp, msg)) + + synced = {} + for name, arr in self.msgs.items(): + # Go through all stored messages and calculate the time difference to the target msg. + # Then sort these self.msgs to find a msg that's closest to the target time, and check + # whether it's below 17ms which is considered in-sync. + diffs = [] + for i, (msg_ts, msg) in enumerate(arr): + diffs.append(abs(msg_ts - timestamp)) + if len(diffs) == 0: + break + diffs_sorted = diffs.copy() + diffs_sorted.sort() + dif = diffs_sorted[0] + + if dif < timedelta(milliseconds=self.ms_threshold): + synced[name] = diffs.index(dif) + + if len(synced) == self.stream_num: # We have all synced streams + # Remove older self.msgs + for name, i in synced.items(): + self.msgs[name] = self.msgs[name][i:] + ret = {} + for name, arr in self.msgs.items(): + ts, synced_msg = arr.pop(0) + ret[name] = synced_msg + return ret + return None diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py index b896e7159..e69de29bb 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py @@ -1,15 +0,0 @@ -try: - import cv2 -except ImportError: - cv2 = None - - -class Clickable: - def __init__(self, decay_step: int = 30): - super().__init__() - self.buffer = None - self.decay_step = decay_step - - def on_click_callback(self, event, x, y, flags, param) -> None: - if event == cv2.EVENT_MOUSEMOVE: - self.buffer = ([0, param[0][y, x], [x, y]]) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py index 4ca3fa270..b4fabad61 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py @@ -1,21 +1,20 @@ -import traceback -import warnings from abc import ABC, abstractmethod -from queue import Empty, Queue -from typing import List, Callable, Optional +from typing import List, Optional, Callable import depthai as dai -from depthai_sdk.oak_outputs.fps import FPS +from depthai_sdk.classes.packets import FramePacket +from depthai_sdk.components.component 
import ComponentOutput class StreamXout: - def __init__(self, id: int, out: dai.Node.Output, name: Optional[str] = None): + def __init__(self, out: dai.Node.Output, name: Optional[str] = None): self.stream = out if name is not None: - self.name = f'{name}_{str(out.name)}' + self.name = name else: - self.name = f"{str(id)}_{out.name}" + node = out.getParent() + self.name = f"{str(node.id)}_{out.name}" class ReplayStream(StreamXout): @@ -26,90 +25,73 @@ def __init__(self, name: str): class XoutBase(ABC): def __init__(self) -> None: self._streams = [xout.name for xout in self.xstreams()] - self._visualizer = None - self._visualizer_enabled = False self._packet_name = None - self._fps = None - self.queue = None - self.callback = None + self._packet_name_postfix = None + + # It will get assigned later inside the BasePacketHandler class + self.new_packet_callback: Callable = lambda x: None def get_packet_name(self) -> str: if self._packet_name is None: self._packet_name = ";".join([xout.name for xout in self.xstreams()]) - return self._packet_name + return self._packet_name + (f'_{self._packet_name_postfix}' if self._packet_name_postfix else '') + + def set_packet_name_postfix(self, postfix: str) -> None: + """ + Set postfix to packet name. + """ + self._packet_name_postfix = postfix + + def set_comp_out(self, comp_out: ComponentOutput) -> 'XoutBase': + """ + Set ComponentOutput to Xout. + """ + if comp_out.name is None: + # If user hasn't specified component's output name, generate one + comp_out.name = self.get_packet_name() + else: + # Otherwise, set packet name to user-specified one + self._packet_name = comp_out.name + return self @abstractmethod def xstreams(self) -> List[StreamXout]: raise NotImplementedError() - def setup_base(self, callback: Callable): - # Gets called when initializing - self.queue = Queue(maxsize=10) - self.callback = callback - - def start_fps(self): - self._fps = FPS() + def device_msg_callback(self, name, dai_message) -> None: + """ + This is the (first) callback that gets called on a device message. Don't override it. + It will call `new_msg` and `on_callback` methods. If `new_msg` returns a packet, it will call + `new_packet` method. + """ + # self._fps_counter[name].next_iter() + packet = self.new_msg(name, dai_message) + if packet is not None: + # If not list, convert to list. + # Some Xouts create multiple packets from a single message (example: IMU) + if not isinstance(packet, list): + packet = [packet] + + for p in packet: + # In case we have encoded frames, we need to set the codec + if isinstance(p, FramePacket) and \ + hasattr(self, 'get_codec') and \ + self._fourcc is not None: + p.set_decode_codec(self.get_codec) + + self.on_callback(p) + self.new_packet_callback(p) @abstractmethod def new_msg(self, name: str, msg) -> None: raise NotImplementedError() - @abstractmethod - def visualize(self, packet) -> None: - raise NotImplementedError() - def on_callback(self, packet) -> None: """ Hook called when `callback` or `self.visualize` are used. """ pass - def on_record(self, packet) -> None: - """ - Hook called when `record_path` is used. - """ - pass - - def close(self) -> None: - """ - Hook that will be called when exiting the context manager. - """ - pass - - # This approach is used as some functions (eg. imshow()) need to be called from - # main thread, and calling them from callback thread wouldn't work. - def check_queue(self, block=False) -> None: - """ - Checks queue for any available messages. If available, call callback. Non-blocking by default. 
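Returning to the TimestampSync helper added to syncing.py earlier in this diff: it keeps a per-stream buffer and releases one message per stream once every stream has a message within ms_threshold of the reference timestamp. A minimal host-side sketch with two hypothetical streams and string payloads:

from datetime import datetime, timedelta
from depthai_sdk.oak_outputs.syncing import TimestampSync

sync = TimestampSync(stream_num=2, ms_threshold=17)

now = datetime.now()
# Feed messages as (timestamp, stream name, payload); only one stream so far -> not synced yet
assert sync.sync(now, 'color', 'color-frame-0') is None
# Second stream arrives 5 ms later, within the 17 ms threshold -> both messages are released
synced = sync.sync(now + timedelta(milliseconds=5), 'depth', 'depth-frame-0')
if synced is not None:
    print(synced['color'], synced['depth'])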
- """ - try: - packet = self.queue.get(block=block) - - if packet is not None: - self._fps.next_iter() - - self.on_callback(packet) - - if self._visualizer_enabled: - try: - self.visualize(packet) - except Exception as e: - warnings.warn(f'An error occurred while visualizing: {e}') - traceback.print_exc() - else: - # User defined callback - try: - self.callback(packet) - except Exception as e: - warnings.warn(f'An error occurred while calling callback: {e}') - traceback.print_exc() - - # Record after processing, so that user can modify the frame - self.on_record(packet) - - except Empty: # Queue empty - pass - def fourcc(self) -> str: if self.is_mjpeg(): return 'mjpeg' @@ -124,25 +106,22 @@ def fourcc(self) -> str: return None def is_h265(self) -> bool: - if type(self).__name__ == 'XoutH26x': - # XoutH26x class has profile attribute - return self.profile == dai.VideoEncoderProperties.Profile.H265_MAIN - return False + fourcc = getattr(self, '_fourcc', None) + return fourcc is not None and fourcc.lower() == 'hevc' def is_h264(self) -> bool: - if type(self).__name__ == 'XoutH26x': - # XoutH26x class has profile attribute - return self.profile != dai.VideoEncoderProperties.Profile.H265_MAIN - return False - - def is_h26x(self) -> bool: - return type(self).__name__ == 'XoutH26x' + fourcc = getattr(self, '_fourcc', None) + return fourcc is not None and fourcc.lower() == 'h264' def is_mjpeg(self) -> bool: - return type(self).__name__ == 'XoutMjpeg' + fourcc = getattr(self, '_fourcc', None) + return fourcc is not None and fourcc.lower() == 'mjpeg' + + def is_h26x(self) -> bool: + return self.is_h264() or self.is_h265() def is_raw(self) -> bool: - return type(self).__name__ == 'XoutFrames' + return type(self).__name__ == 'XoutFrames' and self._fourcc is None def is_depth(self) -> bool: return type(self).__name__ == 'XoutDepth' diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py index cf774ac50..fe65f4b2a 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py @@ -1,50 +1,40 @@ -from typing import Optional +from typing import Optional, Dict import depthai as dai -import numpy as np -from depthai_sdk.classes.packets import DepthPacket +from depthai_sdk.classes.packets import DisparityDepthPacket from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_disparity import XoutDisparity from depthai_sdk.visualize.configs import StereoColor -try: - import cv2 -except ImportError: - cv2 = None - -class XoutDepth(XoutDisparity): +class XoutDisparityDepth(XoutDisparity): def __init__(self, device: dai.Device, frames: StreamXout, dispScaleFactor: float, - fps: float, mono_frames: Optional[StreamXout], colorize: StereoColor = None, colormap: int = None, - wls_config: dict = None, ir_settings: dict = None): self.name = 'Depth' super().__init__(device=device, frames=frames, disp_factor=255 / 95, - fps=fps, mono_frames=mono_frames, colorize=colorize, colormap=colormap, - wls_config=wls_config, ir_settings=ir_settings) self.disp_scale_factor = dispScaleFactor - def visualize(self, packet: DepthPacket): - # Convert depth to disparity for nicer visualization - packet.depth_map = packet.frame.copy() - with np.errstate(divide='ignore'): - disp = self.disp_scale_factor / packet.frame - - disp[disp == np.inf] = 0 - - packet.frame = np.round(disp).astype(np.uint8) - super().visualize(packet) + def package(self, msgs: 
Dict) -> DisparityDepthPacket: + mono_frame = msgs[self.mono_frames.name] if self.mono_frames else None + return DisparityDepthPacket( + self.get_packet_name(), + msgs[self.frames.name], + colorize=self.colorize, + colormap=self.colormap, + mono_frame=mono_frame, + disp_scale_factor=self.disp_scale_factor, + ) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py index e6e8239aa..80bf7f28f 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py @@ -2,15 +2,15 @@ import logging import warnings from collections import defaultdict -from typing import List, Optional +from typing import List, Optional, Dict import depthai as dai import numpy as np -from depthai_sdk.classes.packets import DepthPacket -from depthai_sdk.oak_outputs.xout import Clickable +from depthai_sdk.classes.packets import DisparityPacket from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames +from depthai_sdk.oak_outputs.xout.xout_seq_sync import XoutSeqSync from depthai_sdk.visualize.configs import StereoColor try: @@ -19,26 +19,23 @@ cv2 = None -class XoutDisparity(XoutFrames, Clickable): +class XoutDisparity(XoutSeqSync, XoutFrames): def __init__(self, device: dai.Device, frames: StreamXout, disp_factor: float, - fps: float, mono_frames: Optional[StreamXout], colorize: StereoColor = None, colormap: int = None, wls_config: dict = None, ir_settings: dict = None): self.mono_frames = mono_frames - self.multiplier = disp_factor - self.fps = fps self.name = 'Disparity' + self.multiplier = disp_factor self.device = device self.colorize = colorize self.colormap = colormap - self.use_wls_filter = wls_config['enabled'] self.ir_settings = ir_settings self._dot_projector_brightness = 0 # [0, 1200] @@ -56,20 +53,20 @@ def __init__(self, self._X, self._y = [], [] # Prefer to use WLS level if set, otherwise use lambda and sigma - wls_level = wls_config['level'] - if wls_level and self.use_wls_filter: + self.use_wls_filter = wls_config['enabled'] if wls_config else False + if self.use_wls_filter: + wls_level = wls_config['level'] logging.debug( f'Using WLS level: {wls_level.name} (lambda: {wls_level.value[0]}, sigma: {wls_level.value[1]})' ) - self.wls_lambda = wls_level.value[0] - self.wls_sigma = wls_level.value[1] - else: - self.wls_lambda = wls_config['lambda'] - self.wls_sigma = wls_config['sigma'] + self.wls_lambda = wls_level.value[0] or wls_config['lambda'] + self.wls_sigma = wls_level.value[1] or wls_config['sigma'] - if self.use_wls_filter: try: self.wls_filter = cv2.ximgproc.createDisparityWLSFilterGeneric(False) + self.wls_filter.setLambda(self.wls_lambda) + self.wls_filter.setSigmaColor(self.wls_sigma) + except AttributeError: warnings.warn( 'OpenCV version does not support WLS filter. Disabling WLS filter. 
' @@ -78,108 +75,48 @@ def __init__(self, ) self.use_wls_filter = False - self.msgs = dict() - - XoutFrames.__init__(self, frames=frames, fps=fps) - Clickable.__init__(self, decay_step=int(self.fps)) + XoutFrames.__init__(self, frames=frames) + XoutSeqSync.__init__(self, [frames, mono_frames]) def on_callback(self, packet) -> None: if self.ir_settings['auto_mode']: - self._auto_ir_search(packet.frame) - - def visualize(self, packet: DepthPacket): - frame = packet.frame - disparity_frame = (frame * self.multiplier).astype(np.uint8) - try: - mono_frame = packet.mono_frame.getCvFrame() - except AttributeError: - mono_frame = None - - stereo_config = self._visualizer.config.stereo - - if self.use_wls_filter or stereo_config.wls_filter: - self.wls_filter.setLambda(self.wls_lambda or stereo_config.wls_lambda) - self.wls_filter.setSigmaColor(self.wls_sigma or stereo_config.wls_sigma) - disparity_frame = self.wls_filter.filter(disparity_frame, mono_frame) - - colorize = self.colorize or stereo_config.colorize - if self.colormap is not None: - colormap = self.colormap - else: - colormap = stereo_config.colormap - colormap[0] = [0, 0, 0] # Invalidate pixels 0 to be black - - if mono_frame is not None and disparity_frame.ndim == 2 and mono_frame.ndim == 3: - disparity_frame = disparity_frame[..., np.newaxis] - - if colorize == StereoColor.GRAY: - packet.frame = disparity_frame - elif colorize == StereoColor.RGB: - packet.frame = cv2.applyColorMap(disparity_frame, colormap) - elif colorize == StereoColor.RGBD: - packet.frame = cv2.applyColorMap( - (disparity_frame * 1.0 + mono_frame * 0.5).astype(np.uint8), colormap - ) - - if self._visualizer.config.output.clickable: - cv2.namedWindow(self.name) - cv2.setMouseCallback(self.name, self.on_click_callback, param=[disparity_frame]) - - if self.buffer: - x, y = self.buffer[2] - text = f'{self.buffer[1]}' # Disparity value - if packet.depth_map is not None: - text = f"{packet.depth_map[y, x] / 1000 :.2f} m" - - self._visualizer.add_circle(coords=(x, y), radius=3, color=(255, 255, 255), thickness=-1) - self._visualizer.add_text(text=text, coords=(x, y - 10)) - - super().visualize(packet) + self._auto_ir_search(packet.msg.getFrame()) def xstreams(self) -> List[StreamXout]: if self.mono_frames is None: return [self.frames] return [self.frames, self.mono_frames] - def new_msg(self, name: str, msg: dai.Buffer) -> None: - if name not in self._streams: - return # From Replay modules. TODO: better handling? - - # TODO: what if msg doesn't have sequence num? 
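package() above now ships the (optionally WLS-filtered) disparity in DisparityPacket.disparity_map instead of drawing it inside the Xout, so any packet handler can pick it up on the host. A sketch using a callback; `stereo.out.disparity` is assumed from the StereoComponent outputs, and the raw disparity levels may need scaling before display:

import cv2
from depthai_sdk import OakCamera

def on_disparity(packet):
    # disparity_map is filled in by XoutDisparity.package() (WLS-filtered when enabled)
    if packet.disparity_map is not None:
        cv2.imshow('disparity', packet.disparity_map)

with OakCamera() as oak:
    stereo = oak.stereo(resolution='400p')
    oak.callback(stereo.out.disparity, callback=on_disparity, main_thread=True)
    oak.start(blocking=True)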
- seq = str(msg.getSequenceNum()) + def package(self, msgs: Dict) -> DisparityPacket: + img_frame = msgs[self.frames.name] + mono_frame = msgs[self.mono_frames.name] if self.mono_frames else None + # TODO: refactor the mess below + packet = DisparityPacket( + self.get_packet_name(), + img_frame, + self.multiplier, + disparity_map=None, + colorize=self.colorize, + colormap=self.colormap, + mono_frame=mono_frame, + ) + packet._get_codec = self.get_codec + + if self._fourcc is None: + disparity_frame = img_frame.getFrame() + else: + disparity_frame = packet.decode() + if disparity_frame is None: + return None - if seq not in self.msgs: - self.msgs[seq] = dict() + if mono_frame and self.use_wls_filter: + # Perform WLS filtering + # If we have wls enabled, it means CV2 is installed + disparity_frame = self.wls_filter.filter(disparity_frame, mono_frame.getCvFrame()) - if name == self.frames.name: - self.msgs[seq][name] = msg - elif name == self.mono_frames.name: - self.msgs[seq][name] = msg - else: - raise ValueError('Message from unknown stream name received by TwoStageSeqSync!') - - if len(self.msgs[seq]) == len(self.xstreams()): - # Frames synced! - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - mono_frame = None - if self.mono_frames is not None: - mono_frame = self.msgs[seq][self.mono_frames.name] - - packet = DepthPacket( - self.get_packet_name(), - img_frame=self.msgs[seq][self.frames.name], - mono_frame=mono_frame, - visualizer=self._visualizer - ) - self.queue.put(packet, block=False) + packet.disparity_map = disparity_frame - new_msgs = {} - for name, msg in self.msgs.items(): - if int(name) > int(seq): - new_msgs[name] = msg - self.msgs = new_msgs + return packet def _auto_ir_search(self, frame: np.ndarray): # Perform neighbourhood search if we got worse metric values @@ -202,7 +139,7 @@ def _ir_grid_search_iteration(self, frame: np.array, candidate_pairs: list = Non fill_rate = np.count_nonzero(frame) / frame.size self._metrics_buffer['fill_rate'].append(fill_rate) - if len(self._metrics_buffer['fill_rate']) < max(self.fps, 30): + if len(self._metrics_buffer['fill_rate']) < 30: return False if candidate_idx >= len(candidate_pairs): @@ -225,7 +162,7 @@ def _ir_grid_search_iteration(self, frame: np.array, candidate_pairs: list = Non return False # Skip first half second of frames to allow for auto exposure to settle down - fill_rate_avg = np.mean(self._metrics_buffer['fill_rate'][int(self.fps // 2):]) + fill_rate_avg = np.mean(self._metrics_buffer['fill_rate'][15:]) self._X.append([self._dot_projector_brightness, self._flood_brightness]) self._y.append(fill_rate_avg) @@ -237,7 +174,7 @@ def _check_consistency(self, frame): fill_rate = np.count_nonzero(frame) / frame.size self._metrics_buffer['fill_rate'].append(fill_rate) - if len(self._metrics_buffer['fill_rate']) < max(self.fps, 30): + if len(self._metrics_buffer['fill_rate']) < 30: return fill_rate_avg = np.mean(self._metrics_buffer['fill_rate']) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py index 4f8971e85..38f17f8b2 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py @@ -1,13 +1,7 @@ -from typing import Tuple, List - -import numpy as np +from typing import List, Optional from depthai_sdk.classes.packets import FramePacket from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout -from 
depthai_sdk.recorders.video_recorder import VideoRecorder -from depthai_sdk.recorders.video_writers import AvWriter -from depthai_sdk.visualize.configs import TextPosition -from depthai_sdk.visualize.visualizer import Platform, Visualizer try: import cv2 @@ -20,83 +14,44 @@ class XoutFrames(XoutBase): Stream of frames. Single message, no syncing required. """ - def __init__(self, frames: StreamXout, fps: float = 30, frame_shape: Tuple[int, ...] = None): + def __init__(self, + frames: StreamXout, + fourcc: Optional[str] = None, # 'mjpeg', 'h264', 'hevc' + ): """ Args: frames: StreamXout object. - fps: Frames per second for the output stream. - frame_shape: Shape of the frame. If not provided, it will be inferred from the first frame. + fourcc: Codec to use for encoding. If None, no encoding will be done. """ self.frames = frames self.name = frames.name - - self.fps = fps - self._video_recorder = None - self._is_recorder_enabled = None - self._frame_shape = frame_shape + self._fourcc = fourcc + self._codec = None super().__init__() - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None - ) -> None: - self._visualizer = visualizer - self._visualizer_enabled = visualizer_enabled - self.name = name or self.name - - def setup_recorder(self, recorder: VideoRecorder) -> None: - self._video_recorder = recorder - - def visualize(self, packet: FramePacket) -> None: - # Frame shape may be 1D, that means it's an encoded frame - if self._visualizer.frame_shape is None or np.array(self._visualizer.frame_shape).ndim == 1: - if self._frame_shape is not None: - self._visualizer.frame_shape = self._frame_shape - else: - self._visualizer.frame_shape = packet.frame.shape - - if self._visualizer.config.output.show_fps: - self._visualizer.add_text( - text=f'FPS: {self._fps.fps():.1f}', - position=TextPosition.TOP_LEFT - ) - - if self.callback: # Don't display frame, call the callback - self.callback(packet) - else: - packet.frame = self._visualizer.draw(packet.frame) - # Draw on the frame - if self._visualizer.platform == Platform.PC: - cv2.imshow(self.name, packet.frame) - else: - pass - - def on_record(self, packet) -> None: - if self._video_recorder: - if isinstance(self._video_recorder[self.name], AvWriter): - self._video_recorder.write(self.name, packet.msg) - else: - self._video_recorder.write(self.name, packet.frame) + def set_fourcc(self, fourcc: str) -> 'XoutFrames': + self._fourcc = fourcc + return self def xstreams(self) -> List[StreamXout]: return [self.frames] - def new_msg(self, name: str, msg) -> None: + def new_msg(self, name: str, msg): if name not in self._streams: return - - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - packet = FramePacket(self.name or name, - msg, - msg.getCvFrame() if cv2 else None, - self._visualizer) - - self.queue.put(packet, block=False) - - def close(self) -> None: - if self._video_recorder is not None: - self._video_recorder.close() + return FramePacket(self.get_packet_name(), msg) + + def get_codec(self): + # No codec, frames are NV12/YUV/BGR, so we can just use imgFrame.getCvFrame() + if self._fourcc is None: + return None + + if self._codec is None: + try: + import av + except ImportError: + raise ImportError('Attempted to decode an encoded frame, but av is not installed.' 
+ ' Please install it with `pip install av`') + self._codec = av.CodecContext.create(self._fourcc, "r") + return self._codec diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_h26x.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_h26x.py deleted file mode 100644 index 5ace19d6e..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_h26x.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import Tuple - -import depthai as dai - -from depthai_sdk.classes import FramePacket -from depthai_sdk.oak_outputs.xout.xout_base import StreamXout -from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames - -try: - import av -except ImportError: - av = None - - -class XoutH26x(XoutFrames): - def __init__(self, - frames: StreamXout, - color: bool, - profile: dai.VideoEncoderProperties.Profile, - fps: float, - frame_shape: Tuple[int, ...]): - super().__init__(frames) - self.name = 'H26x Stream' - self.color = color - self.profile = profile - self.fps = fps - self._frame_shape = frame_shape - fourcc = 'hevc' if profile == dai.VideoEncoderProperties.Profile.H265_MAIN else 'h264' - self.codec = av.CodecContext.create(fourcc, "r") if av else None - - def decode_frame(self, packet: FramePacket): - if not self.codec: - raise ImportError('av is not installed. Please install it with `pip install av`') - - enc_packets = self.codec.parse(packet.msg.getData()) - if len(enc_packets) == 0: - return None - - frames = self.codec.decode(enc_packets[-1]) - if not frames: - return None - - frame = frames[0].to_ndarray(format='bgr24') - - # If it's Mono, squeeze from 3 planes (height, width, 3) to single plane (height, width) - if not self.color: - frame = frame[:, :, 0] - - return frame - - def visualize(self, packet: FramePacket): - decoded_frame = self.decode_frame(packet) - if decoded_frame is None: - return - - packet.frame = decoded_frame - super().visualize(packet) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py index cf9d016f5..2f9d7b650 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py @@ -2,128 +2,37 @@ import depthai as dai import numpy as np +from ahrs.filters import Mahony from depthai_sdk.classes import IMUPacket from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout -from depthai_sdk.visualize.visualizer import Visualizer - -try: - import cv2 -except ImportError: - cv2 = None class XoutIMU(XoutBase): - def __init__(self, imu_xout: StreamXout): + def __init__(self, imu_xout: StreamXout, fps: int): self.imu_out = imu_xout - self.packets = [] - self.start_time = 0.0 - - self.fig = None - self.axes = None - self.acceleration_lines = [] - self.gyroscope_lines = [] - - self.acceleration_buffer = [] - self.gyroscope_buffer = [] + self._ahrs = Mahony(frequency=fps) + self._ahrs.Q = np.array([1, 0, 0, 0], dtype=np.float64) super().__init__() self.name = 'IMU' - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None, _=None): - from matplotlib import pyplot as plt - - self._visualizer = visualizer - self._visualizer_enabled = visualizer_enabled - self.name = name or self.name - - self.fig, self.axes = plt.subplots(2, 1, figsize=(10, 10), constrained_layout=True) - labels = ['x', 'y', 'z'] - colors = ['r', 'g', 'b'] - - for i in range(3): - self.acceleration_lines.append(self.axes[0].plot([], [], label=f'Acceleration {labels[i]}', color=colors[i])[0]) - 
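get_codec() above lazily creates a PyAV CodecContext from the stream's fourcc, which is what the deleted XoutH26x used to do eagerly in its constructor. For reference, the same decode path as a standalone sketch; `raw_bytes` stands in for ImgFrame.getData(), and the codec context is reused across frames just as the SDK does:

import av  # pip install av

# One context per stream: 'hevc' for H.265, 'h264' for H.264, 'mjpeg' for MJPEG
codec = av.CodecContext.create('hevc', 'r')

def decode_frame(raw_bytes: bytes, mono: bool = False):
    """Decode one encoded ImgFrame payload, mirroring the removed XoutH26x.decode_frame()."""
    enc_packets = codec.parse(raw_bytes)
    if not enc_packets:
        return None
    frames = codec.decode(enc_packets[-1])
    if not frames:
        return None
    frame = frames[0].to_ndarray(format='bgr24')
    # Mono streams decode to 3 identical planes; keep a single one
    return frame[:, :, 0] if mono else frame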
self.axes[0].set_ylabel('Acceleration (m/s^2)') - self.axes[0].set_xlabel('Time (s)') - self.axes[0].legend() - - for i in range(3): - self.gyroscope_lines.append(self.axes[1].plot([], [], label=f'Gyroscope {labels[i]}', color=colors[i])[0]) - self.axes[1].set_ylabel('Gyroscope (rad/s)') - self.axes[1].set_xlabel('Time (s)') - self.axes[1].legend() - - def visualize(self, packet: IMUPacket): - if self.start_time == 0.0: - self.start_time = packet.data[0].acceleroMeter.timestamp.get() - - acceleration_x = [el.acceleroMeter.x for el in packet.data] - acceleration_y = [el.acceleroMeter.y for el in packet.data] - acceleration_z = [el.acceleroMeter.z for el in packet.data] - - t_acceleration = [(el.acceleroMeter.timestamp.get() - self.start_time).total_seconds() for el in packet.data] - - # Keep only last 100 values - if len(self.acceleration_buffer) > 100: - self.acceleration_buffer.pop(0) - - self.acceleration_buffer.append([t_acceleration, acceleration_x, acceleration_y, acceleration_z]) - - gyroscope_x = [el.gyroscope.x for el in packet.data] - gyroscope_y = [el.gyroscope.y for el in packet.data] - gyroscope_z = [el.gyroscope.z for el in packet.data] - - t_gyroscope = [(el.gyroscope.timestamp.get() - self.start_time).total_seconds() for el in packet.data] - - # Keep only last 100 values - if len(self.gyroscope_buffer) > 100: - self.gyroscope_buffer.pop(0) - - self.gyroscope_buffer.append([t_gyroscope, gyroscope_x, gyroscope_y, gyroscope_z]) - - # Plot acceleration - for i in range(3): - self.acceleration_lines[i].set_xdata([el[0] for el in self.acceleration_buffer]) - self.acceleration_lines[i].set_ydata([el[i + 1] for el in self.acceleration_buffer]) - - self.axes[0].set_xlim(self.acceleration_buffer[0][0][0], t_acceleration[-1]) - self.axes[0].set_ylim(-20, 20) - - # Plot gyroscope - for i in range(3): - self.gyroscope_lines[i].set_xdata([el[0] for el in self.gyroscope_buffer]) - self.gyroscope_lines[i].set_ydata([el[i + 1] for el in self.gyroscope_buffer]) - - self.axes[1].set_xlim(self.gyroscope_buffer[0][0][0], t_acceleration[-1]) - self.axes[1].set_ylim(-20, 20) - - self.fig.canvas.draw() - - # Convert plot to numpy array - img = np.fromstring(self.fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') - img = img.reshape(self.fig.canvas.get_width_height()[::-1] + (3,)) - img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) - - packet.frame = img - - if self.callback: # Don't display frame, call the callback - self.callback(packet) - else: - cv2.imshow(self.name, packet.frame) - def xstreams(self) -> List[StreamXout]: return [self.imu_out] - def new_msg(self, name: str, msg: dai.IMUData) -> None: + def new_msg(self, name: str, msg: dai.IMUData): if name not in self._streams: return - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - packet = IMUPacket(msg.packets) - - self.queue.put(packet, block=False) + arr = [] + for packet in msg.packets: + gyro_vals = np.array([packet.gyroscope.z, packet.gyroscope.x, packet.gyroscope.y]) + accelero_vals = np.array([packet.acceleroMeter.z, packet.acceleroMeter.x, packet.acceleroMeter.y]) + self._ahrs.Q = self._ahrs.updateIMU(self._ahrs.Q, gyro_vals, accelero_vals) + rotation = dai.IMUReportRotationVectorWAcc() + rotation.i = self._ahrs.Q[0] + rotation.j = self._ahrs.Q[1] + rotation.k = self._ahrs.Q[2] + rotation.real = self._ahrs.Q[3] + arr.append(IMUPacket(self.get_packet_name(), packet, rotation=rotation)) + return arr diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_mjpeg.py 
b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_mjpeg.py deleted file mode 100644 index b2611c566..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_mjpeg.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import Tuple - -import numpy as np - -from depthai_sdk.classes.packets import FramePacket -from depthai_sdk.oak_outputs.xout.xout_base import StreamXout -from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames - -try: - import cv2 -except ImportError: - cv2 = None - - -class XoutMjpeg(XoutFrames): - name: str = "MJPEG Stream" - - def __init__(self, frames: StreamXout, color: bool, lossless: bool, fps: float, frame_shape: Tuple[int, ...]): - super().__init__(frames) - # We could use cv2.IMREAD_UNCHANGED, but it produces 3 planes (RGB) for mono frame instead of a single plane - self.flag = cv2.IMREAD_COLOR if color else cv2.IMREAD_GRAYSCALE - self.lossless = lossless - self.fps = fps - self._frame_shape = frame_shape - - if lossless and self._visualizer: - raise ValueError('Visualizing Lossless MJPEG stream is not supported!') - - def decode_frame(self, packet: FramePacket) -> np.ndarray: - return cv2.imdecode(packet.msg.getData(), self.flag) - - def visualize(self, packet: FramePacket): - packet.frame = self.decode_frame(packet) - super().visualize(packet) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py index 64641493c..db04f30fe 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py @@ -2,47 +2,41 @@ from typing import List, Union, Dict, Any, Optional, Tuple import depthai as dai -import numpy as np from depthai_sdk.classes import Detections, ImgLandmarks, SemanticSegmentation from depthai_sdk.classes.enum import ResizeMode from depthai_sdk.classes.packets import ( - _Detection, DetectionPacket, TrackerPacket, SpatialBbMappingPacket, TwoStagePacket, NNDataPacket + Detection, + DetectionPacket, + ImgLandmarksPacket, + NnOutputPacket, + SemanticSegmentationPacket, + SpatialBbMappingPacket, + TwoStagePacket, + NNDataPacket ) -from depthai_sdk.classes.enum import ResizeMode +from depthai_sdk.oak_outputs.syncing import SequenceNumSync from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout +from depthai_sdk.oak_outputs.xout.xout_depth import XoutDisparityDepth from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames from depthai_sdk.oak_outputs.xout.xout_seq_sync import XoutSeqSync -from depthai_sdk.visualize.visualizer import Visualizer -from depthai_sdk.visualize.visualizer_helper import hex_to_bgr, colorize_disparity, draw_mappings, depth_to_disp_factor +from depthai_sdk.types import XoutNNOutputPacket from depthai_sdk.visualize.bbox import BoundingBox -from depthai_sdk.visualize.colors import generate_colors -try: - import cv2 -except ImportError: - cv2 = None +from depthai_sdk.visualize.colors import generate_colors, hex_to_bgr + class XoutNnData(XoutBase): def __init__(self, xout: StreamXout): self.nndata_out = xout super().__init__() - self.name = 'NNData' - - def visualize(self, packet: NNDataPacket): - print('Visualization of NNData is not supported') def xstreams(self) -> List[StreamXout]: return [self.nndata_out] - def new_msg(self, name: str, msg: dai.NNData) -> None: + def new_msg(self, name: str, msg: dai.NNData) -> NNDataPacket: if name not in self._streams: return - - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - packet = 
NNDataPacket(name=self.name, nn_data=msg) - self.queue.put(packet, block=False) + return NNDataPacket(name=self.get_packet_name(), nn_data=msg) class XoutNnResults(XoutSeqSync, XoutFrames): @@ -52,7 +46,8 @@ def xstreams(self) -> List[StreamXout]: def __init__(self, det_nn: 'NNComponent', frames: StreamXout, - nn_results: StreamXout): + nn_results: StreamXout, + bbox: BoundingBox): self.det_nn = det_nn self.nn_results = nn_results @@ -61,6 +56,7 @@ def __init__(self, self.name = 'NN results' self.labels = None + self.bbox = bbox # TODO: add support for colors, generate new colors for each label that doesn't have colors if det_nn._labels: @@ -84,176 +80,85 @@ def __init__(self, self._resize_mode: ResizeMode = det_nn._ar_resize_mode self._nn_size: Tuple[int, int] = det_nn._size - self.segmentation_colormap = None - - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None): - super().setup_visualize(visualizer, visualizer_enabled, name) - - def on_callback(self, packet: Union[DetectionPacket, TrackerPacket]): - # Convert Grayscale to BGR - if len(packet.frame.shape) == 2: - packet.frame = np.dstack((packet.frame, packet.frame, packet.frame)) - - frame_shape = self.det_nn._input.stream_size[::-1] - - if self._frame_shape is None: - # Lazy-load the frame shape - self._frame_shape = np.array([*frame_shape]) - if self._visualizer: - self._visualizer.frame_shape = self._frame_shape - - bbox = BoundingBox().resize_to_aspect_ratio(self._frame_shape, self._nn_size, self._resize_mode) - - # Add detections to packet - if isinstance(packet.img_detections, dai.ImgDetections) \ - or isinstance(packet.img_detections, dai.SpatialImgDetections) \ - or isinstance(packet.img_detections, Detections): - - for detection in packet.img_detections.detections: - d = _Detection() - d.img_detection = detection - d.label = self.labels[detection.label][0] if self.labels else str(detection.label) - d.color = self.labels[detection.label][1] if self.labels else (255, 255, 255) - - d.top_left, d.bottom_right = bbox.get_relative_bbox(BoundingBox(detection)).denormalize(self._frame_shape) - packet.detections.append(d) - - if self._visualizer: - # Add detections to visualizer - self._visualizer.add_detections( - packet.img_detections.detections, - bbox, - self.labels, - is_spatial=packet._is_spatial_detection() - ) - elif isinstance(packet.img_detections, ImgLandmarks): - if not self._visualizer: - return - - all_landmarks = packet.img_detections.landmarks - all_landmarks_indices = packet.img_detections.landmarks_indices - colors = packet.img_detections.colors - for landmarks, indices in zip(all_landmarks, all_landmarks_indices): - for i, landmark in enumerate(landmarks): - # Map normalized coordinates to frame coordinates - l = [(int(point[0] * self._frame_shape[1]), int(point[1] * self._frame_shape[0])) for point in landmark] - idx = indices[i] - - self._visualizer.add_line(pt1=tuple(l[0]), pt2=tuple(l[1]), color=colors[idx], thickness=4) - self._visualizer.add_circle(coords=tuple(l[0]), radius=8, color=colors[idx], thickness=-1) - self._visualizer.add_circle(coords=tuple(l[1]), radius=8, color=colors[idx], thickness=-1) - elif isinstance(packet.img_detections, SemanticSegmentation): - raise NotImplementedError('Semantic segmentation visualization is not implemented yet!') - if not self._visualizer: - return - - # Generate colormap if not already generated - if self.segmentation_colormap is None: - n_classes = len(self.labels) if self.labels else 8 - self.segmentation_colormap = 
generate_colors(n_classes) - - mask = np.array(packet.img_detections.mask).astype(np.uint8) - - if mask.ndim == 3: - mask = np.argmax(mask, axis=0) - - try: - colorized_mask = np.array(self.segmentation_colormap)[mask] - except IndexError: - unique_classes = np.unique(mask) - max_class = np.max(unique_classes) - new_colors = generate_colors(max_class - len(self.segmentation_colormap) + 1) - self.segmentation_colormap.extend(new_colors) - colorized_mask = np.array(self.segmentation_colormap)[mask] - - # bbox = None - # if self.normalizer.resize_mode == ResizeMode.LETTERBOX: - # bbox = self.normalizer.get_letterbox_bbox(packet.frame, normalize=True) - # input_h, input_w = self.normalizer.aspect_ratio - # resize_bbox = bbox[0] * input_w, bbox[1] * input_h, bbox[2] * input_w, bbox[3] * input_h - # resize_bbox = np.int0(resize_bbox) - # else: - # resize_bbox = self.normalizer.normalize(frame=np.zeros(self._frame_shape, dtype=bool), - # bbox=bbox or (0., 0., 1., 1.)) - - # x1, y1, x2, y2 = resize_bbox - # h, w = packet.frame.shape[:2] - # # Stretch mode - # if self.normalizer.resize_mode == ResizeMode.STRETCH: - # colorized_mask = cv2.resize(colorized_mask, (w, h)) - # elif self.normalizer.resize_mode == ResizeMode.LETTERBOX: - # colorized_mask = cv2.resize(colorized_mask[y1:y2, x1:x2], (w, h)) - # else: - # padded_mask = np.zeros((h, w, 3), dtype=np.uint8) - # resized_mask = cv2.resize(colorized_mask, (x2 - x1, y2 - y1)) - # padded_mask[y1:y2, x1:x2] = resized_mask - # colorized_mask = padded_mask - - # self._visualizer.add_mask(colorized_mask, alpha=0.5) - - def package(self, msgs: Dict): - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - decode_fn = self.det_nn._decode_fn - packet = DetectionPacket( - self.get_packet_name(), - msgs[self.frames.name], - msgs[self.nn_results.name] if decode_fn is None else decode_fn(msgs[self.nn_results.name]), - self._visualizer - ) - - self.queue.put(packet, block=False) - -class XoutSpatialBbMappings(XoutSeqSync, XoutFrames): + def package(self, msgs: Dict) -> XoutNNOutputPacket: + nn_result = msgs[self.nn_results.name] + img = msgs[self.frames.name] + if type(nn_result) == dai.NNData: + decode_fn = self.det_nn._decode_fn + + if decode_fn is None: + return NnOutputPacket(self.get_packet_name(), img, nn_result, self.bbox) + + decoded_nn_result = decode_fn(nn_result) + if type(decoded_nn_result) == Detections: + packet = DetectionPacket(self.get_packet_name(), img, nn_result, self.bbox) + return self._add_detections_to_packet(packet, decoded_nn_result) + elif type(decoded_nn_result) == ImgLandmarks: + return ImgLandmarksPacket(self.get_packet_name(), img, nn_result, decoded_nn_result, self.bbox) + elif type(decoded_nn_result) == SemanticSegmentation: + return SemanticSegmentationPacket(self.get_packet_name(), img, nn_result, decoded_nn_result, self.bbox) + raise ValueError(f'NN result decoding failed! 
decode() returned type {type(nn_result)}') + + elif type(nn_result) in [dai.ImgDetections, dai.SpatialImgDetections]: + packet = DetectionPacket(self.get_packet_name(), img, nn_result, self.bbox) + return self._add_detections_to_packet(packet, nn_result) + else: + raise ValueError(f'Unknown NN result type: {type(nn_result)}') + + def _add_detections_to_packet(self, + packet: DetectionPacket, + dets: Union[dai.ImgDetections, dai.SpatialImgDetections, Detections] + ) -> DetectionPacket: + for detection in dets.detections: + packet.detections.append(Detection( + img_detection=detection if isinstance(detection, dai.ImgDetection) else None, + label_str=self.labels[detection.label][0] if self.labels else str(detection.label), + confidence=detection.confidence, + color=self.labels[detection.label][1] if self.labels else (255, 255, 255), + bbox=BoundingBox(detection), + angle=detection.angle if hasattr(detection, 'angle') else None, + ts=dets.getTimestamp() + )) + return packet + + +class XoutSpatialBbMappings(XoutDisparityDepth, SequenceNumSync): def __init__(self, device: dai.Device, stereo: dai.node.StereoDepth, - frames: StreamXout, - configs: StreamXout): + frames: StreamXout, # passthroughDepth + configs: StreamXout, # out + dispScaleFactor: float, + bbox: BoundingBox): self._stereo = stereo self.frames = frames self.configs = configs + self.bbox = bbox - XoutFrames.__init__(self, frames) - XoutSeqSync.__init__(self, [frames, configs]) - - self.device = device - self.multiplier = 255 / 95.0 - self.factor = None - self.name = 'Depth & Bounding Boxes' + XoutDisparityDepth.__init__(self, device, frames, dispScaleFactor, None) + SequenceNumSync.__init__(self, 2) - def xstreams(self) -> List[StreamXout]: - return [self.frames, self.configs] + def new_msg(self, name: str, msg): + # Ignore frames that we aren't listening for + if name not in self._streams: return - def visualize(self, packet: SpatialBbMappingPacket): - if not self.factor: - size = (packet.msg.getWidth(), packet.msg.getHeight()) - self.factor = depth_to_disp_factor(self.device, self._stereo) + synced = self.sync(msg.getSequenceNum(), name, msg) + if synced: + return self.package(synced) - depth = np.array(packet.msg.getFrame()) - with np.errstate(all='ignore'): - disp = (self.factor / depth).astype(np.uint8) + def on_callback(self, packet) -> None: + pass - print('disp max', np.max(disp), 'disp min', np.min(disp)) - packet.frame = colorize_disparity(disp, multiplier=1) - draw_mappings(packet) - - super().visualize(packet) + def xstreams(self) -> List[StreamXout]: + return [self.frames, self.configs] - def package(self, msgs: Dict): - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - packet = SpatialBbMappingPacket( + def package(self, msgs: Dict) -> SpatialBbMappingPacket: + return SpatialBbMappingPacket( self.get_packet_name(), msgs[self.frames.name], msgs[self.configs.name], - self._visualizer + disp_scale_factor=self.disp_scale_factor, ) - self.queue.put(packet, block=False) class XoutTwoStage(XoutNnResults): @@ -261,11 +166,10 @@ class XoutTwoStage(XoutNnResults): Two stage syncing based on sequence number. Each frame produces ImgDetections msg that contains X detections. Each detection (if not on blacklist) will crop the original frame and forward it to the second (stage) NN for inferencing. 
- """ - """ + msgs = { '1': TwoStageSyncPacket(), - '2': TwoStageSyncPacket(), + '2': TwoStageSyncPacket(), } """ @@ -276,10 +180,11 @@ def __init__(self, det_out: StreamXout, second_nn_out: StreamXout, device: dai.Device, - input_queue_name: str): + input_queue_name: str, + bbox: BoundingBox): self.second_nn_out = second_nn_out # Save StreamXout before initializing super()! - super().__init__(det_nn, frames, det_out) + super().__init__(det_nn, frames, det_out, bbox) self.msgs: Dict[str, Dict[str, Any]] = dict() self.det_nn = det_nn @@ -301,7 +206,7 @@ def xstreams(self) -> List[StreamXout]: # No need for `def visualize()` as `XoutNnResults.visualize()` does what we want - def new_msg(self, name: str, msg: dai.Buffer) -> None: + def new_msg(self, name: str, msg: dai.Buffer): if name not in self._streams: return # From Replay modules. TODO: better handling? @@ -373,33 +278,22 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: self.input_cfg_queue.send(cfg) - # print(f'Added detection seq {seq}') elif name in self.frames.name: self.msgs[seq][name] = msg - # print(f'Added frame seq {seq}') else: raise ValueError('Message from unknown stream name received by TwoStageSeqSync!') if self.synced(seq): - # print('Synced', seq) # Frames synced! - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - + dets = self.msgs[seq][self.nn_results.name] packet = TwoStagePacket( self.get_packet_name(), self.msgs[seq][self.frames.name], - self.msgs[seq][self.nn_results.name], + dets, self.msgs[seq][self.second_nn_out.name], self.whitelist_labels, - self._visualizer + self.bbox ) - self.queue.put(packet, block=False) - - # Throws RuntimeError: dictionary changed size during iteration - # for s in self.msgs: - # if int(s) <= int(seq): - # del self.msgs[s] with self.lock: new_msgs = {} @@ -408,6 +302,8 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: new_msgs[name] = msg self.msgs = new_msgs + return self._add_detections_to_packet(packet, dets) + def add_detections(self, seq: str, dets: dai.ImgDetections): # Used to match the scaled bounding boxes by the 2-stage NN script node self.msgs[seq][self.nn_results.name] = dets diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn_encoded.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn_encoded.py deleted file mode 100644 index 9db1202d0..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn_encoded.py +++ /dev/null @@ -1,58 +0,0 @@ -from typing import Tuple, List - -import depthai as dai - -from depthai_sdk.classes.packets import FramePacket -from depthai_sdk.oak_outputs.xout.xout_base import StreamXout -from depthai_sdk.oak_outputs.xout.xout_h26x import XoutH26x -from depthai_sdk.oak_outputs.xout.xout_mjpeg import XoutMjpeg -from depthai_sdk.oak_outputs.xout.xout_nn import XoutNnResults - - -class XoutNnH26x(XoutNnResults, XoutH26x): - name: str = "H26x NN Results" - # Streams - frames: StreamXout - nn_results: StreamXout - - def __init__(self, - det_nn: 'NNComponent', - frames: StreamXout, - nn_results: StreamXout, - color: bool, - profile: dai.VideoEncoderProperties.Profile, - fps: float, - frame_shape: Tuple[int, ...]): - self.nn_results = nn_results - - XoutH26x.__init__(self, frames, color, profile, fps, frame_shape) - XoutNnResults.__init__(self, det_nn, frames, nn_results) - - def xstreams(self) -> List[StreamXout]: - return [self.frames, self.nn_results] - - def visualize(self, packet: FramePacket): - decoded_frame = XoutH26x.decode_frame(self, packet) - if decoded_frame is None: - return - 
- packet.frame = decoded_frame - XoutNnResults.visualize(self, packet) - - -class XoutNnMjpeg(XoutNnResults, XoutMjpeg): - def __init__(self, - det_nn: 'NNComponent', - frames: StreamXout, - nn_results: StreamXout, - color: bool, - lossless: bool, - fps: float, - frame_shape: Tuple[int, ...]): - self.nn_results = nn_results - XoutMjpeg.__init__(self, frames, color, lossless, fps, frame_shape) - XoutNnResults.__init__(self, det_nn, frames, nn_results) - - def visualize(self, packet: FramePacket): - packet.frame = XoutMjpeg.decode_frame(self, packet) - XoutNnResults.visualize(self, packet) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py index 26bb2a438..6dd73ee0e 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py @@ -1,14 +1,12 @@ -import logging -import warnings -from typing import List, Optional, Union +from typing import List, Optional import depthai as dai import numpy as np -from depthai_sdk.classes.packets import DepthPacket, PointcloudPacket +from depthai_sdk.classes.packets import PointcloudPacket +from depthai_sdk.components.pointcloud_helper import create_xyz from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.components.pointcloud_helper import create_xyz try: import cv2 @@ -20,27 +18,21 @@ class XoutPointcloud(XoutFrames): def __init__(self, device: dai.Device, depth_frames: StreamXout, - fps: int, color_frames: Optional[StreamXout] = None): - self.color_frames = color_frames - XoutFrames.__init__(self, frames=depth_frames, fps=fps) + XoutFrames.__init__(self, frames=depth_frames) self.name = 'Pointcloud' - self.fps = fps self.device = device self.xyz = None self.msgs = dict() - def visualize(self, packet: DepthPacket): - pass - def xstreams(self) -> List[StreamXout]: if self.color_frames is not None: return [self.frames, self.color_frames] return [self.frames] - def new_msg(self, name: str, msg: dai.Buffer) -> None: + def new_msg(self, name: str, msg: dai.Buffer): if name not in self._streams: return # From Replay modules. TODO: better handling? @@ -59,9 +51,6 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: if len(self.msgs[seq]) == len(self.xstreams()): # Frames synced! 
- if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - depth_frame: dai.ImgFrame = self.msgs[seq][self.frames.name] color_frame = None @@ -71,21 +60,19 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: if self.xyz is None: self.xyz = create_xyz(self.device, depth_frame.getWidth(), depth_frame.getHeight()) - pcl = self.xyz * np.expand_dims(np.array(depth_frame.getFrame()), axis = -1) + pcl = self.xyz * np.expand_dims(np.array(depth_frame.getFrame()), axis=-1) # TODO: postprocessing - - packet = PointcloudPacket( - self.get_packet_name(), - pcl, - depth_map=depth_frame, - color_frame=color_frame, - visualizer=self._visualizer - ) - self.queue.put(packet, block=False) - + # Cleanup new_msgs = {} for name, msg in self.msgs.items(): if int(name) > int(seq): new_msgs[name] = msg self.msgs = new_msgs + + return PointcloudPacket( + self.get_packet_name(), + pcl, + depth_map=depth_frame, + colorize_frame=color_frame + ) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py index a9399aebf..31665af85 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py @@ -1,5 +1,5 @@ from abc import abstractmethod -from typing import List +from typing import List, Union, Dict from depthai_sdk.oak_outputs.syncing import SequenceNumSync from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout @@ -10,20 +10,22 @@ def xstreams(self) -> List[StreamXout]: return self.streams def __init__(self, streams: List[StreamXout]): - self.streams = streams + # Filter out None streams + self.streams = [s for s in streams if s is not None] + # Save StreamXout before initializing super()! 
XoutBase.__init__(self) - SequenceNumSync.__init__(self, len(streams)) + SequenceNumSync.__init__(self, len(self.streams)) self.msgs = dict() @abstractmethod - def package(self, msgs: List): + def package(self, msgs: Union[List, Dict]): raise NotImplementedError('XoutSeqSync is an abstract class, you need to override package() method!') - def new_msg(self, name: str, msg) -> None: + def new_msg(self, name: str, msg): # Ignore frames that we aren't listening for if name not in self._streams: return synced = self.sync(msg.getSequenceNum(), name, msg) if synced: - self.package(synced) + return self.package(synced) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py index 08309cd5c..ecd989d5f 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py @@ -1,345 +1,210 @@ import logging import math -from collections import defaultdict -from typing import Union, Dict, Optional +from datetime import timedelta +from typing import Dict, Optional, List, Union, Tuple import depthai as dai import numpy as np -from depthai_sdk.classes import DetectionPacket, TrackerPacket -from depthai_sdk.classes.packets import _TrackingDetection + +from depthai_sdk.classes import TrackerPacket +from depthai_sdk.classes.packets import TrackingDetection from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_nn import XoutNnResults from depthai_sdk.tracking import KalmanFilter from depthai_sdk.visualize.bbox import BoundingBox -from depthai_sdk.visualize.configs import TextPosition -from depthai_sdk.visualize.visualizer import Visualizer -class XoutTracker(XoutNnResults): - buffer_size: int = 10 +class TrackedObject: + def __init__(self, baseline: float, focal: float, apply_kalman: bool, calculate_speed: bool): + # Point + self.kalman_3d: Optional[KalmanFilter] = None + # BBox + self.kalman_2d: Optional[KalmanFilter] = None + + self.previous_detections: List[TrackingDetection] = [] + self.blacklist = False + self.lost_counter = 0 + + self.baseline = baseline + self.focal = focal + self.apply_kalman = apply_kalman + self.calculate_speed = calculate_speed + + def new_tracklet(self, tracklet: dai.Tracklet, ts: timedelta, color: Tuple, label: str): + is_3d = self._is_3d(tracklet) + tracking_det = TrackingDetection( + img_detection=tracklet.srcImgDetection, + label_str=label, + confidence=tracklet.srcImgDetection.confidence, + color=color, + bbox=BoundingBox(tracklet.srcImgDetection), + angle=None, + tracklet=tracklet, + ts=ts, + filtered_2d=self._calc_kalman_2d(tracklet, ts) if self.apply_kalman else None, + filtered_3d=self._calc_kalman_3d(tracklet, ts) if self.apply_kalman and is_3d else None, + speed=None, + ) + self.previous_detections.append(tracking_det) + # Calc speed should be called after adding new TrackingDetection to self.previous_detections + tracking_det.speed = self.calc_speed(ts) if (self.calculate_speed and is_3d) else None + + def calc_speed(self, ts: timedelta) -> Union[float, np.ndarray]: + """ + Should be called after adding new TrackingDetection to self.previous_detections + """ + + def get_coords(det) -> dai.Point3f: + return det.filtered_3d or det.tracklet.spatialCoordinates + + def get_dist(p1: dai.Point3f, p2: dai.Point3f) -> float: + return np.sqrt((p2.x - p1.x) ** 2 + (p2.y - p1.y) ** 2 + (p2.z - p1.z) ** 2) / 1000 + + speeds = [] + for i in range(len(self.previous_detections) - 1): + d1 
= self.previous_detections[i] + # if d1 timestamp is older than 1 second, skip + if (ts - d1.ts).total_seconds() > 1: + continue + d2 = self.previous_detections[i + 1] + distance = get_dist(get_coords(d1), get_coords(d2)) + time = (d2.ts - d1.ts).total_seconds() + speeds.append(distance / time) + + if len(speeds) == 0: + return 0.0 + + window_size = 3 + window = np.hanning(window_size) + window /= window.sum() + + smoothed = np.convolve(speeds, window, mode='same') + return np.mean(smoothed) + + def _is_3d(self, tracklet: dai.Tracklet) -> bool: + return (tracklet.spatialCoordinates.x != 0.0 or + tracklet.spatialCoordinates.y != 0.0 or + tracklet.spatialCoordinates.z != 0.0) + + def _calc_kalman_3d(self, tracklet: dai.Tracklet, ts: timedelta) -> Union[None, dai.Point3f]: + x_space = tracklet.spatialCoordinates.x + y_space = tracklet.spatialCoordinates.y + z_space = tracklet.spatialCoordinates.z + meas_vec_space = np.array([[x_space], [y_space], [z_space]]) + meas_std_space = z_space ** 2 / (self.baseline * self.focal) + + if self.kalman_3d is None: + self.kalman_3d = KalmanFilter(10, 0.1, meas_vec_space, ts) + return None + + dt = (ts - self.kalman_3d.time).total_seconds() + self.kalman_3d.predict(dt) + self.kalman_3d.update(meas_vec_space) + self.kalman_3d.time = ts + self.kalman_3d.meas_std = meas_std_space + vec_space = self.kalman_3d.x + return dai.Point3f(vec_space[0], vec_space[1], vec_space[2]) + + def _calc_kalman_2d(self, tracklet: dai.Tracklet, ts: timedelta) -> Union[None, BoundingBox]: + bb = BoundingBox(tracklet.srcImgDetection) + x_mid, y_mid = bb.get_centroid().to_tuple() + + meas_vec_bbox = np.array([[x_mid], [y_mid], [bb.width], [bb.height]]) + + if self.kalman_2d is None: + self.kalman_2d = KalmanFilter(10, 0.1, meas_vec_bbox, ts) + return None + + dt = (ts - self.kalman_2d.time).total_seconds() + + self.kalman_2d.predict(dt) + self.kalman_2d.update(meas_vec_bbox) + self.kalman_2d.time = ts + vec_bbox = self.kalman_2d.x + + return BoundingBox([ + vec_bbox[0][0] - vec_bbox[2][0] / 2, + vec_bbox[1][0] - vec_bbox[3][0] / 2, + vec_bbox[0][0] + vec_bbox[2][0] / 2, + vec_bbox[1][0] + vec_bbox[3][0] / 2, + ]) + +class XoutTracker(XoutNnResults): def __init__(self, det_nn: 'NNComponent', frames: StreamXout, device: dai.Device, tracklets: StreamXout, + bbox: BoundingBox, apply_kalman: bool = False, forget_after_n_frames: Optional[int] = None, - calculate_speed: bool = False): - super().__init__(det_nn, frames, tracklets) + calculate_speed: bool = False, + ): + """ + apply_kalman: Whether to apply kalman filter to tracklets + forget_after_n_frames: If tracklet is lost for n frames, remove it from tracked_objects + + """ + super().__init__(det_nn, frames, tracklets, bbox) self.name = 'Object Tracker' - self.device = device - - self.__read_device_calibration() - - self.buffer = [] - self.spatial_buffer = [] + self.__read_device_calibration(device) - self.lost_counter = {} - self.blacklist = set() + self.tracked_objects: Dict[int, TrackedObject] = {} self.apply_kalman = apply_kalman self.forget_after_n_frames = forget_after_n_frames - self.kalman_filters: Dict[int, Dict[str, KalmanFilter]] = {} self.calculate_speed = calculate_speed - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None): - super().setup_visualize(visualizer, visualizer_enabled, name) - - def on_callback(self, packet: Union[DetectionPacket, TrackerPacket]): - if len(packet.frame.shape) == 2: - packet.frame = np.dstack((packet.frame, packet.frame, packet.frame)) - - 
frame_shape = self.det_nn._input.stream_size[::-1] - - if self._frame_shape is None: - # Lazy-load the frame shape - self._frame_shape = np.array([*frame_shape]) - if self._visualizer: - self._visualizer.frame_shape = self._frame_shape - - spatial_points = self._get_spatial_points(packet) - threshold = self.forget_after_n_frames - - if threshold: - self._update_lost_counter(packet, threshold) - - self._update_buffers(packet, spatial_points) - - # Optional kalman filter - if self.apply_kalman: - self._kalman_filter(packet, spatial_points) + def package(self, msgs: Dict) -> TrackerPacket: + tracklets: dai.Tracklets = msgs[self.nn_results.name] - # Estimate speed - tracklet2speed = self._calculate_speed(spatial_points) + for tracklet in tracklets.tracklets: + # If there is no id in self.tracked_objects, create new TrackedObject. This could happen if + # TrackingStatus.NEW, or we removed it (too many lost frames) + if tracklet.id not in self.tracked_objects: + self.tracked_objects[tracklet.id] = TrackedObject(self.baseline, self.focal, self.apply_kalman, + self.calculate_speed) - if self._visualizer: - self._add_tracklet_visualization(packet, spatial_points, tracklet2speed) - - self._add_detections(packet, tracklet2speed) + if tracklet.status == dai.Tracklet.TrackingStatus.NEW: + pass + elif tracklet.status == dai.Tracklet.TrackingStatus.TRACKED: + self.tracked_objects[tracklet.id].lost_counter = 0 + elif tracklet.status == dai.Tracklet.TrackingStatus.LOST: + self.tracked_objects[tracklet.id].lost_counter += 1 - def visualize(self, packet): - super().visualize(packet) + img_d = tracklet.srcImgDetection + # When adding new tracklet, TrackletObject class will also perform filtering + # and speed estimation + self.tracked_objects[tracklet.id] \ + .new_tracklet(tracklet, + tracklets.getTimestamp(), + self.labels[img_d.label][1] if self.labels else (255, 255, 255), + self.labels[img_d.label][0] if self.labels else str(img_d.label) + ) + if tracklet.status == dai.Tracklet.TrackingStatus.REMOVED or \ + (self.forget_after_n_frames is not None and \ + self.forget_after_n_frames <= self.tracked_objects[tracklet.id].lost_counter): + # Remove TrackedObject + self.tracked_objects.pop(tracklet.id) - def package(self, msgs: Dict): - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full packet = TrackerPacket( self.get_packet_name(), msgs[self.frames.name], - msgs[self.nn_results.name], - self._visualizer - ) - self.queue.put(packet, block=False) - - def _add_tracklet_visualization(self, packet, spatial_points, tracklet2speed): - h, w = self._frame_shape[:2] - filtered_tracklets = [tracklet for tracklet in packet.daiTracklets.tracklets if - tracklet.id not in self.blacklist] - - norm_bbox = BoundingBox().resize_to_aspect_ratio(packet.frame.shape, self._nn_size, self._resize_mode) - - self._visualizer.add_detections(detections=filtered_tracklets, - normalizer=norm_bbox, - label_map=self.labels, - spatial_points=spatial_points) - - # Add tracking ids - for tracklet in filtered_tracklets: - det = tracklet.srcImgDetection - bbox = (w * det.xmin, h * det.ymin, w * det.xmax, h * det.ymax) - bbox = tuple(map(int, bbox)) - self._visualizer.add_text( - f'ID: {tracklet.id}', - bbox=bbox, - position=TextPosition.MID - ) - - if self._visualizer.config.tracking.show_speed and tracklet.id in tracklet2speed: - speed = tracklet2speed[tracklet.id] - speed = f'{speed:.1f} m/s\n{speed * 3.6:.1f} km/h' - bbox = tracklet.srcImgDetection - bbox = (int(w * bbox.xmin), int(h * bbox.ymin), int(w * bbox.xmax), int(h 
* bbox.ymax)) - - self._visualizer.add_text( - speed, - bbox=bbox, - position=TextPosition.TOP_RIGHT, - outline=True - ) - - # Add tracking lines - self._visualizer.add_trail( - tracklets=[t for p in self.buffer for t in p.daiTracklets.tracklets if t.id not in self.blacklist], - label_map=self.labels, - bbox=norm_bbox, + tracklets, + bbox=self.bbox, ) - def _update_lost_counter(self, packet, lost_threshold: int): - for i, tracklet in enumerate(packet.daiTracklets.tracklets): - if tracklet.status == dai.Tracklet.TrackingStatus.NEW: - self.__remove_from_blacklist(tracklet) - self.lost_counter[tracklet.id] = 0 - elif tracklet.status == dai.Tracklet.TrackingStatus.TRACKED: - self.__remove_from_blacklist(tracklet) - self.lost_counter[tracklet.id] = 0 - elif tracklet.status == dai.Tracklet.TrackingStatus.LOST and tracklet.id in self.lost_counter: - self.lost_counter[tracklet.id] += 1 - - if tracklet.id in self.lost_counter and self.lost_counter[tracklet.id] >= lost_threshold: - self.__add_to_blacklist(tracklet) - self.lost_counter.pop(tracklet.id) - - def _update_buffers(self, packet, spatial_points=None): - # Update buffer - self.buffer.append(packet) - if self.buffer_size < len(self.buffer): - self.buffer.pop(0) - - # Update spatial buffer - if spatial_points is not None: - self.spatial_buffer.append(spatial_points) - if self.buffer_size < 5: - self.spatial_buffer.pop(0) - - def _kalman_filter(self, packet, spatial_points=None): - current_time = packet.daiTracklets.getTimestamp() - is_3d = spatial_points is not None - - tracklets = [] - - for i, tracklet in enumerate(packet.daiTracklets.tracklets): - if tracklet.id in self.blacklist: # Skip blacklisted tracklets - continue - - meas_vec_space = 0 - meas_std_space = 0 - - roi = tracklet.roi - x1 = roi.topLeft().x - y1 = roi.topLeft().y - x2 = roi.bottomRight().x - y2 = roi.bottomRight().y + for obj_id, tracked_obj in self.tracked_objects.items(): + if obj_id not in packet.tracklets: + packet.tracklets[obj_id] = [] + for tracking_det in tracked_obj.previous_detections: + packet.tracklets[obj_id].append(tracking_det) - if is_3d: - x_space = tracklet.spatialCoordinates.x - y_space = tracklet.spatialCoordinates.y - z_space = tracklet.spatialCoordinates.z - meas_vec_space = np.array([[x_space], [y_space], [z_space]]) - meas_std_space = z_space ** 2 / (self.baseline * self.focal) + return packet - meas_vec_bbox = np.array([[(x1 + x2) / 2], [(y1 + y2) / 2], [x2 - x1], [y2 - y1]]) - - if tracklet.status == dai.Tracklet.TrackingStatus.NEW: - self.kalman_filters[tracklet.id] = {'bbox': KalmanFilter(10, 0.1, meas_vec_bbox, current_time)} - if is_3d: - self.kalman_filters[tracklet.id]['space'] = KalmanFilter(10, 0.1, meas_vec_space, current_time) - - elif tracklet.status == dai.Tracklet.TrackingStatus.TRACKED or tracklet.status == dai.Tracklet.TrackingStatus.LOST: - if tracklet.id not in self.kalman_filters: - continue - - dt = current_time - self.kalman_filters[tracklet.id]['bbox'].time - dt = dt.total_seconds() - - self.kalman_filters[tracklet.id]['bbox'].predict(dt) - self.kalman_filters[tracklet.id]['bbox'].update(meas_vec_bbox) - self.kalman_filters[tracklet.id]['bbox'].time = current_time - vec_bbox = self.kalman_filters[tracklet.id]['bbox'].x - - if is_3d: - self.kalman_filters[tracklet.id]['space'].predict(dt) - self.kalman_filters[tracklet.id]['space'].update(meas_vec_space) - self.kalman_filters[tracklet.id]['space'].time = current_time - self.kalman_filters[tracklet.id]['space'].meas_std = meas_std_space - vec_space = 
self.kalman_filters[tracklet.id]['space'].x - - x1_filter = vec_bbox[0] - vec_bbox[2] / 2 - x2_filter = vec_bbox[0] + vec_bbox[2] / 2 - y1_filter = vec_bbox[1] - vec_bbox[3] / 2 - y2_filter = vec_bbox[1] + vec_bbox[3] / 2 - - rect = dai.Rect(x1_filter, y1_filter, x2_filter - x1_filter, y2_filter - y1_filter) - new_tracklet = self.__create_tracklet(tracklet, rect, vec_space if is_3d else None) - tracklets.append(new_tracklet) - - elif tracklet.status == dai.Tracklet.TrackingStatus.REMOVED: - self.kalman_filters.pop(tracklet.id, None) - - if tracklets: - packet.daiTracklets.tracklets = tracklets - - def _add_detections(self, packet, tracklet2speed): - for tracklet in packet.daiTracklets.tracklets: - if tracklet.id in self.blacklist: # Skip blacklisted tracklets - continue - - d = _TrackingDetection() - img_d = tracklet.srcImgDetection - d.tracklet = tracklet - d.label = self.labels[img_d.label][0] if self.labels else str(img_d.label) - d.color = self.labels[img_d.label][1] if self.labels else (255, 255, 255) - roi = tracklet.roi.denormalize(self._frame_shape[1], self._frame_shape[0]) - d.top_left = (int(roi.x), int(roi.y)) - d.bottom_right = (int(roi.x + roi.width), int(roi.y + roi.height)) - - if tracklet.id in tracklet2speed: - d.speed = tracklet2speed[tracklet.id] - d.speed_kmph = d.speed * 3.6 - d.speed_mph = d.speed * 2.23694 - - packet.detections.append(d) - - def _calculate_speed(self, spatial_points) -> dict: - if spatial_points is None or self.calculate_speed is False: - return {} - - tracklet2speed = {} - if spatial_points is not None: - spatial_coords = defaultdict(list) - t = defaultdict(list) - tracklets = defaultdict(list) - for buffered_packet in self.buffer: - for tracklet in buffered_packet.daiTracklets.tracklets: - spatial_coords[tracklet.id].append(tracklet.spatialCoordinates) - t[tracklet.id].append(buffered_packet.daiTracklets.getTimestamp()) - tracklets[tracklet.id].append(tracklet) - - indices = spatial_coords.keys() - for idx in indices: - # Skip if there is only one point - if len(spatial_coords[idx]) < 2: - continue - - n = len(spatial_coords[idx]) - speeds = [] - - for i in range(n - 1): - x1, y1, z1 = spatial_coords[idx][i].x, spatial_coords[idx][i].y, spatial_coords[idx][i].z - x2, y2, z2 = spatial_coords[idx][i + 1].x, spatial_coords[idx][i + 1].y, spatial_coords[idx][ - i + 1].z - distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2 + (z2 - z1) ** 2) / 1000 - time = (t[idx][i + 1] - t[idx][i]).total_seconds() - speeds.append(distance / time) - - window_size = 3 - window = np.hanning(window_size) - window /= window.sum() - - smoothed = np.convolve(speeds, window, mode='same') - speed = np.mean(smoothed) - - tracklet2speed[idx] = speed - - return tracklet2speed - - @staticmethod - def _get_spatial_points(packet) -> list: - try: - if packet._is_spatial_detection(): - spatial_points = [packet._get_spatials(det.srcImgDetection) - for det in - packet.daiTracklets.tracklets] - else: - spatial_points = None - except IndexError: - spatial_points = None - - return spatial_points - - def __get_img_detection(self, tracklet, confidence: float = 1.0): - """Converts tracklet to ImgDetection.""" - img_d = dai.ImgDetection() - img_d.label = tracklet.label - img_d.confidence = confidence - img_d.xmin = tracklet.roi.x - img_d.ymin = tracklet.roi.y - img_d.xmax = tracklet.roi.x + tracklet.roi.width - img_d.ymax = tracklet.roi.y + tracklet.roi.height - return img_d - - def __create_tracklet(self, tracklet, roi=None, spatial_points=None): - """Creates a Tracklet object.""" - 
tracklet_obj = dai.Tracklet() - tracklet_obj.id = tracklet.id - tracklet_obj.age = tracklet.age - tracklet_obj.label = tracklet.label - tracklet_obj.status = tracklet.status - tracklet_obj.roi = roi - if spatial_points is not None: - tracklet_obj.spatialCoordinates = dai.Point3f(spatial_points[0], spatial_points[1], spatial_points[2]) - else: - tracklet_obj.spatialCoordinates = tracklet.spatialCoordinates - - img_d = self.__get_img_detection(tracklet, confidence=tracklet.srcImgDetection.confidence) - tracklet_obj.srcImgDetection = img_d - return tracklet_obj - - def __read_device_calibration(self): - calib = self.device.readCalibration() + def __read_device_calibration(self, device: dai.Device): + calib = device.readCalibration() eeprom = calib.getEepromData() left_cam = calib.getStereoLeftCameraId() if left_cam != dai.CameraBoardSocket.AUTO and left_cam in eeprom.cameraData.keys(): @@ -351,11 +216,3 @@ def __read_device_calibration(self): logging.warning("Calibration data missing, using OAK-D defaults") self.baseline = 75 self.focal = 440 - - def __add_to_blacklist(self, tracklet): - if tracklet.id not in self.blacklist: - self.blacklist.add(tracklet.id) - - def __remove_from_blacklist(self, tracklet): - if tracklet.id in self.blacklist: - self.blacklist.remove(tracklet.id) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout_base.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout_base.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/depthai_sdk/src/depthai_sdk/previews.py b/depthai_sdk/src/depthai_sdk/previews.py index b694d8807..24e3a4798 100644 --- a/depthai_sdk/src/depthai_sdk/previews.py +++ b/depthai_sdk/src/depthai_sdk/previews.py @@ -11,6 +11,7 @@ try: from turbojpeg import TurboJPEG, TJFLAG_FASTUPSAMPLE, TJFLAG_FASTDCT, TJPF_GRAY + turbo = TurboJPEG() except: turbo = None diff --git a/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py b/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py index ee583c456..65798a159 100644 --- a/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py @@ -2,8 +2,10 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import List, Tuple, Dict + import numpy as np + class AbstractReader(ABC): @abstractmethod def read(self) -> Dict[str, np.ndarray]: diff --git a/depthai_sdk/src/depthai_sdk/readers/db3_reader.py b/depthai_sdk/src/depthai_sdk/readers/db3_reader.py index 32c7d0b06..8a8037803 100644 --- a/depthai_sdk/src/depthai_sdk/readers/db3_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/db3_reader.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Generator, List, Dict, Tuple +from typing import Generator, List, Dict, Tuple, Optional import cv2 import numpy as np @@ -34,8 +34,7 @@ def __init__(self, folder: Path) -> None: if stream.lower() in con.topic.lower(): self.generators[stream.lower()] = self.reader.messages([con]) - - def read(self) -> Dict[str, np.ndarray]: + def read(self) -> Optional[Dict[str, np.ndarray]]: ros_msgs: Dict[str, np.ndarray] = dict() try: @@ -76,7 +75,7 @@ def getStreams(self) -> List[str]: def getShape(self, name: str) -> Tuple[int, int]: frame = self.frames[name] - return (frame.shape[1], frame.shape[0]) + return frame.shape[1], frame.shape[0] def get_message_size(self, name: str) -> int: size = 1 diff --git a/depthai_sdk/src/depthai_sdk/readers/image_reader.py b/depthai_sdk/src/depthai_sdk/readers/image_reader.py index 345008965..5a278548f 100644 --- 
a/depthai_sdk/src/depthai_sdk/readers/image_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/image_reader.py @@ -59,9 +59,9 @@ def __init__(self, path: Path) -> None: self.cntr[name] = 0 self.last_cycle_time = time.time() - self.cycle_sec = 3.0 # Images get cycled every 3 seconds by default + self.cycle_sec = 3.0 # Images get cycled every 3 seconds by default - def set_cycle_fps(self, fps): # Called from replay.py on set_fps() + def set_cycle_fps(self, fps): # Called from replay.py on set_fps() self.cycle_sec = 1.0 / fps def read(self) -> Dict[str, np.ndarray]: diff --git a/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py b/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py index eb9cfd619..96ed81348 100644 --- a/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py @@ -33,12 +33,12 @@ def getStreams(self) -> List[str]: def getShape(self, name: str) -> Tuple[int, int]: connection, _, rawdata = next(self.reader.messages('/device_0/sensor_0/Depth_0/image/data')) msg = deserialize_cdr(ros1_to_cdr(rawdata, connection.msgtype), connection.msgtype) - return (msg.width, msg.height) + return msg.width, msg.height def get_message_size(self, name: str) -> int: connection, _, rawdata = next(self.reader.messages('/device_0/sensor_0/Depth_0/image/data')) msg = deserialize_cdr(ros1_to_cdr(rawdata, connection.msgtype), connection.msgtype) - return len(msg.data) # TODO: test + return len(msg.data) # TODO: test def close(self): self.reader.close() diff --git a/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py b/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py index 725729ce5..bbe4984f4 100644 --- a/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py @@ -1,6 +1,7 @@ import os from pathlib import Path -from typing import List, Tuple, Dict, Any +from typing import List, Tuple, Dict, Any, Optional + import depthai as dai try: @@ -21,6 +22,7 @@ class VideoCapReader(AbstractReader): def __init__(self, path: Path, loop: bool = False) -> None: self.videos: Dict[str, Any] = {} + self._closed = False # self.initialFrames: Dict[str, Any] = dict() # self.shapes: Dict[str, Tuple[int, int]] = dict() @@ -40,14 +42,16 @@ def __init__(self, path: Path, loop: bool = False) -> None: continue # Check if name of the file starts with left.. 
right.., or CameraBoardSocket - if f_name.startswith('CameraBoardSocket.'): - f_name = f_name.split('CameraBoardSocket.')[1] + if f_name.startswith('CAM_'): + # Remove everything after CAM_x + f_name = f_name[:5] + socket = None try: socket = parse_camera_socket(f_name) except ValueError: # Invalid file name - continue + pass # TODO: avoid changing stream names, just use socket # stream = str(socket) @@ -72,6 +76,8 @@ def __init__(self, path: Path, loop: bool = False) -> None: video['initialFrame'] = f def read(self): + if self._closed: + return False frames = dict() for name, video in self.videos.items(): if video['initialFrame'] is not None: @@ -101,11 +107,13 @@ def getStreams(self) -> List[str]: def getShape(self, name: str) -> Tuple[int, int]: shape = self.videos[name.lower()]['shape'] return shape - def get_socket(self, name: str): + + def get_socket(self, name: str) -> Optional[dai.CameraBoardSocket]: return self.videos[name.lower()]['socket'] def close(self): [r['reader'].release() for _, r in self.videos.items()] + self._closed = True def disableStream(self, name: str): if name.lower() in self.videos: diff --git a/depthai_sdk/src/depthai_sdk/record.py b/depthai_sdk/src/depthai_sdk/record.py index c9dd3c917..97869447c 100644 --- a/depthai_sdk/src/depthai_sdk/record.py +++ b/depthai_sdk/src/depthai_sdk/record.py @@ -4,13 +4,12 @@ from pathlib import Path from queue import Queue from threading import Thread -from typing import Dict, List +from typing import List import depthai as dai -from depthai_sdk.classes.packets import FramePacket +from depthai_sdk.classes.packets import FramePacket, IMUPacket from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.oak_outputs.xout.xout_seq_sync import XoutSeqSync from depthai_sdk.recorders.abstract_recorder import Recorder @@ -36,12 +35,13 @@ def _run(recorder: Recorder, frame_queue: Queue): class RecordType(IntEnum): VIDEO = 1 # Save to video file - ROSBAG = 2 # To ROS .bag - MCAP = 3 # To .mcap - DB3 = 4 # To .db3 (ros2) + VIDEO_LOSSLESS = 2 # Save to lossless video file (.avi) + ROSBAG = 3 # To ROS .bag + MCAP = 4 # To .mcap + DB3 = 5 # To .db3 (ros2) -class Record(XoutSeqSync): +class Record: """ This class records depthai streams from OAK cameras into different formats. It will also save calibration .json, so depth reconstruction will be possible. 
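# --- Illustrative usage sketch, not part of the patch above ------------------
# The record.py hunks add RecordType.VIDEO_LOSSLESS, which routes recording
# through VideoRecorder(lossless=True) and saves lossless .avi files instead of
# containerized .mp4. A minimal way this could be driven from user code,
# assuming OakCamera.record() keeps its (outputs, path, record_type) signature:
from depthai_sdk import OakCamera, RecordType

with OakCamera() as oak:
    color = oak.create_camera('color', resolution='1080p', fps=30)
    # Raw (non-encoded) frames recorded losslessly; expect large .avi files.
    oak.record([color.out.main], './recordings', record_type=RecordType.VIDEO_LOSSLESS)
    oak.start(blocking=True)
# ------------------------------------------------------------------------------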
@@ -53,7 +53,6 @@ def __init__(self, path: Path, record_type: RecordType): path (Path): Path to the recording folder record_type (RecordType): Recording type """ - super().__init__([]) # We don't yet have streams, we will set it up later self.folder = path self.record_type = record_type self.frame_q = None @@ -70,6 +69,9 @@ def __init__(self, path: Path, record_type: RecordType): elif self.record_type == RecordType.VIDEO: from .recorders.video_recorder import VideoRecorder self.recorder = VideoRecorder() + elif self.record_type == RecordType.VIDEO_LOSSLESS: + from .recorders.video_recorder import VideoRecorder + self.recorder = VideoRecorder(lossless=True) elif self.record_type == RecordType.ROSBAG: from .recorders.rosbag_recorder import Rosbag1Recorder self.recorder = Rosbag1Recorder() @@ -79,39 +81,23 @@ def __init__(self, path: Path, record_type: RecordType): else: raise ValueError(f"Recording type '{self.record_type}' isn't supported!") - def package(self, msgs: Dict): - # Here we get sequence-num synced messages:) - mapped = dict() - for name, msg in msgs.items(): - if name in self.name_mapping: # Map to friendly name - mapped[self.name_mapping[name]] = msg - else: - mapped[name] = msg - - self.frame_q.put(mapped) - - def visualize(self, packet: FramePacket) -> None: - pass # No need. + def write(self, packets): + if not isinstance(packets, dict): + packets = {packets.name: packets} - def no_sync(self, name: str, msg): - # name = self.name_mapping[name] if name in self.name_mapping else name - obj = {name: msg} - self.frame_q.put(obj) + msgs = dict() + for name, packet in packets.items(): + if isinstance(packet, FramePacket): + msgs[name] = packet.msg + elif isinstance(packet, IMUPacket): + msgs[name] = packet.packet + self.frame_q.put(msgs) def start(self, device: dai.Device, xouts: List[XoutFrames]): """ Start recording process. This will create and start the pipeline, start recording threads, and initialize all queues. 
""" - if self.record_type == RecordType.VIDEO: - self._streams = [out.frames.name for out in xouts] # required by XoutSeqSync - self.stream_num = len(xouts) - self.name_mapping = dict() - for xout in xouts: - self.name_mapping[xout.frames.name] = xout.name - else: # For MCAP/Rosbags we don't need msg syncing - self.new_msg = self.no_sync - self.mxid = device.getMxId() self.path = self._create_folder(self.folder, self.mxid) calib_data = device.readCalibration() @@ -130,9 +116,6 @@ def config_mcap(self, pointcloud: bool): return self.recorder.set_pointcloud(pointcloud) - # def config_video(self, ): - # Nothing to configure for video recorder - # TODO: implement config of BAG to either record depth as frame or pointcloud # def config_bag(self, pointcloud: bool): # if self.type != RecordType.BAG: diff --git a/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py index c0adad881..6b593300e 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py +++ b/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py @@ -1,9 +1,11 @@ from abc import ABC, abstractmethod +from enum import IntEnum from pathlib import Path from typing import List + import depthai as dai + import depthai_sdk.oak_outputs.xout as outputs -from enum import IntEnum class Recorder(ABC): @@ -30,16 +32,7 @@ class StreamType(IntEnum): IMU = 5 def __init__(self, xout: outputs.xout_base.XoutBase): - if isinstance(xout, outputs.xout_mjpeg.XoutMjpeg): - self.type = self.StreamType.MJPEG - self.xlink_name = xout.frames.name - elif isinstance(xout, outputs.xout_h26x.XoutH26x): - self.xlink_name = xout.frames.name - if xout.profile == dai.VideoEncoderProperties.Profile.H265_MAIN: - self.type = self.StreamType.H265 - else: - self.type = self.StreamType.H264 - elif isinstance(xout, outputs.xout_depth.XoutDepth): + if isinstance(xout, outputs.xout_depth.XoutDisparityDepth): self.xlink_name = xout.frames.name self.type = self.StreamType.DEPTH # TODO is depth raw or should it be DEPTH? 
elif isinstance(xout, outputs.xout_disparity.XoutDisparity): @@ -47,8 +40,16 @@ def __init__(self, xout: outputs.xout_base.XoutBase): self.type = self.StreamType.RAW elif isinstance(xout, outputs.xout_frames.XoutFrames): self.xlink_name = xout.frames.name - self.type = self.StreamType.RAW - elif isinstance(xout, outputs.XoutIMU): + if xout._fourcc is None: + self.type = self.StreamType.RAW + elif xout._fourcc == 'hevc': + self.type = self.StreamType.H265 + elif xout._fourcc == 'h264': + self.type = self.StreamType.H264 + elif xout._fourcc == 'mjpeg': + self.type = self.StreamType.MJPEG + + elif isinstance(xout, outputs.xout_imu.XoutIMU): self.xlink_name = xout.imu_out.name self.type = self.StreamType.IMU else: diff --git a/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py index a5574ab0e..72d868380 100755 --- a/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py +++ b/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py @@ -236,6 +236,7 @@ def __init__(self): self._closed = False self.imu_interpolation = ImuInterpolation() + def _update(self, device: dai.Device, xouts: List['XoutFrames']): """ Args: @@ -357,20 +358,19 @@ def write(self, name: str, dai_msg: dai.Buffer): elif stream.ros_type == PointCloud2: raise Exception('PointCloud2 not yet implemented') elif stream.ros_type == Imu: - dai_msg: dai.IMUData - for packet in dai_msg.packets: - report = packet.acceleroMeter or packet.gyroscope or packet.magneticField or packet.rotationVector - msg = Imu( - header=self.get_header(report.getTimestampDevice(), report.sequence), - orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0), - orientation_covariance=np.array([-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), - angular_velocity=Vector3(0.0, 0.0, 0.0), - angular_velocity_covariance=np.array([]), - linear_acceleration=Vector3(0.0, 0.0, 0.0), - linear_acceleration_covariance=np.array([]) - ) - self.imu_interpolation.Imu(msg, packet) - self.write_to_rosbag(name, stream.ros_type.__msgtype__, msg) + packet: dai.IMUPacket = dai_msg + report = packet.acceleroMeter or packet.gyroscope or packet.magneticField or packet.rotationVector + msg = Imu( + header=self.get_header(report.getTimestampDevice(), report.sequence), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0), + orientation_covariance=np.array([-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), + angular_velocity=Vector3(0.0, 0.0, 0.0), + angular_velocity_covariance=np.array([]), + linear_acceleration=Vector3(0.0, 0.0, 0.0), + linear_acceleration_covariance=np.array([]) + ) + self.imu_interpolation.Imu(msg, packet) + self.write_to_rosbag(name, stream.ros_type.__msgtype__, msg) elif stream.ros_type == Image: # msg = self.bridge.Image(dai_msg) dai_msg: dai.ImgFrame diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py index 0582736bd..1e183cbd7 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py @@ -8,15 +8,15 @@ class VideoRecorder(Recorder): """ - Writes encoded streams raw (.mjpeg/.h264/.hevc) or directly to mp4 container. - Writes unencoded streams to mp4 using cv2.VideoWriter + Writes video streams (.mjpeg/.h264/.hevc) or directly to mp4/avi container. 
""" - def __init__(self): + def __init__(self, lossless: bool = False): self.path = None self._stream_type = dict() self._writers = dict() self._closed = False + self._lossless = lossless def __getitem__(self, item): return self._writers[item] @@ -41,28 +41,32 @@ def update(self, path: Path, device: dai.Device, xouts: List['XoutFrames']): # for example, 'color_bitstream' (encoded) or 'color_video' (unencoded), # if component was created with name='color' xout_name = xout.name # for example, 'color' --> file is color.mp4 (encoded) or color.avi (unencoded) - + file_name = xout_name + if file_name.startswith('CameraBoardSocket.'): + file_name = file_name[len('CameraBoardSocket.'):] stream = OakStream(xout) fourcc = stream.fourcc() # TODO add default fourcc? stream.fourcc() can be None. - if stream.is_raw(): + + print(fourcc, xout_name, stream.type) + if stream.is_raw() or stream.is_depth(): from .video_writers.video_writer import VideoWriter - self._writers[xout_name] = VideoWriter(self.path, xout_name, fourcc, xout.fps) + self._writers[xout_name] = VideoWriter(self.path, file_name, self._lossless) else: try: from .video_writers.av_writer import AvWriter - self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps, xout._frame_shape) + self._writers[xout_name] = AvWriter(self.path, file_name, fourcc) except Exception as e: # TODO here can be other errors, not only import error logging.warning(f'Exception while creating AvWriter: {e}.' '\nFalling back to FileWriter, saving uncontainerized encoded streams.') from .video_writers.file_writer import FileWriter - self._writers[xout_name] = FileWriter(self.path, xout_name, fourcc) + self._writers[xout_name] = FileWriter(self.path, file_name, fourcc) def create_files_for_buffer(self, subfolder: str, buf_name: str): for _, writer in self._writers.items(): writer.create_file_for_buffer(subfolder, buf_name) - def create_file_for_buffer(self, wr_name: str, subfolder: str, buf_name: str): # get frames' properties for the file from buf_name + def create_file_for_buffer(self, wr_name: str, subfolder: str, buf_name: str): self._writers[wr_name].create_file_for_buffer(subfolder, buf_name) def create_file(self, wr_name: str, subfolder: str, frame: Union[np.ndarray, dai.ImgFrame]): diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py index 62ed798aa..c7796e38e 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py @@ -1,7 +1,7 @@ import os from fractions import Fraction from pathlib import Path -from typing import Tuple, Union +from typing import Tuple, Union, Optional, List import depthai as dai import numpy as np @@ -45,7 +45,7 @@ def is_keyframe(encoded_frame: np.array) -> bool: class AvWriter(BaseWriter): - def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: Tuple[int, int]): + def __init__(self, path: Path, name: str, fourcc: str): """ Args: path: Path to the folder where the file will be created. 
@@ -57,13 +57,15 @@ def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: super().__init__(path, name) self.start_ts = None - self.frame_shape = frame_shape - - self._fps = fps self._fourcc = fourcc + self._stream = None + self._file = None + self.closed = False + self._codec = None # Used to determine dimensions of encoded frames + self._frame_buffer: List[dai.ImgFrame] = [] - def _create_stream(self, fourcc: str, fps: float) -> None: + def _create_stream(self, shape: Tuple) -> None: """ Create stream in file with given fourcc and fps, works in-place. @@ -71,16 +73,24 @@ def _create_stream(self, fourcc: str, fps: float) -> None: fourcc: Stream codec. fps: Frames per second of the stream. """ - self._stream = self._file.add_stream(fourcc, rate=int(fps)) + self._stream = self._file.add_stream(self._fourcc) self._stream.time_base = Fraction(1, 1000 * 1000) # Microseconds # We need to set pixel format for MJEPG, for H264/H265 it's yuv420p by default - if fourcc == 'mjpeg': + if self._fourcc == 'mjpeg': self._stream.pix_fmt = 'yuvj420p' - if self.frame_shape is not None: - self._stream.width = self.frame_shape[0] - self._stream.height = self.frame_shape[1] + self._stream.width = shape[0] + self._stream.height = shape[1] + + def get_dimension(self, img: dai.ImgFrame) -> Optional[Tuple[int, int]]: + enc_packets = self._codec.parse(img.getData()) + if len(enc_packets) == 0: + return None + frames = self._codec.decode(enc_packets[-1]) + if not frames: + return None + return frames[0].width, frames[0].height def create_file_for_buffer(self, subfolder: str, buf_name: str) -> None: # independent of type of frames self.create_file(subfolder) @@ -104,23 +114,20 @@ def _create_file(self, path_to_file: str) -> None: """ global av import av - self._file = av.open(str(Path(path_to_file).with_suffix(f'.{self._fourcc}')), 'w') - self._create_stream(self._fourcc, self._fps) - - def write(self, frame: dai.ImgFrame) -> None: - """ - Write packet bytes to h264 file. + # We will remux .h264 later + suffix = '.h264' if self._fourcc.lower() == 'h264' else '.mp4' + self._file = av.open(str(Path(path_to_file).with_suffix(suffix)), 'w') - Args: - frame: ImgFrame from depthai pipeline. - """ - if self._file is None: - self.create_file(subfolder='') + # Needed to get dimensions from the frame. Only decode first frame. + self._codec = av.CodecContext.create(self._fourcc, "r") + def __mux_imgframe(self, frame: dai.ImgFrame) -> None: frame_data = frame.getData() - if self.start_ts is None and not is_keyframe(frame_data): - return + if self.start_ts is None: + # For H26x, wait for a keyframe + if self._fourcc != 'mjpeg' and not is_keyframe(frame_data): + return packet = av.Packet(frame_data) # Create new packet with byte array @@ -129,23 +136,51 @@ def write(self, frame: dai.ImgFrame) -> None: self.start_ts = frame.getTimestampDevice() ts = int((frame.getTimestampDevice() - self.start_ts).total_seconds() * 1e6) # To microsec - packet.dts = ts - packet.pts = ts + packet.dts = ts + 1 # +1 to avoid zero dts + packet.pts = ts + 1 + packet.stream = self._stream self._file.mux_one(packet) # Mux the Packet into container + def write(self, frame: dai.ImgFrame) -> None: + """ + Write packet bytes to h264 file. + + Args: + frame: ImgFrame from depthai pipeline. 
+        """
+        if self.closed:
+            return
+        if self._file is None:
+            self.create_file(subfolder='')
+
+        if self._stream is None:
+            shape = self.get_dimension(frame)
+            if shape is None:
+                # Save the frame, so we can mux it later once dimensions are known
+                self._frame_buffer.append(frame)
+                return
+
+            self._create_stream(shape)
+            for buffered_frame in self._frame_buffer:
+                self.__mux_imgframe(buffered_frame)
+
+        self.__mux_imgframe(frame)
+
     def close(self) -> None:
         """
-        Close the file and remux it to mp4.
+        Close the file and potentially remux it to mp4.
         """
+        self.closed = True
         if self._file is not None:
             p = self._stream.encode(None)
             self._file.mux(p)
             self._file.close()
 
-        # Remux the stream to finalize the output file
-        self.remux_video(str(self._file.name))
+        # Remux the h264 stream to finalize the output file
+        if self._fourcc == 'h264':
+            self.remux_h264_video(str(self._file.name))
 
-    def remux_video(self, input_file: Union[Path, str]) -> None:
+    def remux_h264_video(self, input_file: Union[Path, str]) -> None:
         """
         Remuxes h264 file to mp4.
 
@@ -161,13 +196,13 @@ def remux_video(self, input_file: Union[Path, str]) -> None:
         with av.open(mp4_file, "w", format="mp4") as output_container, \
                 av.open(input_file, "r", format=self._fourcc) as input_container:
             input_stream = input_container.streams[0]
-            output_stream = output_container.add_stream(template=input_stream, rate=self._fps)
+            fps = input_stream.average_rate
+            output_stream = output_container.add_stream(template=input_stream, rate=fps)
 
-            if self.frame_shape:
-                output_stream.width = self.frame_shape[0]
-                output_stream.height = self.frame_shape[1]
+            output_stream.width = input_stream.width
+            output_stream.height = input_stream.height
 
-            frame_time = (1 / self._fps) * input_stream.time_base.denominator
+            frame_time = (1 / fps) * input_stream.time_base.denominator
             for i, packet in enumerate(input_container.demux(input_stream)):
                 packet.dts = i * frame_time
                 packet.pts = i * frame_time
diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py
index d35bb4344..1bcf51d5b 100644
--- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py
+++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py
@@ -1,5 +1,6 @@
+import time
 from abc import ABC
-from collections import deque
+from collections import deque, defaultdict
 from pathlib import Path
 from typing import Dict
 
@@ -10,27 +11,45 @@ def __init__(self, path: Path, name: str):
         self.name = name
 
         self._buffers: Dict[str, deque] = {}
+        self._buffers_max_seconds: Dict[str, int] = {}  # in seconds
+        self._buffers_timestamps = defaultdict(list)
+        self._buffers_approx_fps: Dict[str, float] = {}
         self._file = None
-        self._fps = None
 
     def create_file_for_buffer(self, subfolder: str, bufname: str):
         raise NotImplementedError()
 
     def init_buffer(self, name: str, max_seconds: int):
         if max_seconds > 0:
-            self._buffers[name] = deque(maxlen=int(max_seconds * self._fps))
+            self._buffers[name] = deque()
+            self._buffers_max_seconds[name] = max_seconds
 
     def add_to_buffer(self, name: str, frame):
         if self._buffers[name] is None:
             return
 
-        if len(self._buffers[name]) == self._buffers[name].maxlen:
+        timestamp = time.time()
+        self._buffers_timestamps[name].append(timestamp)
+
+        # Calculate time window based on max_seconds
+        time_window = self._buffers_max_seconds[name]
+
+        # Remove frames that fall outside the time window
+        while self._buffers_timestamps[name] and (timestamp - self._buffers_timestamps[name][0] > time_window):
self._buffers[name].popleft() + self._buffers_timestamps[name].pop(0) self._buffers[name].append(frame) def is_buffer_full(self, name: str) -> bool: - return len(self._buffers[name]) == self._buffers[name].maxlen + if self._buffers[name].maxlen: + return len(self._buffers[name]) == self._buffers[name].maxlen + + if not self._buffers_timestamps[name]: + return False + + diff = self._buffers_timestamps[name][0] + self._buffers_max_seconds[name] - self._buffers_timestamps[name][-1] + return diff < 0.1 def is_buffer_empty(self, name: str) -> bool: return len(self._buffers[name]) == 0 diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py index 684704f3d..b0b0f731d 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py @@ -1,14 +1,9 @@ -from collections import deque +from datetime import timedelta +from fractions import Fraction from pathlib import Path -from typing import Union - -try: - import cv2 -except ImportError: - cv2 = None +import av import depthai as dai -import numpy as np from depthai_sdk.recorders.video_writers import BaseWriter from depthai_sdk.recorders.video_writers.utils import create_writer_dir @@ -16,40 +11,28 @@ class VideoWriter(BaseWriter): """ - Writes raw streams to mp4 using cv2.VideoWriter. + Writes raw streams to file """ - _fps: float - _path: str - def __init__(self, path: Path, name: str, fourcc: str, fps: float): + def __init__(self, path: Path, name: str, lossless: bool = False): """ Args: path: Path to save the output. Either a folder or a file. name: Name of the stream. - fourcc: FourCC code of the codec used to compress the frames. - fps: Frames per second. + lossless: If True, save the stream without compression. """ super().__init__(path, name) - self._fourcc = None - self._w, self._h = None, None - self._fps = fps + self._lossless = lossless - self._buffer = None - self._is_buffer_enabled = False + self._fourcc: str = None + self._format: str = None + self._start_ts: timedelta = None def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def init_buffer(self, name: str, max_seconds: int): - if max_seconds > 0: - self._buffers[name] = deque(maxlen=int(max_seconds * self._fps)) - self._is_buffer_enabled = True - - def set_fourcc(self, fourcc: str): - self._fourcc = fourcc - def create_file_for_buffer(self, subfolder: str, buf_name: str): if self._buffers[buf_name] is None: raise RuntimeError(f"Buffer {buf_name} is not enabled") @@ -60,42 +43,70 @@ def create_file_for_buffer(self, subfolder: str, buf_name: str): frame = self._buffers[buf_name][0] self.create_file(subfolder, frame) - def create_file(self, subfolder: str, frame: Union[dai.ImgFrame, np.ndarray]): - path_to_file = create_writer_dir(self.path / subfolder, self.name, 'mp4') + def create_file(self, subfolder: str, frame: dai.ImgFrame): + if self._lossless or frame.getType() == dai.ImgFrame.Type.RAW16: + extension = 'avi' + else: + extension = 'mp4' + + path_to_file = create_writer_dir(self.path / subfolder, self.name, extension) - if not path_to_file.endswith('.mp4'): - path_to_file = path_to_file[:-4] + '.mp4' + if not path_to_file.endswith('.' + extension): + path_to_file = path_to_file[:-4] + '.' 
+ extension self._create_file(path_to_file, frame) - def _create_file(self, path_to_file: str, frame: Union[dai.ImgFrame, np.ndarray]): - if isinstance(frame, np.ndarray): - self._h, self._w = frame.shape[:2] + def _create_file(self, path_to_file: str, frame: dai.ImgFrame): + options = {} + if self._lossless: + self._fourcc = 'rawvideo' + elif frame.getType() == dai.ImgFrame.Type.RAW16: + self._fourcc = 'ffv1' + self._format = 'gray16le' + else: # Mono/Color, encode + self._fourcc = 'h264' + options['crf'] = '15' + + self._file = av.open(path_to_file, 'w') + self._stream = self._file.add_stream(self._fourcc) + self._stream.options = options + self._stream.time_base = Fraction(1, 1000) + self._stream.codec_context.width = frame.getWidth() + self._stream.codec_context.height = frame.getHeight() + + def write(self, img_frame: dai.ImgFrame): + if self._file is None: + self.create_file(subfolder='', frame=img_frame) + if self._start_ts is None: + self._start_ts = img_frame.getTimestampDevice() + + if img_frame.getType() == dai.ImgFrame.Type.YUV420p: + video_format = 'yuv420p' + elif img_frame.getType() == dai.ImgFrame.Type.NV12: + video_format = 'nv12' + elif img_frame.getType() in [dai.ImgFrame.Type.RAW8, dai.ImgFrame.Type.GRAY8]: + video_format = 'gray' + elif img_frame.getType() == dai.ImgFrame.Type.RAW16: + video_format = 'gray16le' else: - self._h, self._w = frame.getHeight(), frame.getWidth() - - if not isinstance(frame, np.ndarray): - frame = frame.getCvFrame() + raise ValueError(f'Unsupported frame type: {img_frame.getType()}') - c = 1 if frame.ndim == 2 else frame.shape[2] + video_frame = av.VideoFrame.from_ndarray(img_frame.getFrame(), format=video_format) - self._fourcc = 'mp4v' - self._file = cv2.VideoWriter(path_to_file, - cv2.VideoWriter_fourcc(*self._fourcc), - self._fps, - (self._w, self._h), - isColor=c != 1) + ts = int((img_frame.getTimestampDevice() - self._start_ts).total_seconds() * 1e3) # To milliseconds + video_frame.pts = ts + 1 - def write(self, frame: Union[dai.ImgFrame, np.ndarray]): - if self._file is None: - self.create_file(subfolder='', frame=frame) - - self._file.write(frame if isinstance(frame, np.ndarray) else frame.getCvFrame()) + for packet in self._stream.encode(video_frame): + self._file.mux(packet) def close(self) -> None: """ Close the file if it is open. 
""" if self._file: - self._file.release() - self._file = None + # Flush stream + for packet in self._stream.encode(): + self._file.mux(packet) + + # Close output file + self._file.close() diff --git a/depthai_sdk/src/depthai_sdk/replay.py b/depthai_sdk/src/depthai_sdk/replay.py index 23e0bd9e1..8bfc48c66 100644 --- a/depthai_sdk/src/depthai_sdk/replay.py +++ b/depthai_sdk/src/depthai_sdk/replay.py @@ -1,8 +1,9 @@ -import logging +import os import os import time from threading import Thread from time import monotonic +from typing import Callable import depthai as dai @@ -16,42 +17,49 @@ '.pnm', '.pfm', '.sr', '.ras', '.tiff', '.tif', '.exr', '.hdr', '.pic'] -class ReplayStream: - stream_name: str # XLink stream name - queue: dai.DataInputQueue # Input queue - frame: np.ndarray # Last read frame from Reader (ndarray) - imgFrame: dai.ImgFrame # Last read ImgFrame from Reader (dai.ImgFrame) - _shape: Tuple[int, int] # width, height - disabled: bool - size_bytes: int # bytes +def _run(delay: float, sendFrames: Callable): + while True: + if not sendFrames(): + break + time.sleep(delay) + logging.info('Replay `run` thread stopped') + +class ReplayStream: @property def shape(self) -> Tuple[int, int]: return self.resize if self.resize else self._shape def __init__(self): self.node: dai.node.XLinkIn = None + self.queue: dai.DataInputQueue = None self.disabled = False self.stream_name = '' - self.camera_socket: dai.CameraBoardSocket = None + self.camera_socket: dai.CameraBoardSocket = None # Forced socket self.resize: Tuple[int, int] = None self.resize_mode: ResizeMode = None + self._shape: Tuple[int, int] = None + self.callbacks: List[Callable] = [] + + self.frame: np.ndarray # Last read frame from Reader (ndarray) + self.imgFrame: dai.ImgFrame # Last read ImgFrame from Reader (dai.ImgFrame) + self.size_bytes: int # bytes def get_socket(self) -> dai.CameraBoardSocket: - if self.camera_socket: + if self.camera_socket is not None: return self.camera_socket if 'left' in self.stream_name.lower(): return dai.CameraBoardSocket.LEFT elif 'right' in self.stream_name.lower(): return dai.CameraBoardSocket.RIGHT else: - return dai.CameraBoardSocket.RGB + return dai.CameraBoardSocket.CAM_A # raise Exception("Please specify replay stream CameraBoardSocket via replay.specify_socket()") class Replay: - def __init__(self, path: str): + def __init__(self, path: Union[Path, str]): """ Helper file to replay recorded depthai stream. It reads from recorded files (mjpeg/avi/mp4/h265/h264/bag) and sends frames back to OAK camera to replay the scene, including depth reconstruction from 2 synced mono @@ -142,6 +150,8 @@ def _get_path(self, path: str) -> Path: @param path: depthai-recording path. @return: Replay module """ + if isinstance(path, Path): + return path.resolve() if isUrl(path): if isYoutubeLink(path): # Overwrite source - so Replay class can use it @@ -200,6 +210,9 @@ def set_loop(self, flag: bool): def get_fps(self) -> float: return self.fps + def _add_callback(self, stream_name: str, callback: Callable): + self.streams[stream_name.lower()].callbacks.append(callback) + def resize(self, stream_name: str, size: Tuple[int, int], mode: ResizeMode = ResizeMode.STRETCH): """ Resize color frames prior to sending them to the device. 
@@ -288,27 +301,14 @@ def initStereoDepth(self, left.node.out.link(stereo.left) right.node.out.link(stereo.right) - def start(self, cb): + def start(self): """ Start sending frames to the OAK device on a new thread """ - self.thread = Thread(target=self.run, args=(cb,)) + self.thread = Thread(target=_run, args=(1.0 / self.fps, self.sendFrames,)) self.thread.start() - def run(self, cb): - delay = 1.0 / self.fps - while True: - if not self.sendFrames(cb): - break - - time.sleep(delay) - if self._stop: - break - - logging.info('Replay `run` thread stopped') - self._stop = True - - def sendFrames(self, cb=None) -> bool: + def sendFrames(self) -> bool: """ Reads and sends recorded frames from all enabled streams to the OAK camera. @@ -317,13 +317,14 @@ def sendFrames(self, cb=None) -> bool: """ if not self._pause: # If replaying is paused, don't read new frames if not self._readFrames(): + self._stop = True return False # End of the recording self._now = monotonic() for stream_name, stream in self.streams.items(): stream.imgFrame = self._createImgFrame(stream) # Save the imgFrame - if cb: # callback + for cb in stream.callbacks: # callback cb(stream_name.lower(), stream.imgFrame) # Don't send these frames to the OAK camera @@ -339,7 +340,7 @@ def sendFrames(self, cb=None) -> bool: def createQueues(self, device: dai.Device): """ Creates input queue for each enabled stream - + Args: device (dai.Device): Device to which we will stream frames """ @@ -402,7 +403,7 @@ def _createImgFrame(self, stream: ReplayStream) -> dai.ImgFrame: def _readFrames(self) -> bool: """ Reads frames from all Readers. - + Returns: bool: True if successful, otherwise False. """ @@ -430,6 +431,6 @@ def close(self): Closes all video readers. """ self._stop = True + self.reader.close() if self.thread: self.thread.join() - self.reader.close() diff --git a/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py b/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py index dfcd29c27..a9db8afc8 100644 --- a/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py +++ b/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py @@ -27,10 +27,7 @@ def __init__(self, trigger: Trigger, action: Union[Action, Callable]): if isinstance(action, Action) and action.inputs: SequenceNumSync.__init__(self, len(action.inputs)) - def new_packet_trigger(self, - packet: FramePacket, - _=None # visualizer seems redundant here - ) -> None: + def new_packet_trigger(self, packet: FramePacket) -> None: """ This method is called when a new packet is received from the trigger input stream. @@ -44,10 +41,7 @@ def new_packet_trigger(self, self.last_trigger_time = trigger_time self.action.activate() - def new_packet_action(self, - packet: FramePacket, - _=None # visualizer seems redundant here - ) -> None: + def new_packet_action(self, packet: FramePacket) -> None: """ This method is called when a new packet is received from the action input streams. Primary purpose of this method is to provide a way to keep a track of the packets. 
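
The two callbacks above are normally wired up through OakCamera.trigger_action(). A short sketch, with class and argument names following the SDK's trigger_action examples (treat the exact values and paths as placeholders):

from depthai_sdk import OakCamera
from depthai_sdk.trigger_action.triggers.detection_trigger import DetectionTrigger
from depthai_sdk.trigger_action.actions.record_action import RecordAction

with OakCamera() as oak:
    color = oak.create_camera('color', encode='jpeg', fps=30)
    nn = oak.create_nn('mobilenet-ssd', color)

    # Record a short clip around each person detection, at most once per minute
    oak.trigger_action(
        trigger=DetectionTrigger(input=nn, min_detections={'person': 1}, cooldown=60),
        action=RecordAction(inputs=[color], dir_path='./recordings/',
                            duration_before_trigger=5, duration_after_trigger=10)
    )
    oak.start(blocking=True)
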
diff --git a/depthai_sdk/src/depthai_sdk/types.py b/depthai_sdk/src/depthai_sdk/types.py index 53c4114e0..4c257febf 100644 --- a/depthai_sdk/src/depthai_sdk/types.py +++ b/depthai_sdk/src/depthai_sdk/types.py @@ -2,6 +2,8 @@ import depthai as dai +from depthai_sdk.classes.packets import SemanticSegmentationPacket, ImgLandmarksPacket, NnOutputPacket, DetectionPacket + GenericNeuralNetwork = Union[ dai.node.NeuralNetwork, dai.node.MobileNetDetectionNetwork, @@ -9,3 +11,24 @@ dai.node.YoloDetectionNetwork, dai.node.YoloSpatialDetectionNetwork ] + +XoutNNOutputPacket = Union[ + NnOutputPacket, + DetectionPacket, + ImgLandmarksPacket, + SemanticSegmentationPacket +] + +Resolution = Union[ + str, + dai.ColorCameraProperties.SensorResolution, + dai.MonoCameraProperties.SensorResolution +] + +NNNode = Union[ + dai.node.NeuralNetwork, + dai.node.MobileNetDetectionNetwork, + dai.node.MobileNetSpatialDetectionNetwork, + dai.node.YoloDetectionNetwork, + dai.node.YoloSpatialDetectionNetwork +] diff --git a/depthai_sdk/src/depthai_sdk/utils.py b/depthai_sdk/src/depthai_sdk/utils.py index 516642637..58ae3bc43 100644 --- a/depthai_sdk/src/depthai_sdk/utils.py +++ b/depthai_sdk/src/depthai_sdk/utils.py @@ -2,6 +2,8 @@ import json import logging import sys +import tempfile +import traceback import urllib.request from pathlib import Path from typing import Dict, List, Tuple, Optional, Union, Any @@ -38,23 +40,6 @@ def getLocalRecording(recording: str) -> Optional[Path]: return None -def configPipeline(pipeline: dai.Pipeline, - xlinkChunk: Optional[int] = None, - calib: Optional[dai.CalibrationHandler] = None, - tuningBlob: Optional[str] = None, - openvinoVersion: Union[None, str, dai.OpenVINO.Version] = None - ) -> None: - if xlinkChunk: - pipeline.setXLinkChunkSize(xlinkChunk) - if calib: - pipeline.setCalibrationData(calib) - if tuningBlob: - pipeline.setCameraTuningBlobPath(tuningBlob) - if openvinoVersion: - # pipeline.setOpenVINOVersion(parseOpenVinoVersion(openvinoVersion)) - pass - - def getAvailableRecordings() -> Dict[str, Tuple[List[str], int]]: """ Get available (online) depthai-recordings. Returns list of available recordings and it's size @@ -415,16 +400,16 @@ def _create_cache_folder() -> bool: return True -def _create_config() -> None: +def _create_config() -> Optional[dict]: """ - Create config file in user's home directory. + Create config file in user's home directory. If config file already exists, check if sentry_dsn is correct. Returns: - None. + dict: Config file content. 
""" if not _create_cache_folder(): logging.debug('Failed to create config file.') - return + return None config_file = Path.home().joinpath('.depthai_sdk', 'config.json') default_config = { @@ -433,6 +418,13 @@ def _create_config() -> None: } if not config_file.exists(): config_file.write_text(json.dumps(default_config)) + else: + content = json.loads(config_file.read_text()) + if content['sentry_dsn'] != default_config['sentry_dsn']: + content['sentry_dsn'] = default_config['sentry_dsn'] + config_file.write_text(json.dumps(content)) + + return json.loads(config_file.read_text()) def set_sentry_status(status: bool = True) -> None: @@ -447,8 +439,7 @@ def set_sentry_status(status: bool = True) -> None: """ # check if config exists config_file = Path.home().joinpath('.depthai_sdk', 'config.json') - if not config_file.exists(): - _create_config() + _create_config() # read config config = json.loads(config_file.read_text()) @@ -464,13 +455,8 @@ def get_config_field(key: str) -> Any: bool: True if sentry is enabled, False otherwise. """ # check if config exists - config_file = Path.home().joinpath('.depthai_sdk', 'config.json') - if not config_file.exists(): - raise FileNotFoundError('Config file not found.') - - # read config - config = json.loads(config_file.read_text()) - return config[key] + config_file = _create_config() + return config_file.get(key, None) def report_crash_dump(device: dai.Device) -> None: @@ -487,11 +473,32 @@ def report_crash_dump(device: dai.Device) -> None: device_id = crash_dump.deviceId crash_dump_json = crash_dump.serializeToJson() - path = f'/tmp/crash_{commit_hash}_{device_id}.json' - with open(path, 'w') as f: - json.dump(crash_dump_json, f) - from sentry_sdk import capture_exception, configure_scope - with configure_scope() as scope: - scope.add_attachment(content_type='application/json', path=path) - capture_exception(CrashDumpException()) + # Save crash dump to a temporary file + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / f'crash_{commit_hash}_{device_id}.json' + with open(path, 'w') as f: + json.dump(crash_dump_json, f) + + with configure_scope() as scope: + logging.info('Reporting crash dump to sentry.') + scope.add_attachment(content_type='application/json', path=str(path)) + capture_exception(CrashDumpException()) + + +def _sentry_before_send(event, hint): + if 'exc_info' in hint: + exc_type, exc_value, tb = hint['exc_info'] + tb_info = traceback.extract_tb(tb) + + if isinstance(exc_value, (KeyboardInterrupt, SystemExit)): + return None + + # Loop through the traceback to check for any frame that originated in your module + for tbi in tb_info: + # Assuming your module files have the pattern "my_module_*", you can do: + if 'depthai_sdk' in tbi.filename: + return event # if the error originated in your module, send it + + # If none of the frames came from your module, or there's no exception info, don't send the event + return None diff --git a/depthai_sdk/src/depthai_sdk/visualize/bbox.py b/depthai_sdk/src/depthai_sdk/visualize/bbox.py index 67cf99a76..db2152ef2 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/bbox.py +++ b/depthai_sdk/src/depthai_sdk/visualize/bbox.py @@ -1,13 +1,17 @@ -from typing import Optional, Tuple, Union, Sequence +from typing import Optional, Tuple, Union, Sequence, List + +import depthai as dai import numpy as np + from depthai_sdk.classes.enum import ResizeMode -import depthai as dai + class Point: """ Used within the BoundingBox class when dealing with points. 
""" - def __init__(self, x: float, y:float): + + def __init__(self, x: float, y: float): self.x = x self.y = y @@ -15,7 +19,7 @@ def __str__(self): return f"({self.x}, {self.y})" def to_tuple(self) -> Tuple[float, float]: - return (self.x, self.y) + return self.x, self.y def denormalize(self, frame_shape: Sequence) -> Tuple[int, int]: """ @@ -29,7 +33,8 @@ class BoundingBox: This class helps with bounding box calculations. It can be used to calculate relative bounding boxes, map points from relative to absolute coordinates and vice versa, crop frames, etc. """ - def __init__(self, bbox: Union[None, np.ndarray, Tuple[float, float, float, float], dai.ImgDetection] = None): + + def __init__(self, bbox: Union[None, List, np.ndarray, Tuple[float, float, float, float], dai.ImgDetection] = None): if isinstance(bbox, (Sequence, np.ndarray)): self.xmin, self.ymin, self.xmax, self.ymax = bbox elif isinstance(bbox, dai.ImgDetection): @@ -41,6 +46,29 @@ def __init__(self, bbox: Union[None, np.ndarray, Tuple[float, float, float, floa def __str__(self): return f"({self.xmin}, {self.ymin}), ({self.xmax}, {self.ymax})" + def clip(self, min_value=0.0, max_value=1.0) -> 'BoundingBox': + """ + Clips the bounding box to the given range. + """ + return BoundingBox([ + np.clip(self.xmin, min_value, max_value), + np.clip(self.ymin, min_value, max_value), + np.clip(self.xmax, min_value, max_value), + np.clip(self.ymax, min_value, max_value), + ]) + + def top_left(self) -> Tuple[float, float]: + """ + Get top-left corner of the bounding box. + """ + return self.xmin, self.ymin + + def bottom_right(self) -> Tuple[float, float]: + """ + Get bottom-right corner of the bounding box. + """ + return self.xmax, self.ymax + def to_tuple(self, frame_shape: Union[Sequence, None] = None) -> Tuple: """ Get bounding box coordinates as a tuple (xmin, ymin, xmax, ymax). @@ -136,9 +164,9 @@ def crop_frame(self, frame: np.ndarray) -> np.ndarray: return frame[left:right, top:bottom] def resize_to_aspect_ratio(self, - old_aspect_ratio: Union[float, Sequence], - new_aspect_ratio: Union[float, Sequence], - resize_mode: Union[ResizeMode, str] = ResizeMode.LETTERBOX) -> 'BoundingBox': + old_aspect_ratio: Union[float, Sequence], + new_aspect_ratio: Union[float, Sequence], + resize_mode: Union[ResizeMode, str] = ResizeMode.LETTERBOX) -> 'BoundingBox': """ Calculates a new BoundingBox, based on the current BoundingBox, but with a different aspect ratio. Example: If the original frame is 1920x1080, and we have a model with input size of 300x300, diff --git a/depthai_sdk/src/depthai_sdk/visualize/colors.py b/depthai_sdk/src/depthai_sdk/visualize/colors.py index 8c9cfe70a..64aec06fa 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/colors.py +++ b/depthai_sdk/src/depthai_sdk/visualize/colors.py @@ -1,8 +1,8 @@ -import numpy as np -import math import colorsys +import math from typing import Tuple + def generate_colors(number_of_colors: int, pastel=0.5): colors = [] @@ -25,12 +25,13 @@ def generate_colors(number_of_colors: int, pastel=0.5): # Return only the first `number_of_colors` colors return colors[:number_of_colors] -def get_text_color(background_color: Tuple[int,int,int], threshold=0.6): + +def get_text_color(background_color: Tuple[int, int, int], threshold=0.6): """ Determines whether black or white text will be more legible against a given background color. Args: - background_color_bgr: The BGR color that the text will be displayed on. + background_color: The BGR color that the text will be displayed on. 
threshold: Float between 0 and 1. A threshold closer to 1 results in the function choosing white text more often. Returns: @@ -41,3 +42,11 @@ def get_text_color(background_color: Tuple[int,int,int], threshold=0.6): return (0, 0, 0) # BGR for black else: return (255, 255, 255) # BGR for white + + +def hex_to_bgr(hex: str) -> Tuple[int, ...]: + """ + "#ff1f00" (red) => (0, 31, 255) + """ + value = hex.lstrip('#') + return tuple(int(value[i:i + 2], 16) for i in (4, 2, 0)) diff --git a/depthai_sdk/src/depthai_sdk/visualize/configs.py b/depthai_sdk/src/depthai_sdk/visualize/configs.py index 8ee2b43d6..972663cc0 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/configs.py +++ b/depthai_sdk/src/depthai_sdk/visualize/configs.py @@ -1,12 +1,8 @@ from dataclasses import dataclass, field from enum import IntEnum from typing import Tuple, Optional -import numpy as np -try: - import cv2 -except ImportError: - cv2 = None +import numpy as np class TextPosition(IntEnum): @@ -51,11 +47,53 @@ class OutputConfig: class StereoConfig: colorize: StereoColor = StereoColor.RGB # cv2.COLORMAP_JET. This was hardcoded, as we want to have an array, because we later invert it / invalidate values 0 - colormap: np.ndarray = field(default_factory=lambda: np.array([[[128,0,0]],[[132,0,0]],[[136,0,0]],[[140,0,0]],[[144,0,0]],[[148,0,0]],[[152,0,0]],[[156,0,0]],[[160,0,0]],[[164,0,0]],[[168,0,0]],[[172,0,0]],[[176,0,0]],[[180,0,0]],[[184,0,0]],[[188,0,0]],[[192,0,0]],[[196,0,0]],[[200,0,0]],[[204,0,0]],[[208,0,0]],[[212,0,0]],[[216,0,0]],[[220,0,0]],[[224,0,0]],[[228,0,0]],[[232,0,0]],[[236,0,0]],[[240,0,0]],[[244,0,0]],[[248,0,0]],[[252,0,0]],[[255,0,0]],[[255,4,0]],[[255,8,0]],[[255,12,0]],[[255,16,0]],[[255,20,0]],[[255,24,0]],[[255,28,0]],[[255,32,0]],[[255,36,0]],[[255,40,0]],[[255,44,0]],[[255,48,0]],[[255,52,0]],[[255,56,0]],[[255,60,0]],[[255,64,0]],[[255,68,0]],[[255,72,0]],[[255,76,0]],[[255,80,0]],[[255,84,0]],[[255,88,0]],[[255,92,0]],[[255,96,0]],[[255,100,0]],[[255,104,0]],[[255,108,0]],[[255,112,0]],[[255,116,0]],[[255,120,0]],[[255,124,0]],[[255,128,0]],[[255,132,0]],[[255,136,0]],[[255,140,0]],[[255,144,0]],[[255,148,0]],[[255,152,0]],[[255,156,0]],[[255,160,0]],[[255,164,0]],[[255,168,0]],[[255,172,0]],[[255,176,0]],[[255,180,0]],[[255,184,0]],[[255,188,0]],[[255,192,0]],[[255,196,0]],[[255,200,0]],[[255,204,0]],[[255,208,0]],[[255,212,0]],[[255,216,0]],[[255,220,0]],[[255,224,0]],[[255,228,0]],[[255,232,0]],[[255,236,0]],[[255,240,0]],[[255,244,0]],[[255,248,0]],[[255,252,0]],[[254,255,2]],[[250,255,6]],[[246,255,10]],[[242,255,14]],[[238,255,18]],[[234,255,22]],[[230,255,26]],[[226,255,30]],[[222,255,34]],[[218,255,38]],[[214,255,42]],[[210,255,46]],[[206,255,50]],[[202,255,54]],[[198,255,58]],[[194,255,62]],[[190,255,66]],[[186,255,70]],[[182,255,74]],[[178,255,78]],[[174,255,82]],[[170,255,86]],[[166,255,90]],[[162,255,94]],[[158,255,98]],[[154,255,102]],[[150,255,106]],[[146,255,110]],[[142,255,114]],[[138,255,118]],[[134,255,122]],[[130,255,126]],[[126,255,130]],[[122,255,134]],[[118,255,138]],[[114,255,142]],[[110,255,146]],[[106,255,150]],[[102,255,154]],[[98,255,158]],[[94,255,162]],[[90,255,166]],[[86,255,170]],[[82,255,174]],[[78,255,178]],[[74,255,182]],[[70,255,186]],[[66,255,190]],[[62,255,194]],[[58,255,198]],[[54,255,202]],[[50,255,206]],[[46,255,210]],[[42,255,214]],[[38,255,218]],[[34,255,222]],[[30,255,226]],[[26,255,230]],[[22,255,234]],[[18,255,238]],[[14,255,242]],[[10,255,246]],[[6,255,250]],[[1,255,254]],[[0,252,255]],[[0,248,255]],[[0,244,255]],[[0,240,255]],[[0,236,255]],[[0,232,2
55]],[[0,228,255]],[[0,224,255]],[[0,220,255]],[[0,216,255]],[[0,212,255]],[[0,208,255]],[[0,204,255]],[[0,200,255]],[[0,196,255]],[[0,192,255]],[[0,188,255]],[[0,184,255]],[[0,180,255]],[[0,176,255]],[[0,172,255]],[[0,168,255]],[[0,164,255]],[[0,160,255]],[[0,156,255]],[[0,152,255]],[[0,148,255]],[[0,144,255]],[[0,140,255]],[[0,136,255]],[[0,132,255]],[[0,128,255]],[[0,124,255]],[[0,120,255]],[[0,116,255]],[[0,112,255]],[[0,108,255]],[[0,104,255]],[[0,100,255]],[[0,96,255]],[[0,92,255]],[[0,88,255]],[[0,84,255]],[[0,80,255]],[[0,76,255]],[[0,72,255]],[[0,68,255]],[[0,64,255]],[[0,60,255]],[[0,56,255]],[[0,52,255]],[[0,48,255]],[[0,44,255]],[[0,40,255]],[[0,36,255]],[[0,32,255]],[[0,28,255]],[[0,24,255]],[[0,20,255]],[[0,16,255]],[[0,12,255]],[[0,8,255]],[[0,4,255]],[[0,0,255]],[[0,0,252]],[[0,0,248]],[[0,0,244]],[[0,0,240]],[[0,0,236]],[[0,0,232]],[[0,0,228]],[[0,0,224]],[[0,0,220]],[[0,0,216]],[[0,0,212]],[[0,0,208]],[[0,0,204]],[[0,0,200]],[[0,0,196]],[[0,0,192]],[[0,0,188]],[[0,0,184]],[[0,0,180]],[[0,0,176]],[[0,0,172]],[[0,0,168]],[[0,0,164]],[[0,0,160]],[[0,0,156]],[[0,0,152]],[[0,0,148]],[[0,0,144]],[[0,0,140]],[[0,0,136]],[[0,0,132]],[[0,0,128]]], dtype=np.uint8)) + colormap: np.ndarray = field(default_factory=lambda: np.array( + [[[128, 0, 0]], [[132, 0, 0]], [[136, 0, 0]], [[140, 0, 0]], [[144, 0, 0]], [[148, 0, 0]], [[152, 0, 0]], + [[156, 0, 0]], [[160, 0, 0]], [[164, 0, 0]], [[168, 0, 0]], [[172, 0, 0]], [[176, 0, 0]], [[180, 0, 0]], + [[184, 0, 0]], [[188, 0, 0]], [[192, 0, 0]], [[196, 0, 0]], [[200, 0, 0]], [[204, 0, 0]], [[208, 0, 0]], + [[212, 0, 0]], [[216, 0, 0]], [[220, 0, 0]], [[224, 0, 0]], [[228, 0, 0]], [[232, 0, 0]], [[236, 0, 0]], + [[240, 0, 0]], [[244, 0, 0]], [[248, 0, 0]], [[252, 0, 0]], [[255, 0, 0]], [[255, 4, 0]], [[255, 8, 0]], + [[255, 12, 0]], [[255, 16, 0]], [[255, 20, 0]], [[255, 24, 0]], [[255, 28, 0]], [[255, 32, 0]], [[255, 36, 0]], + [[255, 40, 0]], [[255, 44, 0]], [[255, 48, 0]], [[255, 52, 0]], [[255, 56, 0]], [[255, 60, 0]], [[255, 64, 0]], + [[255, 68, 0]], [[255, 72, 0]], [[255, 76, 0]], [[255, 80, 0]], [[255, 84, 0]], [[255, 88, 0]], [[255, 92, 0]], + [[255, 96, 0]], [[255, 100, 0]], [[255, 104, 0]], [[255, 108, 0]], [[255, 112, 0]], [[255, 116, 0]], + [[255, 120, 0]], [[255, 124, 0]], [[255, 128, 0]], [[255, 132, 0]], [[255, 136, 0]], [[255, 140, 0]], + [[255, 144, 0]], [[255, 148, 0]], [[255, 152, 0]], [[255, 156, 0]], [[255, 160, 0]], [[255, 164, 0]], + [[255, 168, 0]], [[255, 172, 0]], [[255, 176, 0]], [[255, 180, 0]], [[255, 184, 0]], [[255, 188, 0]], + [[255, 192, 0]], [[255, 196, 0]], [[255, 200, 0]], [[255, 204, 0]], [[255, 208, 0]], [[255, 212, 0]], + [[255, 216, 0]], [[255, 220, 0]], [[255, 224, 0]], [[255, 228, 0]], [[255, 232, 0]], [[255, 236, 0]], + [[255, 240, 0]], [[255, 244, 0]], [[255, 248, 0]], [[255, 252, 0]], [[254, 255, 2]], [[250, 255, 6]], + [[246, 255, 10]], [[242, 255, 14]], [[238, 255, 18]], [[234, 255, 22]], [[230, 255, 26]], [[226, 255, 30]], + [[222, 255, 34]], [[218, 255, 38]], [[214, 255, 42]], [[210, 255, 46]], [[206, 255, 50]], [[202, 255, 54]], + [[198, 255, 58]], [[194, 255, 62]], [[190, 255, 66]], [[186, 255, 70]], [[182, 255, 74]], [[178, 255, 78]], + [[174, 255, 82]], [[170, 255, 86]], [[166, 255, 90]], [[162, 255, 94]], [[158, 255, 98]], [[154, 255, 102]], + [[150, 255, 106]], [[146, 255, 110]], [[142, 255, 114]], [[138, 255, 118]], [[134, 255, 122]], + [[130, 255, 126]], [[126, 255, 130]], [[122, 255, 134]], [[118, 255, 138]], [[114, 255, 142]], + [[110, 255, 146]], [[106, 255, 150]], [[102, 255, 
154]], [[98, 255, 158]], [[94, 255, 162]], [[90, 255, 166]], + [[86, 255, 170]], [[82, 255, 174]], [[78, 255, 178]], [[74, 255, 182]], [[70, 255, 186]], [[66, 255, 190]], + [[62, 255, 194]], [[58, 255, 198]], [[54, 255, 202]], [[50, 255, 206]], [[46, 255, 210]], [[42, 255, 214]], + [[38, 255, 218]], [[34, 255, 222]], [[30, 255, 226]], [[26, 255, 230]], [[22, 255, 234]], [[18, 255, 238]], + [[14, 255, 242]], [[10, 255, 246]], [[6, 255, 250]], [[1, 255, 254]], [[0, 252, 255]], [[0, 248, 255]], + [[0, 244, 255]], [[0, 240, 255]], [[0, 236, 255]], [[0, 232, 255]], [[0, 228, 255]], [[0, 224, 255]], + [[0, 220, 255]], [[0, 216, 255]], [[0, 212, 255]], [[0, 208, 255]], [[0, 204, 255]], [[0, 200, 255]], + [[0, 196, 255]], [[0, 192, 255]], [[0, 188, 255]], [[0, 184, 255]], [[0, 180, 255]], [[0, 176, 255]], + [[0, 172, 255]], [[0, 168, 255]], [[0, 164, 255]], [[0, 160, 255]], [[0, 156, 255]], [[0, 152, 255]], + [[0, 148, 255]], [[0, 144, 255]], [[0, 140, 255]], [[0, 136, 255]], [[0, 132, 255]], [[0, 128, 255]], + [[0, 124, 255]], [[0, 120, 255]], [[0, 116, 255]], [[0, 112, 255]], [[0, 108, 255]], [[0, 104, 255]], + [[0, 100, 255]], [[0, 96, 255]], [[0, 92, 255]], [[0, 88, 255]], [[0, 84, 255]], [[0, 80, 255]], + [[0, 76, 255]], [[0, 72, 255]], [[0, 68, 255]], [[0, 64, 255]], [[0, 60, 255]], [[0, 56, 255]], [[0, 52, 255]], + [[0, 48, 255]], [[0, 44, 255]], [[0, 40, 255]], [[0, 36, 255]], [[0, 32, 255]], [[0, 28, 255]], [[0, 24, 255]], + [[0, 20, 255]], [[0, 16, 255]], [[0, 12, 255]], [[0, 8, 255]], [[0, 4, 255]], [[0, 0, 255]], [[0, 0, 252]], + [[0, 0, 248]], [[0, 0, 244]], [[0, 0, 240]], [[0, 0, 236]], [[0, 0, 232]], [[0, 0, 228]], [[0, 0, 224]], + [[0, 0, 220]], [[0, 0, 216]], [[0, 0, 212]], [[0, 0, 208]], [[0, 0, 204]], [[0, 0, 200]], [[0, 0, 196]], + [[0, 0, 192]], [[0, 0, 188]], [[0, 0, 184]], [[0, 0, 180]], [[0, 0, 176]], [[0, 0, 172]], [[0, 0, 168]], + [[0, 0, 164]], [[0, 0, 160]], [[0, 0, 156]], [[0, 0, 152]], [[0, 0, 148]], [[0, 0, 144]], [[0, 0, 140]], + [[0, 0, 136]], [[0, 0, 132]], [[0, 0, 128]]], dtype=np.uint8)) wls_filter: bool = False wls_lambda: float = 8000 wls_sigma: float = 1.5 + @dataclass class DetectionConfig: """Configuration for drawing bounding boxes.""" @@ -94,7 +132,7 @@ class TextConfig: @dataclass class TrackingConfig: """Configuration for drawing tracking bounding boxes.""" - max_length: int = -1 + max_length: int = 500 deletion_lost_threshold: int = 5 line_thickness: int = 1 fading_tails: bool = False diff --git a/depthai_sdk/src/depthai_sdk/visualize/objects.py b/depthai_sdk/src/depthai_sdk/visualize/objects.py index 8fe4991ee..e7d19704b 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/objects.py +++ b/depthai_sdk/src/depthai_sdk/visualize/objects.py @@ -1,12 +1,9 @@ import logging +import math from abc import ABC, abstractmethod from collections import defaultdict -from typing import Tuple, List, Union, Optional, Sequence - -try: - import cv2 -except ImportError: - cv2 = None +from types import SimpleNamespace +from typing import Tuple, List, Union import depthai as dai import numpy as np @@ -14,7 +11,14 @@ from depthai_sdk.visualize.bbox import BoundingBox from depthai_sdk.visualize.configs import VisConfig, BboxStyle, TextPosition -from depthai_sdk.visualize.visualizer_helper import spatials_text + + +def spatials_text(spatials: dai.Point3f): + return SimpleNamespace( + x="X: " + ("{:.1f}m".format(spatials.x / 1000) if not math.isnan(spatials.x) else "--"), + y="Y: " + ("{:.1f}m".format(spatials.y / 1000) if not math.isnan(spatials.y) else "--"), + z="Z: " + 
("{:.1f}m".format(spatials.z / 1000) if not math.isnan(spatials.z) else "--"), + ) class GenericObject(ABC): @@ -53,21 +57,6 @@ def set_frame_shape(self, frame_shape: Tuple[int, ...]) -> 'GenericObject': self.frame_shape = frame_shape return self - @abstractmethod - def draw(self, frame: np.ndarray) -> None: - """ - Draw the object on the frame. - - Args: - frame: frame to draw on. - """ - raise NotImplementedError - - def draw_children(self, frame: np.ndarray) -> None: - for child in self.children: - child.draw(frame) - - @abstractmethod def prepare(self) -> 'GenericObject': """ Prepare necessary data for drawing. @@ -75,7 +64,7 @@ def prepare(self) -> 'GenericObject': Returns: self """ - raise NotImplementedError + return self @abstractmethod def serialize(self) -> dict: @@ -107,132 +96,6 @@ def children(self) -> List['GenericObject']: """ return self._children - def draw_bbox(self, - img: np.ndarray, - pt1: Tuple[int, int], - pt2: Tuple[int, int], - color: Tuple[int, int, int], - thickness: int, - r: int, - line_width: int, - line_height: int - ) -> None: - """ - Draw a rounded rectangle on the image (in-place). - - Args: - img: Image to draw on. - pt1: Top-left corner of the rectangle. - pt2: Bottom-right corner of the rectangle. - color: Rectangle color. - thickness: Rectangle line thickness. - r: Radius of the rounded corners. - line_width: Width of the rectangle line. - line_height: Height of the rectangle line. - """ - x1, y1 = pt1 - x2, y2 = pt2 - - if line_width == 0: - line_width = np.abs(x2 - x1) - line_width -= 2 * r if r > 0 else 0 # Adjust for rounded corners - - if line_height == 0: - line_height = np.abs(y2 - y1) - line_height -= 2 * r if r > 0 else 0 # Adjust for rounded corners - - # Top left - cv2.line(img, (x1 + r, y1), (x1 + r + line_width, y1), color, thickness) - cv2.line(img, (x1, y1 + r), (x1, y1 + r + line_height), color, thickness) - cv2.ellipse(img, (x1 + r, y1 + r), (r, r), 180, 0, 90, color, thickness) - - # Top right - cv2.line(img, (x2 - r, y1), (x2 - r - line_width, y1), color, thickness) - cv2.line(img, (x2, y1 + r), (x2, y1 + r + line_height), color, thickness) - cv2.ellipse(img, (x2 - r, y1 + r), (r, r), 270, 0, 90, color, thickness) - - # Bottom left - cv2.line(img, (x1 + r, y2), (x1 + r + line_width, y2), color, thickness) - cv2.line(img, (x1, y2 - r), (x1, y2 - r - line_height), color, thickness) - cv2.ellipse(img, (x1 + r, y2 - r), (r, r), 90, 0, 90, color, thickness) - - # Bottom right - cv2.line(img, (x2 - r, y2), (x2 - r - line_width, y2), color, thickness) - cv2.line(img, (x2, y2 - r), (x2, y2 - r - line_height), color, thickness) - cv2.ellipse(img, (x2 - r, y2 - r), (r, r), 0, 0, 90, color, thickness) - - # Fill the area - alpha = self.config.detection.fill_transparency - if alpha > 0: - overlay = img.copy() - - thickness = -1 - bbox = (pt1[0], pt1[1], pt2[0], pt2[1]) - - top_left = (bbox[0], bbox[1]) - bottom_right = (bbox[2], bbox[3]) - top_right = (bottom_right[0], top_left[1]) - bottom_left = (top_left[0], bottom_right[1]) - - top_left_main_rect = (int(top_left[0] + r), int(top_left[1])) - bottom_right_main_rect = (int(bottom_right[0] - r), int(bottom_right[1])) - - top_left_rect_left = (top_left[0], top_left[1] + r) - bottom_right_rect_left = (bottom_left[0] + r, bottom_left[1] - r) - - top_left_rect_right = (top_right[0] - r, top_right[1] + r) - bottom_right_rect_right = (bottom_right[0], bottom_right[1] - r) - - all_rects = [ - [top_left_main_rect, bottom_right_main_rect], - [top_left_rect_left, bottom_right_rect_left], - 
[top_left_rect_right, bottom_right_rect_right] - ] - - [cv2.rectangle(overlay, pt1=rect[0], pt2=rect[1], color=color, thickness=thickness) for rect in all_rects] - - cv2.ellipse(overlay, (top_left[0] + r, top_left[1] + r), (r, r), 180.0, 0, 90, color, thickness) - cv2.ellipse(overlay, (top_right[0] - r, top_right[1] + r), (r, r), 270.0, 0, 90, color, thickness) - cv2.ellipse(overlay, (bottom_right[0] - r, bottom_right[1] - r), (r, r), 0.0, 0, 90, color, thickness) - cv2.ellipse(overlay, (bottom_left[0] + r, bottom_left[1] - r), (r, r), 90.0, 0, 90, color, thickness) - - cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img) - - def draw_stylized_bbox(self, - img: np.ndarray, - pt1: Tuple[int, int], - pt2: Tuple[int, int], - color: Tuple[int, int, int], - thickness: int, - bbox_style: BboxStyle = None - ) -> None: - """ - Draw a stylized bounding box. The style is either passed as an argument or defined in the config. - - Args: - img: Image to draw on. - pt1: Top left corner. - pt2: Bottom right corner. - color: Boundary color. - thickness: Border thickness. - bbox_style: Bounding box style. - """ - box_w = pt2[0] - pt1[0] - box_h = pt2[1] - pt1[1] - line_width = int(box_w * self.config.detection.line_width) // 2 - line_height = int(box_h * self.config.detection.line_height) // 2 - roundness = int(self.config.detection.box_roundness) - bbox_style = bbox_style or self.config.detection.bbox_style - - if bbox_style == BboxStyle.RECTANGLE: - self.draw_bbox(img, pt1, pt2, color, thickness, 0, line_width=0, line_height=0) - elif bbox_style == BboxStyle.CORNERS: - self.draw_bbox(img, pt1, pt2, color, thickness, 0, line_width=line_width, line_height=line_height) - elif bbox_style == BboxStyle.ROUNDED_RECTANGLE: - self.draw_bbox(img, pt1, pt2, color, thickness, roundness, line_width=0, line_height=0) - elif bbox_style == BboxStyle.ROUNDED_CORNERS: - self.draw_bbox(img, pt1, pt2, color, thickness, roundness, line_width=line_width, line_height=line_height) - class VisImage(GenericObject): def __init__(self, image: np.ndarray, frame_shape: Tuple[int, ...]): @@ -255,7 +118,7 @@ class VisBoundingBox(GenericObject): """ def __init__(self, - bbox: Union[np.ndarray, Tuple[int, ...]], + bbox: BoundingBox, label: str, color: Tuple[int, int, int], thickness: int, @@ -267,9 +130,6 @@ def __init__(self, self.thickness = thickness self.bbox_style = bbox_style - def draw(self, frame: np.ndarray) -> None: - self.draw_stylized_bbox(frame, self.bbox[0:2], self.bbox[2:4], self.color, self.thickness, self.bbox_style) - def prepare(self) -> 'GenericObject': return self @@ -326,7 +186,7 @@ def __init__(self, self.labels = [] self.colors = [] - try: # Check if the detections are of type _TrackingDetection + try: # Check if the detections are of type TrackingDetection self.detections = [t.srcImgDetection for t in self.detections] except AttributeError: pass @@ -373,8 +233,7 @@ def prepare(self) -> 'VisDetections': # Get normalized bounding box normalized_bbox = self.normalizer.get_relative_bbox(BoundingBox(detection)) if len(self.frame_shape) < 2: - logging.debug('Visualizer: skipping detection because frame shape is invalid: {}' - .format(self.frame_shape)) + logging.debug(f'Visualizer: skipping detection because frame shape is invalid: {self.frame_shape}') return self # TODO can normalize accept frame shape? 
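
For reference, a small sketch of the BoundingBox helpers the code above relies on (clip(), top_left()/bottom_right() and denormalize() from bbox.py); the coordinates are illustrative only:

from depthai_sdk.visualize.bbox import BoundingBox

bb = BoundingBox((0.1, 0.2, 0.5, 0.9)).clip()  # normalized (xmin, ymin, xmax, ymax), clamped to [0, 1]
print(bb.top_left(), bb.bottom_right())        # (0.1, 0.2) (0.5, 0.9), still normalized
tl, br = bb.denormalize((720, 1280, 3))        # pixel top-left / bottom-right on a 1280x720 BGR frame
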
@@ -421,24 +280,6 @@ def get_detections(self) -> List[Tuple[np.ndarray, str, Tuple[int, int, int]]]: """ return list(zip(self.bboxes, self.labels, self.colors)) - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - for bbox, _, color in self.get_detections(): - tl, br = bbox.denormalize(frame.shape) - # Draw bounding box - self.draw_stylized_bbox( - img=frame, - pt1=tl, - pt2=br, - color=color, - thickness=self.config.detection.thickness - ) - - for child in self.children: - child.draw(frame) - class VisText(GenericObject): """ @@ -503,145 +344,6 @@ def serialize(self): 'background_transparency': self.background_transparency } - def prepare(self) -> 'VisText': - # TODO: in the future, we can stop support for passing pixel-space bbox to the - # visualizer. - if isinstance(self.bbox, (Sequence, np.ndarray)): - # Convert to BoundingBox. Divide by self.frame_shape and load into the BoundingBox - self.bbox = list(self.bbox) - self.bbox[0] /= self.frame_shape[1] - self.bbox[1] /= self.frame_shape[0] - self.bbox[2] /= self.frame_shape[1] - self.bbox[3] /= self.frame_shape[0] - self.bbox = BoundingBox(self.bbox) - self.coords = self.coords or self.get_relative_position(bbox=self.bbox, - position=self.position, - padding=self.padding) - return self - - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - text_config = self.config.text - - # Extract shape of the bbox if exists - if self.bbox is not None: - tl, br = self.bbox.denormalize(frame.shape) - shape = br[0] - tl[0], br[1] - tl[1] - else: - shape = frame.shape[:2] - - font_scale = self.size or text_config.font_scale - if self.size is None and text_config.auto_scale: - font_scale = self.get_text_scale(shape, self.bbox) - - # Calculate font thickness - font_thickness = max(1, int(font_scale * 2)) \ - if text_config.auto_scale else self.thickness or text_config.font_thickness - - dx, dy = cv2.getTextSize(self.text, text_config.font_face, font_scale, font_thickness)[0] - dy += 10 - - for line in self.text.splitlines(): - y = self.coords[1] - - background_color = self.background_color or text_config.background_color - background_transparency = self.background_transparency or text_config.background_transparency - if background_color is not None: - img_with_background = cv2.rectangle(img=frame.copy(), - pt1=(self.coords[0], y - dy), - pt2=(self.coords[0] + dx, y + 10), - color=background_color, - thickness=-1) - # take transparency into account - cv2.addWeighted(src1=img_with_background, - alpha=background_transparency, - src2=frame, - beta=1 - background_transparency, - gamma=0, - dst=frame) - - if self.outline: - # Background - cv2.putText(img=frame, - text=line, - org=self.coords, - fontFace=text_config.font_face, - fontScale=font_scale, - color=text_config.outline_color, - thickness=font_thickness + 1, - lineType=text_config.line_type) - - # Front text - cv2.putText(img=frame, - text=line, - org=self.coords, - fontFace=text_config.font_face, - fontScale=font_scale, - color=self.color or text_config.font_color, - thickness=font_thickness, - lineType=text_config.line_type) - - self.coords = (self.coords[0], y + dy) - - def get_relative_position(self, - bbox: BoundingBox, - position: TextPosition, - padding: int - ) -> Tuple[int, int]: - """ - Get relative position of the text w.r.t. the bounding box. - If bbox is None,the position is relative to the frame. 
- """ - if bbox is None: - bbox = BoundingBox() - text_config = self.config.text - - tl, br = bbox.denormalize(self.frame_shape) - shape = br[0] - tl[0], br[1] - tl[1] - - bbox_arr = bbox.to_tuple(self.frame_shape) - - font_scale = self.size or text_config.font_scale - if self.size is None and text_config.auto_scale: - font_scale = self.get_text_scale(shape, bbox_arr) - - text_width, text_height = 0, 0 - for text in self.text.splitlines(): - text_size = cv2.getTextSize(text=text, - fontFace=text_config.font_face, - fontScale=font_scale, - thickness=text_config.font_thickness)[0] - text_width = max(text_width, text_size[0]) - text_height += text_size[1] - - x, y = bbox_arr[0], bbox_arr[1] - - y_pos = position.value % 10 - if y_pos == 0: # Y top - y = bbox_arr[1] + text_height + padding - elif y_pos == 1: # Y mid - y = (bbox_arr[1] + bbox_arr[3]) // 2 + text_height // 2 - elif y_pos == 2: # Y bottom - y = bbox_arr[3] - text_height - padding - - x_pos = position.value // 10 - if x_pos == 0: # X Left - x = bbox_arr[0] + padding - elif x_pos == 1: # X mid - x = (bbox_arr[0] + bbox_arr[2]) // 2 - text_width // 2 - elif x_pos == 2: # X right - x = bbox_arr[2] - text_width - padding - - return x, y - - def get_text_scale(self, - frame_shape: Union[np.ndarray, Tuple[int, ...]], - bbox: Optional[BoundingBox] = None - ) -> float: - return min(1.0, min(frame_shape) / (1000 if bbox is None else 200)) - class VisTrail(GenericObject): """ @@ -730,12 +432,6 @@ def get_rect_centroid(rect: dai.Rect, w, h) -> Tuple[int, int]: """ return int(w * (rect.x + rect.width) // 2), int(h * (rect.y + rect.height) // 2) - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - self.draw_children(frame) - class VisLine(GenericObject): """ @@ -777,17 +473,6 @@ def serialize(self): def prepare(self) -> 'VisLine': return self - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - tracking_config = self.config.tracking - cv2.line(frame, - self.pt1, self.pt2, - self.color or tracking_config.line_color, - self.thickness or tracking_config.line_thickness, - tracking_config.line_type) - class VisCircle(GenericObject): def __init__(self, @@ -824,18 +509,6 @@ def serialize(self): return parent - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - circle_config = self.config.circle - cv2.circle(frame, - self.coords, - self.radius, - self.color or circle_config.color, - self.thickness or circle_config.thickness, - circle_config.line_type) - class VisMask(GenericObject): def __init__(self, mask: np.ndarray, alpha: float = None): @@ -857,12 +530,6 @@ def serialize(self): return parent - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - cv2.addWeighted(frame, 1 - self.alpha, self.mask, self.alpha, 0, frame) - class VisPolygon(GenericObject): def __init__(self, polygon): diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizer.py b/depthai_sdk/src/depthai_sdk/visualize/visualizer.py index 67d214f69..18c5e1948 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/visualizer.py +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizer.py @@ -1,44 +1,39 @@ import json -import os from dataclasses import replace -from enum import Enum from typing import List, Tuple, Optional, Union, Any, Dict -try: - import cv2 -except ImportError: - cv2 = None - import depthai as dai import numpy as np from depthai import 
ImgDetection
 
+from depthai_sdk.fps import FPSHandler
 from depthai_sdk.visualize.bbox import BoundingBox
 from depthai_sdk.visualize.configs import VisConfig, TextPosition, BboxStyle, StereoColor
 from depthai_sdk.visualize.encoder import JSONEncoder
 from depthai_sdk.visualize.objects import VisDetections, GenericObject, VisText, VisTrail, VisCircle, VisLine, VisMask, \
     VisBoundingBox
-from depthai_sdk.visualize.visualizer_helper import VisualizerHelper
 
 
-class Platform(Enum):
-    """
-    Platform on which the visualizer is running.
-    """
-    ROBOTHUB = 'robothub'
-    PC = 'pc'
+class VisualizerFps:
+    def __init__(self):
+        self.fps_list: Dict[str, FPSHandler] = {}
 
+    def get_fps(self, name: str) -> float:
+        if name not in self.fps_list:
+            self.fps_list[name] = FPSHandler()
 
-class Visualizer(VisualizerHelper):
-    # Constants
-    IS_INTERACTIVE = 'DISPLAY' in os.environ or os.name == 'nt'
+        self.fps_list[name].nextIter()
+        return self.fps_list[name].fps()
+
 
+class Visualizer:
     def __init__(self, scale: float = None, fps: bool = False):
-        self.platform: Platform = self._detect_platform()
         self.objects: List[GenericObject] = []
         self._frame_shape: Optional[Tuple[int, ...]] = None
 
         self.config = VisConfig()
+        self.fps = VisualizerFps()
 
         if fps:
             self.output(show_fps=fps)
@@ -59,7 +54,7 @@ def add_object(self, obj: GenericObject) -> 'Visualizer':
         return self
 
     def add_bbox(self,
-                 bbox: Union[np.ndarray, Tuple[int, ...]],
+                 bbox: BoundingBox,
                  color: Tuple[int, int, int] = None,
                  thickness: int = None,
                  bbox_style: BboxStyle = None,
@@ -135,7 +130,7 @@ def add_text(self,
                  outline: bool = True,
                  background_color: Tuple[int, int, int] = None,
                  background_transparency: float = 0.5,
-                 bbox: Union[np.ndarray, Tuple[int, ...], BoundingBox] = None,
+                 bbox: Union[np.ndarray, Tuple, BoundingBox] = None,
                  position: TextPosition = TextPosition.TOP_LEFT,
                  padding: int = 10) -> 'Visualizer':
         """
@@ -157,6 +152,9 @@ def add_text(self,
         Returns:
             self
         """
+        if isinstance(bbox, Tuple) and type(bbox[0]) == float:
+            bbox = BoundingBox(bbox)
+
         text_overlay = VisText(text=text,
                                coords=coords,
                                size=size,
@@ -258,7 +256,7 @@ def add_mask(self, mask: np.ndarray, alpha: float):
         self.add_object(mask_overlay)
         return self
 
-    def draw(self, frame: np.ndarray) -> Optional[np.ndarray]:
+    def draw(self, frame: np.ndarray) -> Optional[np.ndarray]:
         """
         Draw all objects on the frame if the platform is PC.
         Otherwise, serialize the objects and communicate with the RobotHub application.
@@ -269,20 +267,13 @@ def draw(self, frame: np.ndarray) -> Optional[np.ndarray]:
         Returns:
             np.ndarray if the platform is PC, None otherwise.
         """
-        # Draw overlays
-        for obj in self.objects:
-            obj.draw(frame)
-
-        # Resize frame if needed
-        img_scale = self.config.output.img_scale
-        if img_scale:
-            if isinstance(img_scale, Tuple):
-                frame = cv2.resize(frame, img_scale)
-            elif isinstance(img_scale, float) and img_scale != 1.0:
-                frame = cv2.resize(frame, dsize=None, fx=img_scale, fy=img_scale)
+        raise NotImplementedError('Visualizers that inherit from Visualizer must implement draw() method!')
 
-        self.reset()
-        return frame
+    def show(self, packet):
+        """
+        Show the packet on the screen.
+        """
+        pass
 
     def serialize(self, force_reset: bool = True) -> str:
         """
@@ -295,7 +286,6 @@ def serialize(self, force_reset: bool = True) -> str:
             Stringified JSON.
""" parent = { - 'platform': self.platform.value, 'frame_shape': self.frame_shape, 'config': self.config, 'objects': [obj.serialize() for obj in self.objects] @@ -458,15 +448,6 @@ def segmentation(self, return self - def _detect_platform(self) -> Platform: - """ - Detect the platform on which the visualizer is running. - - Returns: - Platform - """ - return Platform.PC if self.IS_INTERACTIVE else Platform.ROBOTHUB - @property def frame_shape(self) -> Tuple[int, ...]: return self._frame_shape @@ -481,3 +462,6 @@ def _process_kwargs(kwargs: Dict[str, Any]) -> Dict[str, Any]: kwargs.pop('self') kwargs = {k: v for k, v in kwargs.items() if v is not None} return kwargs + + def close(self): + pass diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py b/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py index 398e3c9f7..f7602fce7 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py @@ -1,8 +1,10 @@ -import math from enum import IntEnum -from types import SimpleNamespace from typing import Tuple, Union, List, Any, Dict +from depthai_sdk.classes.nn_results import TrackingDetection, TwoStageDetection +from depthai_sdk.visualize.configs import BboxStyle +from depthai_sdk.visualize.objects import VisBoundingBox + try: import cv2 except ImportError: @@ -13,13 +15,12 @@ from depthai_sdk.classes.packets import ( DetectionPacket, - _TwoStageDetection, SpatialBbMappingPacket, TrackerPacket, - _TrackingDetection ) from depthai_sdk.visualize.bbox import BoundingBox + class FramePosition(IntEnum): """ Where on frame do we want to print text. @@ -238,6 +239,7 @@ def rectangle(src, return src + def draw_mappings(packet: SpatialBbMappingPacket): dets = packet.spatials.detections for det in dets: @@ -254,15 +256,7 @@ def draw_mappings(packet: SpatialBbMappingPacket): cv2.rectangle(packet.frame, (x_min, y_min), (x_max, y_max), VisualizerHelper.front_color, 1) -def spatials_text(spatials: dai.Point3f): - return SimpleNamespace( - x="X: " + ("{:.1f}m".format(spatials.x / 1000) if not math.isnan(spatials.x) else "--"), - y="Y: " + ("{:.1f}m".format(spatials.y / 1000) if not math.isnan(spatials.y) else "--"), - z="Z: " + ("{:.1f}m".format(spatials.z / 1000) if not math.isnan(spatials.z) else "--"), - ) - - -def draw_detections(packet: Union[DetectionPacket, _TwoStageDetection, TrackerPacket], +def draw_detections(packet: Union[DetectionPacket, TwoStageDetection, TrackerPacket], norm: BoundingBox, label_map: List[Tuple[str, Tuple]] = None): """ @@ -312,7 +306,7 @@ def draw_tracklet_id(packet: TrackerPacket): def draw_breadcrumb_trail(packets: List[TrackerPacket]): packet = packets[-1] # Current packet - dict_: Dict[str, List[_TrackingDetection]] = {} + dict_: Dict[str, List[TrackingDetection]] = {} valid_ids = [t.id for t in packet.daiTracklets.tracklets] for idx in valid_ids: dict_[str(idx)] = [] @@ -369,27 +363,149 @@ def depth_to_disp_factor(device: dai.Device, stereo: dai.node.StereoDepth) -> fl @param device: OAK device """ calib = device.readCalibration() - cam1=calib.getStereoLeftCameraId() - cam2=calib.getStereoRightCameraId() + cam1 = calib.getStereoLeftCameraId() + cam2 = calib.getStereoRightCameraId() baseline = calib.getBaselineDistance(cam1=cam1, cam2=cam2, useSpecTranslation=True) * 10 # cm to mm - rawConf = stereo.initialConfig.get() + raw_conf = stereo.initialConfig.get() align: dai.CameraBoardSocket = stereo.properties.depthAlignCamera if align == dai.CameraBoardSocket.AUTO: align = cam2 
intrinsics = calib.getCameraIntrinsics(align) - focalLength = intrinsics[0][0] + focal_length = intrinsics[0][0] - factor = baseline * focalLength - if rawConf.algorithmControl.enableExtended: + factor = baseline * focal_length + if raw_conf.algorithmControl.enableExtended: factor /= 2 return factor -def hex_to_bgr(hex: str) -> Tuple[int, ...]: + +def draw_bbox(img: np.ndarray, + pt1: Tuple[int, int], + pt2: Tuple[int, int], + color: Tuple[int, int, int], + thickness: int, + r: int, + line_width: int, + line_height: int, + alpha: float + ) -> None: + """ + Draw a rounded rectangle on the image (in-place). + + Args: + img: Image to draw on. + pt1: Top-left corner of the rectangle. + pt2: Bottom-right corner of the rectangle. + color: Rectangle color. + thickness: Rectangle line thickness. + r: Radius of the rounded corners. + line_width: Width of the rectangle line. + line_height: Height of the rectangle line. + alpha: Opacity of the rectangle. + """ + x1, y1 = pt1 + x2, y2 = pt2 + + if line_width == 0: + line_width = np.abs(x2 - x1) + line_width -= 2 * r if r > 0 else 0 # Adjust for rounded corners + + if line_height == 0: + line_height = np.abs(y2 - y1) + line_height -= 2 * r if r > 0 else 0 # Adjust for rounded corners + + # Top left + cv2.line(img, (x1 + r, y1), (x1 + r + line_width, y1), color, thickness) + cv2.line(img, (x1, y1 + r), (x1, y1 + r + line_height), color, thickness) + cv2.ellipse(img, (x1 + r, y1 + r), (r, r), 180, 0, 90, color, thickness) + + # Top right + cv2.line(img, (x2 - r, y1), (x2 - r - line_width, y1), color, thickness) + cv2.line(img, (x2, y1 + r), (x2, y1 + r + line_height), color, thickness) + cv2.ellipse(img, (x2 - r, y1 + r), (r, r), 270, 0, 90, color, thickness) + + # Bottom left + cv2.line(img, (x1 + r, y2), (x1 + r + line_width, y2), color, thickness) + cv2.line(img, (x1, y2 - r), (x1, y2 - r - line_height), color, thickness) + cv2.ellipse(img, (x1 + r, y2 - r), (r, r), 90, 0, 90, color, thickness) + + # Bottom right + cv2.line(img, (x2 - r, y2), (x2 - r - line_width, y2), color, thickness) + cv2.line(img, (x2, y2 - r), (x2, y2 - r - line_height), color, thickness) + cv2.ellipse(img, (x2 - r, y2 - r), (r, r), 0, 0, 90, color, thickness) + + # Fill the area + if 0 < alpha: + overlay = img.copy() + + thickness = -1 + bbox = (pt1[0], pt1[1], pt2[0], pt2[1]) + + top_left = (bbox[0], bbox[1]) + bottom_right = (bbox[2], bbox[3]) + top_right = (bottom_right[0], top_left[1]) + bottom_left = (top_left[0], bottom_right[1]) + + top_left_main_rect = (int(top_left[0] + r), int(top_left[1])) + bottom_right_main_rect = (int(bottom_right[0] - r), int(bottom_right[1])) + + top_left_rect_left = (top_left[0], top_left[1] + r) + bottom_right_rect_left = (bottom_left[0] + r, bottom_left[1] - r) + + top_left_rect_right = (top_right[0] - r, top_right[1] + r) + bottom_right_rect_right = (bottom_right[0], bottom_right[1] - r) + + all_rects = [ + [top_left_main_rect, bottom_right_main_rect], + [top_left_rect_left, bottom_right_rect_left], + [top_left_rect_right, bottom_right_rect_right] + ] + + [cv2.rectangle(overlay, pt1=rect[0], pt2=rect[1], color=color, thickness=thickness) for rect in all_rects] + + cv2.ellipse(overlay, (top_left[0] + r, top_left[1] + r), (r, r), 180.0, 0, 90, color, thickness) + cv2.ellipse(overlay, (top_right[0] - r, top_right[1] + r), (r, r), 270.0, 0, 90, color, thickness) + cv2.ellipse(overlay, (bottom_right[0] - r, bottom_right[1] - r), (r, r), 0.0, 0, 90, color, thickness) + cv2.ellipse(overlay, (bottom_left[0] + r, bottom_left[1] - r), (r, r), 90.0, 0, 
90, color, thickness) + + cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img) + + +def draw_stylized_bbox(img: np.ndarray, obj: VisBoundingBox) -> None: """ - "#ff1f00" (red) => (0, 31, 255) + Draw a stylized bounding box. The style is either passed as an argument or defined in the config. + + Args: + img: Image to draw on. + obj: Bounding box to draw. """ - value = hex.lstrip('#') - return tuple(int(value[i:i + 2], 16) for i in (4, 2, 0)) + pt1, pt2 = obj.bbox.denormalize(img.shape) + + box_w = pt2[0] - pt1[0] + box_h = pt2[1] - pt1[1] + + line_width = int(box_w * obj.config.detection.line_width) // 2 + line_height = int(box_h * obj.config.detection.line_height) // 2 + roundness = int(obj.config.detection.box_roundness) + bbox_style = obj.bbox_style or obj.config.detection.bbox_style + alpha = obj.config.detection.fill_transparency + + if bbox_style == BboxStyle.RECTANGLE: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, 0, + line_width=0, line_height=0, alpha=alpha) + elif bbox_style == BboxStyle.CORNERS: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, 0, + line_width=line_width, line_height=line_height, alpha=alpha) + elif bbox_style == BboxStyle.ROUNDED_RECTANGLE: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, roundness, + line_width=0, line_height=0, alpha=alpha) + elif bbox_style == BboxStyle.ROUNDED_CORNERS: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, roundness, + line_width=line_width, line_height=line_height, alpha=alpha) diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_text.py b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_text.py new file mode 100644 index 000000000..32e76da86 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_text.py @@ -0,0 +1,132 @@ +from typing import Optional, Sequence, Tuple, Union + +import cv2 +import numpy as np + +from depthai_sdk.visualize.bbox import BoundingBox +from depthai_sdk.visualize.configs import VisConfig +from depthai_sdk.visualize.objects import VisText + + +class OpenCvTextVis: + def __init__(self, text: VisText, config: VisConfig): + self.text = text + self.config = config + + def draw_text(self, frame: np.ndarray): + obj = self.text + + self.prepare(frame.shape) + + text_config = self.config.text + + # Extract shape of the bbox if exists + if obj.bbox is not None: + # shape = self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1] + tl, br = obj.bbox.denormalize(frame.shape) + shape = br[0] - tl[0], br[1] - tl[1] + else: + shape = frame.shape[:2] + + font_scale = obj.size or text_config.font_scale + if obj.size is None and text_config.auto_scale: + font_scale = self.get_text_scale(shape, obj.bbox) + + # Calculate font thickness + font_thickness = max(1, int(font_scale * 2)) \ + if text_config.auto_scale else obj.thickness or text_config.font_thickness + + dy = cv2.getTextSize(obj.text, text_config.font_face, font_scale, font_thickness)[0][1] + 10 + + for line in obj.text.splitlines(): + y = obj.coords[1] + + if obj.outline: + # Background + cv2.putText(img=frame, + text=line, + org=obj.coords, + fontFace=text_config.font_face, + fontScale=font_scale, + color=text_config.background_color, + thickness=font_thickness + 1, + lineType=text_config.line_type) + + # Front text + cv2.putText(img=frame, + text=line, + org=obj.coords, + fontFace=text_config.font_face, + fontScale=font_scale, + color=obj.color or text_config.font_color, + thickness=font_thickness, + lineType=text_config.line_type) + + obj.coords = (obj.coords[0], y + dy) + + def 
get_relative_position(self, obj: VisText, frame_shape) -> Tuple[int, int]: + """ + Get relative position of the text w.r.t. the bounding box. + If bbox is None, the position is relative to the frame. + """ + if obj.bbox is None: + obj.bbox = BoundingBox() + text_config = self.config.text + + tl, br = obj.bbox.denormalize(frame_shape) + shape = br[0] - tl[0], br[1] - tl[1] + + bbox_arr = obj.bbox.to_tuple(frame_shape) + + font_scale = obj.size or text_config.font_scale + if obj.size is None and text_config.auto_scale: + self.get_text_scale(shape, bbox_arr) + + text_width, text_height = 0, 0 + for text in obj.text.splitlines(): + text_size = cv2.getTextSize(text=text, + fontFace=text_config.font_face, + fontScale=font_scale, + thickness=text_config.font_thickness)[0] + text_width = max(text_width, text_size[0]) + text_height += text_size[1] + + x, y = bbox_arr[0], bbox_arr[1] + + y_pos = obj.position.value % 10 + if y_pos == 0: # Y top + y = bbox_arr[1] + text_height + obj.padding + elif y_pos == 1: # Y mid + y = (bbox_arr[1] + bbox_arr[3]) // 2 + text_height // 2 + elif y_pos == 2: # Y bottom + y = bbox_arr[3] - text_height - obj.padding + + x_pos = obj.position.value // 10 + if x_pos == 0: # X Left + x = bbox_arr[0] + obj.padding + elif x_pos == 1: # X mid + x = (bbox_arr[0] + bbox_arr[2]) // 2 - text_width // 2 + elif x_pos == 2: # X right + x = bbox_arr[2] - text_width - obj.padding + + return x, y + + def prepare(self, frame_shape): + # TODO: in the future, we can stop support for passing pixel-space bbox to the + # visualizer. + if isinstance(self.text.bbox, (Sequence, np.ndarray)) and type(self.text.bbox[0]) == int: + # Convert to BoundingBox. Divide by self.frame_shape and load into the BoundingBox + bbox = list(self.text.bbox) + bbox[0] /= frame_shape[1] + bbox[1] /= frame_shape[0] + bbox[2] /= frame_shape[1] + bbox[3] /= frame_shape[0] + self.text.bbox = BoundingBox(bbox) + + self.text.coords = self.text.coords or self.get_relative_position(self.text, frame_shape) + + def get_text_scale(self, + frame_shape: Union[np.ndarray, Tuple[int, ...]], + bbox: Optional[BoundingBox] = None + ) -> float: + return min(1.0, min(frame_shape) / (1000 if bbox is None else 200)) diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_visualizer.py b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_visualizer.py new file mode 100644 index 000000000..ca204a39b --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_visualizer.py @@ -0,0 +1,102 @@ +import logging +from typing import Optional + +import cv2 +import numpy as np + +from depthai_sdk.classes.packets import DisparityPacket, FramePacket +from depthai_sdk.visualize.configs import TextPosition +from depthai_sdk.visualize.objects import ( + VisBoundingBox, + VisCircle, + VisDetections, + VisLine, + VisMask, + VisText, + VisTrail, +) +from depthai_sdk.visualize.visualizer import Visualizer +from depthai_sdk.visualize.visualizer_helper import draw_stylized_bbox, draw_bbox +from depthai_sdk.visualize.visualizers.opencv_text import OpenCvTextVis + + +class OpenCvVisualizer(Visualizer): + def draw(self, frame: np.ndarray) -> Optional[np.ndarray]: + """ + Draw all queued overlay objects onto the frame with OpenCV, then reset the visualizer state. + + Args: + frame: The frame to draw on. + + Returns: + The frame with the overlays drawn onto it.
+ """ + # Draw overlays + for obj in self.objects: + if type(obj) == VisBoundingBox: + draw_stylized_bbox(frame, obj=obj) + elif type(obj) == VisDetections: + for bbox, _, color in obj.get_detections(): + tl, br = bbox.denormalize(frame.shape) + draw_bbox( + img=frame, + pt1=tl, + pt2=br, + color=color, + thickness=self.config.detection.thickness, + r=self.config.detection.radius, + line_width=self.config.detection.line_width, + line_height=self.config.detection.line_height, + alpha=self.config.detection.alpha, + ) + elif type(obj) == VisText: + OpenCvTextVis(obj, self.config).draw_text(frame) + elif type(obj) == VisTrail: + obj = obj.prepare() + # Children: VisLine + self.objects.extend(obj.children) + elif type(obj) == VisLine: + cv2.line(frame, + obj.pt1, obj.pt2, + obj.color or self.config.tracking.line_color, + obj.thickness or self.config.tracking.line_thickness, + self.config.tracking.line_type) + elif type(obj) == VisCircle: + circle_config = self.config.circle + cv2.circle(frame, + obj.coords, + obj.radius, + obj.color or circle_config.color, + obj.thickness or circle_config.thickness, + circle_config.line_type) + elif type(obj) == VisMask: + cv2.addWeighted(frame, 1 - obj.alpha, obj.mask, obj.alpha, 0, frame) + + self.reset() + return frame + + def show(self, packet) -> None: + if self.config.output.show_fps: + fps = self.fps.get_fps(packet.name) + self.add_text(text=f'FPS: {fps:.1f}', position=TextPosition.TOP_LEFT) + + if isinstance(packet, DisparityPacket): + frame = packet.get_colorized_frame(self) + elif isinstance(packet, FramePacket): + frame = packet.decode() + else: + logging.warning(f'Unknown packet type: {type(packet)}') + return + + if frame is not None: + drawn_frame = self.draw(frame) + if self.config.output.img_scale: + drawn_frame = cv2.resize(drawn_frame, + None, + fx=self.config.output.img_scale, + fy=self.config.output.img_scale) + cv2.imshow(packet.name, drawn_frame) + + def close(self): + cv2.destroyAllWindows() diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizers/viewer_visualizer.py b/depthai_sdk/src/depthai_sdk/visualize/visualizers/viewer_visualizer.py new file mode 100644 index 000000000..90f788005 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizers/viewer_visualizer.py @@ -0,0 +1,111 @@ +import logging +import subprocess +import sys + +import depthai_viewer as viewer +import numpy as np +from depthai_viewer.components.rect2d import RectFormat + +from depthai_sdk.classes.packets import FramePacket, IMUPacket, PointcloudPacket +from depthai_sdk.visualize.objects import ( + VisBoundingBox, + VisCircle, + VisDetections, + VisLine, + VisMask, + VisText, + VisTrail, +) +from depthai_sdk.visualize.visualizer import Visualizer + + +class DepthaiViewerVisualizer(Visualizer): + """ + Visualizer for Depthai Viewer (https://github.com/luxonis/depthai-viewer) + """ + + def __init__(self, scale, fps): + super().__init__(scale, fps) + + try: + # timeout is optional, but it might be good to prevent the script from hanging if the module is large. + process = subprocess.Popen([sys.executable, "-m", "depthai_viewer"], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate(timeout=3) + + if process.returncode != 0: + err_msg = stderr.decode("utf-8") + if 'Failed to bind TCP address' in err_msg: + # Already running + pass + elif 'No module named depthai_viewer' in err_msg: + raise Exception(f"DepthAI Viewer is not installed. 
" + f"Please run '{sys.executable} -m pip install depthai_viewer' to install it.") + else: + logging.exception(f"Error occurred while trying to run depthai_viewer: {err_msg}") + else: + print("depthai_viewer ran successfully.") + except subprocess.TimeoutExpired: + # Installed and running depthai_viewer successfully + pass + except subprocess.CalledProcessError as e: + print(f"An error occurred while trying to run 'depthai_viewer': {str(e)}") + + viewer.init("Depthai Viewer") + viewer.connect() + + def show(self, packet) -> None: + if isinstance(packet, FramePacket): + bgr_frame = packet.decode() + rgb_frame = bgr_frame[..., ::-1] + frame = np.dstack((rgb_frame, np.full(bgr_frame.shape[:2], 255, dtype=np.uint8))) + viewer.log_image(packet.name, frame) + + if type(packet) == IMUPacket: + viewer.log_imu(*packet.get_imu_vals()) + elif type(packet) == PointcloudPacket: + if packet.colorize_frame is not None: + bgr_frame = packet.colorize_frame + rgb_frame = bgr_frame[..., ::-1] + frame = np.dstack((rgb_frame, np.full(bgr_frame.shape[:2], 255, dtype=np.uint8))) + viewer.log_image(f'color', frame) + viewer.log_points(packet.name, packet.points.reshape(-1, 3) / 1000, colors=rgb_frame.reshape(-1, 3)) + else: + viewer.log_points(packet.name, packet.points.reshape(-1, 3) / 1000) + + vis_bbs = [] + for i, obj in enumerate(self.objects): + if type(obj) == VisBoundingBox: + vis_bbs.append(obj) + elif type(obj) == VisDetections: + pass + elif type(obj) == VisText: + pass + elif type(obj) == VisTrail: + pass + elif type(obj) == VisLine: + pass + elif type(obj) == VisCircle: + pass + elif type(obj) == VisMask: + pass + + if 0 < len(vis_bbs): + rects = [vis_bb.bbox.clip().denormalize(frame.shape) for vis_bb in vis_bbs] + # Convert from (pt1,pt2) to [x1,y1,x2,y2] + rects = [np.array([*rect[0], *rect[1]]) for rect in rects] + # BGR to RGB + colors = [np.array(vis_bb.color)[..., ::-1] for vis_bb in vis_bbs] + labels = [vis_bb.label for vis_bb in vis_bbs] + print(rects) + viewer.log_rects( + f"{packet.name}/Detections", + rects=rects, + rect_format=RectFormat.XYXY, + colors=colors, + labels=labels + ) + self.reset() + + def close(self): + pass diff --git a/resources/nn/yolo-v3-tf/yolo-v3-tf.json b/resources/nn/yolo-v3-tf/yolo-v3-tf.json index fc3950010..7d7eb8996 100644 --- a/resources/nn/yolo-v3-tf/yolo-v3-tf.json +++ b/resources/nn/yolo-v3-tf/yolo-v3-tf.json @@ -5,11 +5,11 @@ "NN_family" : "YOLO", "input_size": "416x416", "NN_specific_metadata" : - { + { "classes" : 80, "coordinates" : 4, "anchors" : [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0,116.0, 90.0, 156.0,198.0,373.0, 326.0], - "anchor_masks" : + "anchor_masks" : { "side52" : [0,1,2], "side26" : [3,4,5], @@ -102,7 +102,7 @@ "scissors", "teddy bear", "hair drier", - "toothbrush" + "toothbrush" ] } }