diff --git a/depthai_sdk/docs/source/conf.py b/depthai_sdk/docs/source/conf.py index 518fea8f7..be6d4bf19 100644 --- a/depthai_sdk/docs/source/conf.py +++ b/depthai_sdk/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Luxonis' # The full version, including alpha/beta/rc tags -release = '1.12.1' +release = '1.13.0' # -- General configuration --------------------------------------------------- diff --git a/depthai_sdk/docs/source/features/ai_models.rst b/depthai_sdk/docs/source/features/ai_models.rst index a14f4b44c..02affd2e6 100644 --- a/depthai_sdk/docs/source/features/ai_models.rst +++ b/depthai_sdk/docs/source/features/ai_models.rst @@ -53,6 +53,15 @@ The following table lists all the models supported by the SDK. The model name is * - ``face-detection-retail-0004`` - `OMZ `__ - 33 + * - ``facemesh_192x192`` + - `DMZ `__ + - 32 + * - ``facial_landmarks_68_160x160`` + - 32 + - `DMZ `__ + * - ``human-pose-estimation-0001`` + - `OMZ `__ + - 8 * - ``mobilenet-ssd`` - `OMZ `__ - 31 @@ -60,34 +69,37 @@ The following table lists all the models supported by the SDK. The model name is - `DMZ `__ - / * - ``pedestrian-detection-adas-0002`` - - `OMZ `__ + - `OMZ `__ - 19 * - ``person-detection-0200`` - - `OMZ `__ + - `OMZ `__ - 14 * - ``person-detection-retail-0013`` - - `OMZ `__ + - `OMZ `__ - 15 * - ``person-reidentification-retail-0288`` - - `OMZ `__ + - `OMZ `__ - 33 * - ``person-vehicle-bike-detection-crossroad-1016`` - - `OMZ `__ + - `OMZ `__ - 12 + * - ``sbd_mask_classification_224x224`` + - `DMZ `__ + - 64+ * - ``vehicle-detection-0202`` - - `OMZ `__ + - `OMZ `__ - 14 * - ``vehicle-detection-adas-0002`` - - `OMZ `__ + - `OMZ `__ - 14 * - ``vehicle-license-plate-detection-barrier-0106`` - - `OMZ `__ + - `OMZ `__ - 29 * - ``yolo-v3-tf`` - - `OMZ `__ + - `OMZ `__ - 3.5 * - ``yolo-v3-tiny-tf`` - - `OMZ `__ + - `OMZ `__ - 33 * - ``yolov4_coco_608x608`` - `DMZ `__ diff --git a/depthai_sdk/docs/source/fundamentals/visualizer.rst b/depthai_sdk/docs/source/fundamentals/visualizer.rst index aa5ce408b..cbecd1a12 100644 --- a/depthai_sdk/docs/source/fundamentals/visualizer.rst +++ b/depthai_sdk/docs/source/fundamentals/visualizer.rst @@ -200,10 +200,6 @@ Visualizer :members: :undoc-members: -.. autoclass:: depthai_sdk.visualize.visualizer.Platform - :members: - :undoc-members: - Objects ------- diff --git a/depthai_sdk/docs/source/oak-camera.rst b/depthai_sdk/docs/source/oak-camera.rst index a06a36b00..60d327c3b 100644 --- a/depthai_sdk/docs/source/oak-camera.rst +++ b/depthai_sdk/docs/source/oak-camera.rst @@ -17,8 +17,8 @@ Interoperability with DepthAI API --------------------------------- DepthAI SDK was developed with `DepthAI API `__ interoperability in mind. -Users can access all depthai API nodes inside components, and after ``oak.build()`` also the `dai.Pipeline `__ -and `dai.Device `__ objects. +Users can access all depthai API nodes inside components, along with the `dai.Pipeline `__ (``oak.pipeline``) +and `dai.Device `__ (``oak.device``) objects. .. literalinclude:: ../../examples/mixed/api_interop.py :language: python diff --git a/depthai_sdk/docs/source/tutorials/code_samples.rst b/depthai_sdk/docs/source/tutorials/code_samples.rst index 1364dc2d4..03f82e918 100644 --- a/depthai_sdk/docs/source/tutorials/code_samples.rst +++ b/depthai_sdk/docs/source/tutorials/code_samples.rst @@ -47,8 +47,7 @@ are presented with code. .. 
rubric:: NN - :ref:`Age-Gender Inference` - Demonstrates age-gender inference - :ref:`Custom Decode Function` - Demonstrates custom decoding function -- :ref:`Deeplabv3 Person Segmentation` - Demonstrates Deeplabv3 person segmentation -- :ref:`Emotion Recognition` - Demonstrates emotion recognition +- :ref:`Emotion Recognition` - Demonstrates emotion recognition - :ref:`Face Detection RGB` - Run face detection on RGB camera - :ref:`Face Detection Mono` - Run face detection on mono camera - :ref:`Human Pose Estimation` - Run human pose estimation inference diff --git a/depthai_sdk/docs/source/visualizer_formats/example.json b/depthai_sdk/docs/source/visualizer_formats/example.json index 56f270aad..8914791c3 100644 --- a/depthai_sdk/docs/source/visualizer_formats/example.json +++ b/depthai_sdk/docs/source/visualizer_formats/example.json @@ -1,5 +1,4 @@ { - "platform": "pc", "frame_shape": [720, 1280], "config": { "output": { diff --git a/depthai_sdk/docs/source/visualizer_formats/format.json b/depthai_sdk/docs/source/visualizer_formats/format.json index 10d826ad7..5cc0c548a 100644 --- a/depthai_sdk/docs/source/visualizer_formats/format.json +++ b/depthai_sdk/docs/source/visualizer_formats/format.json @@ -1,11 +1,4 @@ { - "platform": { - "type": "string", - "enum": [ - "pc", - "robothub" - ] - }, "frame_shape": { "type": "array", "items": { diff --git a/depthai_sdk/examples/CameraComponent/camera_encode.py b/depthai_sdk/examples/CameraComponent/camera_encode.py new file mode 100644 index 000000000..481a52aec --- /dev/null +++ b/depthai_sdk/examples/CameraComponent/camera_encode.py @@ -0,0 +1,9 @@ +from depthai_sdk import OakCamera + +with OakCamera() as oak: + color = oak.create_camera('color', encode='h265') + + oak.visualize(color.out.encoded, fps=True, scale=2/3) + # By default, it will stream non-encoded frames + oak.visualize(color, fps=True, scale=2/3) + oak.start(blocking=True) diff --git a/depthai_sdk/examples/CameraComponent/preview_all_cameras.py b/depthai_sdk/examples/CameraComponent/preview_all_cameras.py index 3d3257910..9b1b102ec 100644 --- a/depthai_sdk/examples/CameraComponent/preview_all_cameras.py +++ b/depthai_sdk/examples/CameraComponent/preview_all_cameras.py @@ -1,6 +1,6 @@ from depthai_sdk import OakCamera with OakCamera() as oak: - cams = oak.create_all_cameras() - oak.visualize(cams) + cams = oak.create_all_cameras(resolution='max') + oak.visualize(cams, fps=True) oak.start(blocking=True) diff --git a/depthai_sdk/examples/CameraComponent/rotated.py b/depthai_sdk/examples/CameraComponent/rotated.py index d80311036..dbfaf7ec4 100644 --- a/depthai_sdk/examples/CameraComponent/rotated.py +++ b/depthai_sdk/examples/CameraComponent/rotated.py @@ -1,8 +1,6 @@ from depthai_sdk import OakCamera with OakCamera(rotation=90) as oak: - color = oak.create_camera('color', resolution='1080p') - left = oak.create_camera('left', resolution='400p') - right = oak.create_camera('right', resolution='400p') - oak.visualize([color, left, right], fps=True) + all_cams = oak.create_all_cameras() + oak.visualize(all_cams, fps=True) oak.start(blocking=True) diff --git a/depthai_sdk/examples/IMUComponent/imu.py b/depthai_sdk/examples/IMUComponent/imu.py index 9ebafac5a..3db1d2d6d 100644 --- a/depthai_sdk/examples/IMUComponent/imu.py +++ b/depthai_sdk/examples/IMUComponent/imu.py @@ -3,5 +3,7 @@ with OakCamera() as oak: imu = oak.create_imu() imu.config_imu(report_rate=400, batch_report_threshold=5) - oak.visualize(imu.out.main) + # DepthAI viewer should open, and IMU data can be viewed on the 
right-side panel, + # under "Stats" tab (right of the "Device Settings" tab). + oak.visualize(imu.out.main, visualizer='viewer') oak.start(blocking=True) diff --git a/depthai_sdk/examples/IMUComponent/imu_rerun.py b/depthai_sdk/examples/IMUComponent/imu_rerun.py deleted file mode 100644 index f0ca9c694..000000000 --- a/depthai_sdk/examples/IMUComponent/imu_rerun.py +++ /dev/null @@ -1,30 +0,0 @@ -from depthai_sdk import OakCamera -from depthai_sdk.classes.packets import IMUPacket -import rerun as rr -import subprocess -import depthai as dai - -def callback(packet: IMUPacket): - for d in packet.data: - gyro: dai.IMUReportGyroscope = d.gyroscope - accel: dai.IMUReportAccelerometer = d.acceleroMeter - mag: dai.IMUReportMagneticField = d.magneticField - rot: dai.IMUReportRotationVectorWAcc = d.rotationVector - print(accel.x, accel.y, accel.z) - rr.log_scalar('world/accel_x', accel.x, color=(255,0,0)) - rr.log_scalar('world/accel_y', accel.y, color=(0,255,0)) - rr.log_scalar('world/accel_z', accel.z, color=(0,0,255)) - - -with OakCamera() as oak: - subprocess.Popen(["rerun", "--memory-limit", "200MB"]) - rr.init("Rerun ", spawn=False) - rr.connect() - - - imu = oak.create_imu() - imu.config_imu(report_rate=10, batch_report_threshold=2) - print(oak.device.getConnectedIMU()) - oak.callback(imu, callback=callback) - oak.start(blocking=True) - diff --git a/depthai_sdk/examples/NNComponent/age-gender.py b/depthai_sdk/examples/NNComponent/age-gender.py index d1715933f..241f878b8 100644 --- a/depthai_sdk/examples/NNComponent/age-gender.py +++ b/depthai_sdk/examples/NNComponent/age-gender.py @@ -14,7 +14,7 @@ def callback(packet: TwoStagePacket): gender_str = "Woman" if gender[0] > gender[1] else "Man" visualizer.add_text(f'{gender_str}\nAge: {age}', - bbox=(*det.top_left, *det.bottom_right), + bbox=packet.bbox.get_relative_bbox(det.bbox), position=TextPosition.BOTTOM_RIGHT) frame = visualizer.draw(packet.frame) diff --git a/depthai_sdk/examples/NNComponent/custom_decode.py b/depthai_sdk/examples/NNComponent/custom_decode.py index f627d950e..dcec18a71 100644 --- a/depthai_sdk/examples/NNComponent/custom_decode.py +++ b/depthai_sdk/examples/NNComponent/custom_decode.py @@ -1,13 +1,11 @@ import blobconverter -import cv2 import numpy as np -from depthai import NNData - +import depthai as dai from depthai_sdk import OakCamera -from depthai_sdk.classes import Detections, DetectionPacket +from depthai_sdk.classes import Detections -def decode(nn_data: NNData) -> Detections: +def decode(nn_data: dai.NNData) -> Detections: """ Custom decode function for the NN component. Decode function has to accept NNData argument. 
The return type should preferably be a class that inherits from depthai_sdk.classes.GenericNNOutput, @@ -18,29 +16,27 @@ def decode(nn_data: NNData) -> Detections: layer = nn_data.getFirstLayerFp16() results = np.array(layer).reshape((1, 1, -1, 7)) dets = Detections(nn_data) - for result in results[0][0]: - if result[2] > 0.5: + if result[2] > 0.3: label = int(result[1]) conf = result[2] bbox = result[3:] - dets.add(label, conf, bbox) + det = dai.ImgDetection() + det.confidence = conf + det.label = label + det.xmin = bbox[0] + det.ymin = bbox[1] + det.xmax = bbox[2] + det.ymax = bbox[3] + dets.detections.append(det) return dets - -def callback(packet: DetectionPacket): - visualizer = packet.visualizer - frame = packet.frame - frame = visualizer.draw(frame) - cv2.imshow('Custom decode function', frame) - - with OakCamera() as oak: color = oak.create_camera('color') - nn_path = blobconverter.from_zoo(name='person-detection-0200', version='2021.4') + nn_path = blobconverter.from_zoo(name='person-detection-0200', version='2021.4', shaves=6) nn = oak.create_nn(nn_path, color, decode_fn=decode) - oak.visualize(nn, callback=callback) + oak.visualize(nn) oak.start(blocking=True) diff --git a/depthai_sdk/examples/NNComponent/deeplabv3_person.py b/depthai_sdk/examples/NNComponent/deeplabv3_person.py deleted file mode 100644 index e641227c5..000000000 --- a/depthai_sdk/examples/NNComponent/deeplabv3_person.py +++ /dev/null @@ -1,11 +0,0 @@ -from depthai_sdk import OakCamera - - -with OakCamera() as oak: - color = oak.create_camera('color', resolution='1080p') - - nn = oak.create_nn('deeplabv3_person', color) - nn.config_nn(resize_mode='letterbox') # Options: 'letterbox', 'crop', 'stretch' - - visualizer = oak.visualize([nn, nn.out.passthrough], fps=True) - oak.start(blocking=True) diff --git a/depthai_sdk/examples/NNComponent/emotion-recognition.py b/depthai_sdk/examples/NNComponent/emotion-recognition.py index 3d44a9f82..43e6bf3e8 100644 --- a/depthai_sdk/examples/NNComponent/emotion-recognition.py +++ b/depthai_sdk/examples/NNComponent/emotion-recognition.py @@ -16,7 +16,7 @@ def callback(packet: TwoStagePacket): emotion_name = emotions[np.argmax(emotion_results)] visualizer.add_text(emotion_name, - bbox=(*det.top_left, *det.bottom_right), + bbox=packet.bbox.get_relative_bbox(det.bbox), position=TextPosition.BOTTOM_RIGHT) visualizer.draw(packet.frame) diff --git a/depthai_sdk/examples/NNComponent/nn_component.py b/depthai_sdk/examples/NNComponent/nn_component.py index d50c28cc6..3bc89fc26 100644 --- a/depthai_sdk/examples/NNComponent/nn_component.py +++ b/depthai_sdk/examples/NNComponent/nn_component.py @@ -4,7 +4,8 @@ color = oak.create_camera('color') # List of models that are supported out-of-the-box by the SDK: # https://docs.luxonis.com/projects/sdk/en/latest/features/ai_models/#sdk-supported-models - nn = oak.create_nn('yolov7tiny_coco_640x352', color) + nn = oak.create_nn('yolov5n_coco_416x416', color) + nn.config_nn(resize_mode='stretch') oak.visualize([nn.out.main], fps=True) oak.visualize(nn.out.passthrough) oak.start(blocking=True) diff --git a/depthai_sdk/examples/NNComponent/spatial_detection.py b/depthai_sdk/examples/NNComponent/spatial_detection.py index bce3ef57d..646963748 100644 --- a/depthai_sdk/examples/NNComponent/spatial_detection.py +++ b/depthai_sdk/examples/NNComponent/spatial_detection.py @@ -15,6 +15,6 @@ calc_algo=dai.SpatialLocationCalculatorAlgorithm.AVERAGE ) - oak.visualize([nn.out.main], fps=True) - oak.visualize(nn.out.passthrough) + oak.visualize(nn.out.main, 
fps=True) + oak.visualize([nn.out.passthrough, nn.out.spatials]) oak.start(blocking=True) diff --git a/depthai_sdk/examples/PointcloudComponent/pointcloud.py b/depthai_sdk/examples/PointcloudComponent/pointcloud.py index fb741ef01..c746a11e3 100644 --- a/depthai_sdk/examples/PointcloudComponent/pointcloud.py +++ b/depthai_sdk/examples/PointcloudComponent/pointcloud.py @@ -1,23 +1,9 @@ -import cv2 from depthai_sdk import OakCamera -from depthai_sdk.classes.packets import PointcloudPacket, FramePacket -import rerun as rr -import subprocess -import time - -subprocess.Popen(["rerun", "--memory-limit", "200MB"]) -time.sleep(1) # Wait til rerun spins up -rr.init("Rerun ", spawn=False) -rr.connect() - -def callback(packet: PointcloudPacket): - colors = packet.color_frame.getCvFrame()[..., ::-1] # BGR to RGB - rr.log_image('Color Image', colors) - points = packet.points.reshape(-1, 3) - rr.log_points("Pointcloud", points, colors=colors.reshape(-1, 3)) - with OakCamera() as oak: - pcl = oak.create_pointcloud() - oak.callback(pcl, callback=callback) + color = oak.camera('color') + stereo = oak.create_stereo() + stereo.config_stereo(align=color) + pcl = oak.create_pointcloud(stereo=stereo, colorize=color) + oak.visualize(pcl, visualizer='depthai-viewer') oak.start(blocking=True) diff --git a/depthai_sdk/examples/mixed/api_interop.py b/depthai_sdk/examples/mixed/api_interop.py index f5ea3baed..ec8a5cc08 100644 --- a/depthai_sdk/examples/mixed/api_interop.py +++ b/depthai_sdk/examples/mixed/api_interop.py @@ -6,15 +6,12 @@ nn = oak.create_nn('mobilenet-ssd', color) oak.visualize([nn.out.passthrough, nn], fps=True) - # Build the pipeline, connect to the oak, update components. Place interop logic AFTER oak.build() - pipeline = oak.build() - nn.node.setNumInferenceThreads(2) # Configure components' nodes - features = pipeline.create(dai.node.FeatureTracker) # Create new pipeline nodes + features = oak.pipeline.create(dai.node.FeatureTracker) # Create new pipeline nodes color.node.video.link(features.inputImage) - out = pipeline.create(dai.node.XLinkOut) + out = oak.pipeline.create(dai.node.XLinkOut) out.setStreamName('features') features.outputFeatures.link(out.input) diff --git a/depthai_sdk/examples/mixed/collision_avoidance.py b/depthai_sdk/examples/mixed/collision_avoidance.py index 214fdcbdd..eb74b4d30 100644 --- a/depthai_sdk/examples/mixed/collision_avoidance.py +++ b/depthai_sdk/examples/mixed/collision_avoidance.py @@ -1,6 +1,6 @@ from depthai_sdk import OakCamera from depthai_sdk.visualize.configs import StereoColor -from depthai_sdk.classes.packets import DepthPacket +from depthai_sdk.classes.packets import DisparityDepthPacket import math import depthai as dai import cv2 @@ -11,7 +11,7 @@ slc_data = [] -def cb(packet: DepthPacket): +def cb(packet: DisparityDepthPacket): global slc_data fontType = cv2.FONT_HERSHEY_TRIPLEX @@ -56,8 +56,6 @@ def cb(packet: DepthPacket): oak.visualize([stereo], fps=True, callback=cb) - oak.build() - slc = oak.pipeline.create(dai.node.SpatialLocationCalculator) for x in range(15): for y in range(9): diff --git a/depthai_sdk/examples/mixed/packet_callback.py b/depthai_sdk/examples/mixed/packet_callback.py new file mode 100644 index 000000000..6d690d007 --- /dev/null +++ b/depthai_sdk/examples/mixed/packet_callback.py @@ -0,0 +1,45 @@ +from depthai_sdk import OakCamera +from depthai_sdk.classes.packets import FramePacket +from datetime import timedelta +from typing import Dict +import cv2 + +def cb_1(packet: FramePacket): + # Called from main thread, so we can call 
cv2.imshow + cv2.imshow('Color frames from cb', packet.frame) + +def cb_2(packets: Dict[str, FramePacket]): + print(packets) + # Sycned packets. + ts_color = packets['color'].get_timestamp() + ts_left = packets['left'].get_timestamp() + ts_imu = packets['imu'].get_timestamp() + print(f"---- New synced packets. Diff between color and left: {abs(ts_color-ts_left) / timedelta(milliseconds=1)} ms, color and IMU: {abs(ts_imu-ts_color) / timedelta(milliseconds=1)} ms") + + for name, packet in packets.items(): + print(f'Packet {name}, timestamp: {packet.get_timestamp()}, Seq number: {packet.get_sequence_num()}') + +with OakCamera() as oak: + color = oak.create_camera('color', fps=32) + left = oak.create_camera('left', fps=30) + right = oak.create_camera('right', fps=30) + imu = oak.create_imu() + + oak.callback( + color, # Outputs whose packets we want to receive via callback + callback=cb_1, # Callback function + main_thread=True # Whether to call the callback in the main thread. For OpenCV's imshow to work, it must be called in the main thread. + ) + + cb_handler = oak.callback( + [left, right, color, imu], + callback=cb_2, + main_thread=False # Will be called from a different thread, instead of putting packets into queue and waiting for main thread to pick it up. + ) + # Timestamp syncing all 3 streams. We selected (1000/30) / 2 as threshold_ms, because + # left/right are slower (30FPS), so threshold should be about 16ms. This means SDK will discard some + # color packets (2 per second), but we will have synced frames. + cb_handler.configure_syncing(threshold_ms=int((1000/30) / 2)) + + # oak.show_graph() + oak.start(blocking=True) \ No newline at end of file diff --git a/depthai_sdk/examples/mixed/packet_queue.py b/depthai_sdk/examples/mixed/packet_queue.py new file mode 100644 index 000000000..768967b07 --- /dev/null +++ b/depthai_sdk/examples/mixed/packet_queue.py @@ -0,0 +1,46 @@ +from queue import Empty +from depthai_sdk import OakCamera +from depthai_sdk.classes.packets import FramePacket +from datetime import timedelta +from typing import Dict +import cv2 + +with OakCamera() as oak: + color = oak.create_camera('color', fps=32) + left = oak.create_camera('left', fps=30) + right = oak.create_camera('right', fps=30) + imu = oak.create_imu() + + q1 = oak.queue(color, max_size=5).get_queue() + + # Timestamp syncing all 3 streams. We selected (1000/30) / 2 as threshold_ms, because + # left/right are slower (30FPS), so threshold should be about 16ms. This means SDK will discard some + # color packets (2 per second), but we will have synced frames. + q2 = oak.queue([left, right, color, imu], max_size=5).configure_syncing(threshold_ms=int((1000/30) / 2)).get_queue() + + # oak.show_graph() + oak.start() + + while oak.running(): + oak.poll() + + # This will block until a new packet arrives + p: FramePacket = q1.get(block=True) + cv2.imshow('Video from q1', p.frame) + + try: + packets: Dict[str, FramePacket] = q2.get(block=False) + + ts_color = packets[color].get_timestamp() + ts_left = packets[left].get_timestamp() + ts_imu = packets[imu].get_timestamp() + print(f"---- New synced packets. 
Diff between color and left: {abs(ts_color-ts_left) / timedelta(milliseconds=1)} ms, color and IMU: {abs(ts_imu-ts_color) / timedelta(milliseconds=1)} ms") + + for name, packet in packets.items(): + print(f'Packet {name}, timestamp: {packet.get_timestamp()}, Seq number: {packet.get_sequence_num()}') + if not hasattr(packet, 'frame'): + continue # IMUPacket doesn't have a frame + cv2.imshow(name, packet.frame) + except Empty: + # q2.get(block=False) will throw Empty exception if there are no new packets + pass \ No newline at end of file diff --git a/depthai_sdk/examples/mixed/speed_calculation.py b/depthai_sdk/examples/mixed/speed_calculation.py index 3380b0533..f029beb72 100644 --- a/depthai_sdk/examples/mixed/speed_calculation.py +++ b/depthai_sdk/examples/mixed/speed_calculation.py @@ -1,13 +1,17 @@ import cv2 from depthai_sdk import OakCamera +from depthai_sdk.classes.packets import TrackerPacket -def callback(packet): - for detection in packet.detections: - print(f'Speed: {detection.speed:.02f} m/s, {detection.speed_kmph:.02f} km/h, {detection.speed_mph:.02f} mph') +def callback(packet: TrackerPacket): + for obj_id, tracklets in packet.tracklets.items(): + if len(tracklets) != 0: + tracklet = tracklets[-1] + if tracklet.speed is not None: + print(f'Speed for object {obj_id}: {tracklet.speed:.02f} m/s, {tracklet.speed_kmph:.02f} km/h, {tracklet.speed_mph:.02f} mph') - frame = packet.visualizer.draw(packet.frame) + frame = packet.visualizer.draw(packet.decode()) cv2.imshow('Speed estimation', frame) diff --git a/depthai_sdk/examples/mixed/switch_between_models.py b/depthai_sdk/examples/mixed/switch_between_models.py index 8619ec892..9ae8332a1 100644 --- a/depthai_sdk/examples/mixed/switch_between_models.py +++ b/depthai_sdk/examples/mixed/switch_between_models.py @@ -3,7 +3,7 @@ import depthai as dai import cv2 -# We use callback, so we only have cv2 window for all models +# We use callback, so we only have cv2 window for both models def cb(packet: DetectionPacket): frame = packet.visualizer.draw(packet.frame) cv2.imshow('Frame', frame) @@ -32,13 +32,18 @@ def cb(packet: DetectionPacket): # We can have multiple models here, not just 2 object detection models nn1 = oak.create_nn('yolov6nr3_coco_640x352', input=script.outputs['out1']) + nn1.config_nn(resize_mode='stretch') # otherwise, BB mappings will be incorrect nn2 = oak.create_nn('mobilenet-ssd', input=script.outputs['out2']) + nn2.config_nn(resize_mode='stretch') # otherwise, BB mappings will be incorrect + # We will send "switch" message via XLinkIn xin = oak.pipeline.create(dai.node.XLinkIn) xin.setStreamName('switch') xin.out.link(script.inputs['switch']) + # We don't want syncing, we just want either of the model packets in the callback oak.visualize([nn1, nn2], fps=True, callback=cb) + oak.visualize([nn1.out.passthrough, nn2.out.passthrough], fps=True) # oak.show_graph() diff --git a/depthai_sdk/examples/mixed/sync_multiple_outputs.py b/depthai_sdk/examples/mixed/sync_multiple_outputs.py index a61c5f0d0..64197a018 100644 --- a/depthai_sdk/examples/mixed/sync_multiple_outputs.py +++ b/depthai_sdk/examples/mixed/sync_multiple_outputs.py @@ -3,15 +3,17 @@ from depthai_sdk import OakCamera with OakCamera() as oak: - color = oak.create_camera('color', encode='h264', name='color') - nn = oak.create_nn('mobilenet-ssd', color, name='mobilenet') - nn2 = oak.create_nn('face-detection-retail-0004', color, name='face-detection') - # oak.visualize([nn.out.main, nn.out.passthrough]) - # oak.visualize(nn.out.spatials, scale=1 / 2) + color = 
oak.create_camera('color', encode='h264') + nn = oak.create_nn('mobilenet-ssd', color) + nn2 = oak.create_nn('face-detection-retail-0004', color) + def cb(msgs: Dict): - print('synced!', msgs) + print('====== New synced packets! ======') + for name, packet in msgs.items(): + print(f"Packet '{name}' with timestamp:", packet.get_timestamp(), 'Seq number:', packet.get_sequence_num(), 'Object', packet) - oak.sync([color.out.encoded, nn.out.passthrough, nn.out.main, nn2.out.main], cb) + oak.callback([nn.out.passthrough, nn.out.encoded, nn2.out.encoded], cb) \ + .configure_syncing(enable_sync=True, threshold_ms=30) # oak.show_graph() oak.start(blocking=True) diff --git a/depthai_sdk/examples/recording/encode.py b/depthai_sdk/examples/recording/encode.py index bf098931e..44b235d12 100644 --- a/depthai_sdk/examples/recording/encode.py +++ b/depthai_sdk/examples/recording/encode.py @@ -9,7 +9,9 @@ nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo) # Sync & save all (encoded) streams - oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record', RecordType.VIDEO) + oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record', RecordType.VIDEO) \ + .configure_syncing(enable_sync=True, threshold_ms=50) + oak.visualize([color.out.encoded], fps=True) oak.start(blocking=True) diff --git a/depthai_sdk/examples/recording/encoder_preview.py b/depthai_sdk/examples/recording/encoder_preview.py index 52fa83ff0..f519b808f 100644 --- a/depthai_sdk/examples/recording/encoder_preview.py +++ b/depthai_sdk/examples/recording/encoder_preview.py @@ -3,18 +3,21 @@ from depthai_sdk import OakCamera from depthai_sdk.recorders.video_writers.av_writer import AvWriter -rec = AvWriter(Path('./'), 'color', 'mjpeg', fps=30) +fourcc = 'h264' # Can be 'mjpeg', 'h264', or 'hevc' +rec = AvWriter(Path('./'), 'color', fourcc=fourcc) def save_raw_mjpeg(packet): rec.write(packet.msg) - with OakCamera() as oak: - color = oak.create_camera('color', encode='MJPEG', fps=30) + color = oak.create_camera('color', encode=fourcc, fps=20) + + # Stream encoded video packets to host. For visualization, we decode them + # on the host side, and for callback we write encoded frames directly to disk. 
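+    # packet.msg holds the encoded dai.ImgFrame, so rec.write(packet.msg) stores the bitstream as-is;
+    # accessing packet.frame would lazily decode it on the host (see FramePacket.decode further below).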
+ oak.visualize(color.out.encoded, scale=2 / 3, fps=True) + oak.callback(color.out.encoded, callback=save_raw_mjpeg) - oak.visualize(color, scale=2 / 3, fps=True) - oak.callback(color, callback=save_raw_mjpeg) oak.start(blocking=True) rec.close() diff --git a/depthai_sdk/examples/recording/mcap_record_imu.py b/depthai_sdk/examples/recording/mcap_record_imu.py index ce865d8fd..084f108e0 100644 --- a/depthai_sdk/examples/recording/mcap_record_imu.py +++ b/depthai_sdk/examples/recording/mcap_record_imu.py @@ -8,6 +8,7 @@ imu = oak.create_imu() imu.config_imu(report_rate=500, batch_report_threshold=5) + # Note that for MCAP recording, user has to have ROS installed recorder = oak.record([imu, stereo.out.depth], './', RecordType.MCAP) oak.visualize([left, stereo]) diff --git a/depthai_sdk/examples/recording/record_all.py b/depthai_sdk/examples/recording/record_all.py new file mode 100644 index 000000000..3e5f7e9a2 --- /dev/null +++ b/depthai_sdk/examples/recording/record_all.py @@ -0,0 +1,22 @@ +from depthai_sdk import OakCamera, RecordType +from depthai_sdk.args_parser import ArgsParser +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('--recordStreams', action='store_true', help="Record frames to file") +parser.add_argument('--saveStreamsTo', type=str, help="Save frames to directory", default="./record") +args= ArgsParser.parseArgs(parser=parser) + +with OakCamera(args=args) as oak: + cams = oak.create_all_cameras() + left = oak.camera('left') + right = oak.camera('right') + if left is not None and right is not None: + stereo = oak.create_stereo(left=left, right=right) + oak.visualize(stereo) + # Sync & save all streams + if args["recordStreams"]: + oak.record(cams, args["saveStreamsTo"], RecordType.VIDEO_LOSSLESS).configure_syncing(True, 50) + oak.visualize(cams, fps=True) + + oak.start(blocking=True) diff --git a/depthai_sdk/examples/recording/rosbag_record.py b/depthai_sdk/examples/recording/rosbag_record.py index 995e84260..4f462dd59 100644 --- a/depthai_sdk/examples/recording/rosbag_record.py +++ b/depthai_sdk/examples/recording/rosbag_record.py @@ -1,15 +1,18 @@ from depthai_sdk import OakCamera, RecordType with OakCamera() as oak: + color = oak.create_camera('color', encode='jpeg', fps=30) left = oak.create_camera('left', resolution='800p', encode='jpeg', fps=30) right = oak.create_camera('right', resolution='800p', encode='jpeg', fps=30) stereo = oak.create_stereo(left=left, right=right) + stereo.config_stereo(align=color) imu = oak.create_imu() imu.config_imu(report_rate=400, batch_report_threshold=5) - # DB3 / ROSBAG - oak.record([left.out.encoded, right.out.encoded, stereo.out.depth, imu], 'records', record_type=RecordType.DB3) + # DB3 / ROSBAG. ROSBAG doesn't require having ROS installed, while DB3 does. 
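+    # (ROSBAG recording should produce a ROS1-style .bag file, while DB3 produces a ROS2 sqlite .db3 bag.)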
+ record_components = [left.out.encoded, color.out.encoded, right.out.encoded, stereo.out.depth, imu] + oak.record(record_components, 'record', record_type=RecordType.ROSBAG) - # Record left only - oak.visualize(left) + # Visualize only color stream + oak.visualize(color.out.encoded) oak.start(blocking=True) diff --git a/depthai_sdk/requirements.txt b/depthai_sdk/requirements.txt index 162165241..01580b840 100644 --- a/depthai_sdk/requirements.txt +++ b/depthai_sdk/requirements.txt @@ -10,3 +10,4 @@ marshmallow==3.17.0 xmltodict sentry-sdk==1.21.0 depthai-pipeline-graph==0.0.5 +ahrs==0.3.1 diff --git a/depthai_sdk/setup.py b/depthai_sdk/setup.py index 9bda36cb4..6891734b6 100644 --- a/depthai_sdk/setup.py +++ b/depthai_sdk/setup.py @@ -9,7 +9,7 @@ setup( name='depthai-sdk', - version='1.12.1', + version='1.13.0', description='This package provides an abstraction of the DepthAI API library.', long_description=io.open("README.md", encoding="utf-8").read(), long_description_content_type="text/markdown", diff --git a/depthai_sdk/src/depthai_sdk/__init__.py b/depthai_sdk/src/depthai_sdk/__init__.py index 9412b350b..516a1e620 100644 --- a/depthai_sdk/src/depthai_sdk/__init__.py +++ b/depthai_sdk/src/depthai_sdk/__init__.py @@ -3,15 +3,14 @@ from depthai_sdk.constants import CV2_HAS_GUI_SUPPORT from depthai_sdk.logger import set_logging_level from depthai_sdk.oak_camera import OakCamera -from depthai_sdk.oak_device import OakDevice from depthai_sdk.previews import * from depthai_sdk.record import * from depthai_sdk.replay import * from depthai_sdk.utils import * -from depthai_sdk.utils import _create_config, get_config_field +from depthai_sdk.utils import _create_config, get_config_field, _sentry_before_send from depthai_sdk.visualize import * -__version__ = '1.12.1' +__version__ = '1.13.0' def __import_sentry(sentry_dsn: str) -> None: @@ -23,21 +22,13 @@ def __import_sentry(sentry_dsn: str) -> None: traces_sample_rate=1.0, release=f'depthai_sdk@{__version__}', with_locals=False, + before_send=_sentry_before_send ) except: pass -config_exists = False -# Check if sentry is enabled -try: - sentry_status = get_config_field('sentry') - config_exists = True -except FileNotFoundError: - sentry_status = False - -if config_exists and sentry_status: - sentry_dsn = get_config_field('sentry_dsn') +sentry_dsn = get_config_field('sentry_dsn') +sentry_status = get_config_field('sentry') +if sentry_dsn and sentry_status: __import_sentry(sentry_dsn) -elif not config_exists: - _create_config() diff --git a/depthai_sdk/src/depthai_sdk/args_parser.py b/depthai_sdk/src/depthai_sdk/args_parser.py index e1e5e9958..96a190ab4 100644 --- a/depthai_sdk/src/depthai_sdk/args_parser.py +++ b/depthai_sdk/src/depthai_sdk/args_parser.py @@ -92,7 +92,8 @@ def parseArgs(parser: argparse.ArgumentParser = None) -> Dict[str, Any]: parser.add_argument("-monof", "--monoFps", type=float, help="Mono cam fps: max 60.0 for H:720 or H:800, max 120.0 for H:400. Default: %(default)s") parser.add_argument('-fps', '--fps', type=float, help='Camera FPS applied to all sensors') - + parser.add_argument('-defaultRes', '--defaultResolution', type=str, choices=[None, 'min', 'max'], + help="Default resolution preset for the cameras that don't have a specific resolution set. 
Default: %(default)s") # ColorCamera ISP values parser.add_argument('-isp', '--ispScale', type=_commaSeparated(None), help="Sets ColorCamera's ISP scale") parser.add_argument('-sharpness', '--sharpness', type=_checkRange(0, 4), diff --git a/depthai_sdk/src/depthai_sdk/classes/enum.py b/depthai_sdk/src/depthai_sdk/classes/enum.py index dffc3ada1..5c23cdbbf 100644 --- a/depthai_sdk/src/depthai_sdk/classes/enum.py +++ b/depthai_sdk/src/depthai_sdk/classes/enum.py @@ -1,6 +1,7 @@ from enum import IntEnum from typing import Union + class ResizeMode(IntEnum): """ If NN input frame is in different aspect ratio than what the model expect, we have 3 different @@ -10,7 +11,7 @@ class ResizeMode(IntEnum): LETTERBOX = 0 # Preserves full FOV by padding/letterboxing, but smaller frame means less features which might decrease NN accuracy STRETCH = 1 # Preserves full FOV, but frames are stretched to match the FOV, which might decrease NN accuracy CROP = 2 # Crops some FOV to match the required FOV, then scale. No potential NN accuracy decrease. - FULL_CROP = 3 # No scaling is done, cropping is applied and FOV can be reduced by a lot + FULL_CROP = 3 # No scaling is done, cropping is applied and FOV can be reduced by a lot # Parse string to ResizeMode @staticmethod @@ -30,4 +31,3 @@ def parse(mode: Union[str, 'ResizeMode']) -> 'ResizeMode': else: raise ValueError(f"Unknown resize mode {mode}! 'Options (case insensitive):" \ "STRETCH, CROP, LETTERBOX. Using default LETTERBOX mode.") - diff --git a/depthai_sdk/src/depthai_sdk/classes/nn_results.py b/depthai_sdk/src/depthai_sdk/classes/nn_results.py index 31282c98a..9dc8af9e1 100644 --- a/depthai_sdk/src/depthai_sdk/classes/nn_results.py +++ b/depthai_sdk/src/depthai_sdk/classes/nn_results.py @@ -5,10 +5,54 @@ These will be integrated into depthai-core, bonus points for on-device decoding of some popular models. """ from dataclasses import dataclass -from typing import List, Tuple, Any +from datetime import timedelta +from typing import List, Tuple, Any, Union, Optional +import depthai as dai import numpy as np -from depthai import NNData, ImgDetection + +from depthai_sdk.visualize.bbox import BoundingBox + + +@dataclass +class Detection: + # Original ImgDetection + img_detection: Union[None, dai.ImgDetection, dai.SpatialImgDetection] + label_str: str + confidence: float + color: Tuple[int, int, int] + bbox: BoundingBox + angle: Optional[int] + ts: Optional[timedelta] + + @property + def top_left(self) -> Tuple[float, float]: + return self.bbox.top_left() + + @property + def bottom_right(self) -> Tuple[float, float]: + return self.bbox.bottom_right() + + +@dataclass +class TrackingDetection(Detection): + tracklet: dai.Tracklet + filtered_2d: BoundingBox + filtered_3d: dai.Point3f + speed: Union[float, None] # m/s + + @property + def speed_kmph(self) -> float: + return self.speed * 3.6 + + @property + def speed_mph(self) -> float: + return self.speed * 2.236936 + + +@dataclass +class TwoStageDetection(Detection): + nn_data: dai.NNData class GenericNNOutput: @@ -16,9 +60,20 @@ class GenericNNOutput: Generic NN output, to be used for higher-level abstractions (eg. automatic visualization of results). 
""" - def __init__(self, nn_data: NNData): + def __init__(self, nn_data: Union[dai.NNData, dai.ImgDetections, dai.SpatialImgDetections]): self.nn_data = nn_data + def getTimestamp(self) -> timedelta: + return self.nn_data.getTimestamp() + + def getSequenceNum(self) -> int: + return self.nn_data.getSequenceNum() + + +@dataclass +class ExtendedImgDetection(dai.ImgDetection): + angle: int + # First we have Object detection results, which are already standarized with dai.ImgDetections @@ -28,25 +83,12 @@ class Detections(GenericNNOutput): Detection results containing bounding boxes, labels and confidences. Optionally can contain rotation angles. """ - def __init__(self, nn_data: NNData, is_rotated: bool = False): + def __init__(self, + nn_data: Union[dai.NNData, dai.ImgDetections, dai.SpatialImgDetections], + is_rotated: bool = False): GenericNNOutput.__init__(self, nn_data) - - self.detections = [] + self.detections: List[ExtendedImgDetection] = [] self.is_rotated = is_rotated - if is_rotated: - self.angles = [] - - def add(self, label: int, confidence: float, bbox: Tuple[float, ...], angle: int = 0) -> None: - det = ImgDetection() - det.label = label - det.confidence = confidence - det.xmin = bbox[0] - det.ymin = bbox[1] - det.xmax = bbox[2] - det.ymax = bbox[3] - self.detections.append(det) - if self.is_rotated: - self.angles.append(angle) @dataclass @@ -58,7 +100,7 @@ class SemanticSegmentation(GenericNNOutput): # In core, extend from NNData """ mask: List[np.ndarray] # 2D np.array for each class - def __init__(self, nn_data: NNData, mask: List[np.ndarray]): + def __init__(self, nn_data: dai.NNData, mask: List[np.ndarray]): super().__init__(nn_data) self.mask = mask @@ -72,7 +114,7 @@ class ImgLandmarks(GenericNNOutput): # In core, extend from NNData """ def __init__(self, - nn_data: NNData, + nn_data: dai.NNData, landmarks: List[List[Any]] = None, landmarks_indices: List[List[int]] = None, pairs: List[Tuple[int, int]] = None, @@ -93,6 +135,5 @@ class InstanceSegmentation(GenericNNOutput): masks: List[np.ndarray] # 2D np.array for each instance labels: List[int] # Class label for each instance - def __init__(self, nn_data: NNData, masks: List[np.ndarray], labels: List[int]): + def __init__(self, nn_data: dai.NNData, masks: List[np.ndarray], labels: List[int]): raise NotImplementedError('Instance segmentation not yet implemented') - super().__init__(nn_data) diff --git a/depthai_sdk/src/depthai_sdk/classes/output_config.py b/depthai_sdk/src/depthai_sdk/classes/output_config.py deleted file mode 100644 index 77a6144a9..000000000 --- a/depthai_sdk/src/depthai_sdk/classes/output_config.py +++ /dev/null @@ -1,211 +0,0 @@ -import os -from abc import abstractmethod -from pathlib import Path -from typing import Optional, Callable, List, Union - -import depthai as dai -from depthai_sdk.oak_outputs.syncing import SequenceNumSync -from depthai_sdk.oak_outputs.xout.xout_base import XoutBase -from depthai_sdk.oak_outputs.xout.xout_depth import XoutDepth -from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.record import Record -from depthai_sdk.recorders.video_recorder import VideoRecorder -from depthai_sdk.trigger_action.actions.abstract_action import Action -from depthai_sdk.trigger_action.actions.record_action import RecordAction -from depthai_sdk.trigger_action.trigger_action import TriggerAction -from depthai_sdk.trigger_action.triggers.abstract_trigger import Trigger -from depthai_sdk.visualize.visualizer import Visualizer - -def find_new_name(name: str, names: 
List[str]): - while True: - arr = name.split(' ') - num = arr[-1] - if num.isnumeric(): - arr[-1] = str(int(num) + 1) - name = " ".join(arr) - else: - name = f"{name} 2" - if name not in names: - return name - -class BaseConfig: - @abstractmethod - def setup(self, pipeline: dai.Pipeline, device: dai.Device, names: List[str]) -> List[XoutBase]: - raise NotImplementedError() - - -class OutputConfig(BaseConfig): - """ - Saves callbacks/visualizers until the device is fully initialized. I'll admit it's not the cleanest solution. - """ - - def __init__(self, output: Callable, - callback: Callable, - visualizer: Visualizer = None, - visualizer_enabled: bool = False, - record_path: Optional[str] = None): - self.output = output # Output of the component (a callback) - self.callback = callback # Callback that gets called after syncing - self.visualizer = visualizer - self.visualizer_enabled = visualizer_enabled - self.record_path = record_path - - def setup(self, pipeline: dai.Pipeline, device, names: List[str]) -> List[XoutBase]: - xoutbase: XoutBase = self.output(pipeline, device) - xoutbase.setup_base(self.callback) - - if xoutbase.name in names: # Stream name already exist, append a number to it - xoutbase.name = find_new_name(xoutbase.name, names) - names.append(xoutbase.name) - - recorder = None - if self.record_path: - recorder = VideoRecorder() - - if isinstance(xoutbase, XoutDepth): - raise NotImplementedError('Depth recording is not implemented yet.' - 'Please use OakCamera.record() instead.') - - recorder.update(Path(self.record_path), device, [xoutbase]) - - if self.visualizer: - xoutbase.setup_visualize(visualizer=self.visualizer, - visualizer_enabled=self.visualizer_enabled, - name=xoutbase.name) - - if self.record_path: - xoutbase.setup_recorder(recorder=recorder) - - return [xoutbase] - - -class RecordConfig(BaseConfig): - def __init__(self, outputs: List[Callable], rec: Record): - self.outputs = outputs - self.rec = rec - - def setup(self, pipeline: dai.Pipeline, device: dai.Device, _) -> List[XoutBase]: - xouts: List[XoutFrames] = [] - for output in self.outputs: - xoutbase: XoutFrames = output(pipeline, device) - xoutbase.setup_base(None) - xouts.append(xoutbase) - - self.rec.setup_base(None) - self.rec.start(device, xouts) - - return [self.rec] - - -class RosStreamConfig(BaseConfig): - outputs: List[Callable] - ros = None - - def __init__(self, outputs: List[Callable]): - self.outputs = outputs - - def setup(self, pipeline: dai.Pipeline, device, names: List[str]) -> List[XoutBase]: - xouts: List[XoutFrames] = [] - for output in self.outputs: - xoutbase: XoutFrames = output(pipeline, device) - xoutbase.setup_base(None) - xouts.append(xoutbase) - - envs = os.environ - if 'ROS_VERSION' not in envs: - raise Exception('ROS installation not found! Please install or source the ROS you would like to use.') - - version = envs['ROS_VERSION'] - if version == '1': - raise Exception('ROS1 publsihing is not yet supported!') - from depthai_sdk.integrations.ros.ros1_streaming import Ros1Streaming - self.ros = Ros1Streaming() - elif version == '2': - from depthai_sdk.integrations.ros.ros2_streaming import Ros2Streaming - self.ros = Ros2Streaming() - else: - raise Exception(f"ROS version '{version}' not recognized! 
Should be either '1' or '2'") - - self.ros.update(device, xouts) - return [self] - - def new_msg(self, name, msg): - self.ros.new_msg(name, msg) - - def check_queue(self, block): - pass # No queues - - def start_fps(self): - pass - - # def is_ros1(self) -> bool: - # try: - # import rospy - # return True - # except: - # return False - # - # def is_ros2(self): - # try: - # import rclpy - # return True - # except: - # return False - - -class SyncConfig(BaseConfig, SequenceNumSync): - def __init__(self, outputs: List[Callable], callback: Callable): - self.outputs = outputs - self.callback = callback - - SequenceNumSync.__init__(self, len(outputs)) - - self.packets = dict() - - def new_packet(self, packet): - # print('new packet', packet, packet.name, 'seq num',packet.imgFrame.getSequenceNum()) - synced = self.sync( - packet.msg.getSequenceNum(), - packet.name, - packet - ) - if synced: - self.callback(synced) - - def setup(self, pipeline: dai.Pipeline, device: dai.Device, _) -> List[XoutBase]: - xouts = [] - for output in self.outputs: - xoutbase: XoutBase = output(pipeline, device) - xoutbase.setup_base(self.new_packet) - xouts.append(xoutbase) - - return xouts - - -class TriggerActionConfig(BaseConfig): - def __init__(self, trigger: Trigger, action: Union[Callable, Action]): - self.trigger = trigger - self.action = Action(None, action) if isinstance(action, Callable) else action - - def setup(self, pipeline: dai.Pipeline, device, _) -> List[XoutBase]: - controller = TriggerAction(self.trigger, self.action) - - trigger_xout: XoutBase = self.trigger.input(pipeline, device) - trigger_xout.setup_base(controller.new_packet_trigger) - # without setting visualizer up, XoutNnResults.on_callback() won't work - trigger_xout.setup_visualize(visualizer=Visualizer(), name=trigger_xout.name, visualizer_enabled=False) - - if isinstance(self.action, Callable): - return [trigger_xout] - - action_xouts = [] - if self.action.inputs: - for output in self.action.inputs: - xout: XoutBase = output(pipeline, device) - xout.setup_base(controller.new_packet_action) - action_xouts.append(xout) - - if isinstance(self.action, RecordAction): - self.action.setup(device, action_xouts) # creates writers for VideoRecorder() - - return [trigger_xout] + action_xouts diff --git a/depthai_sdk/src/depthai_sdk/classes/packet_handlers.py b/depthai_sdk/src/depthai_sdk/classes/packet_handlers.py new file mode 100644 index 000000000..6b2a08ae4 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/classes/packet_handlers.py @@ -0,0 +1,346 @@ +import logging +import os +from abc import abstractmethod +from queue import Queue, Empty +from typing import Optional, Callable, List, Union, Dict + +import depthai as dai + +from depthai_sdk.classes.packets import BasePacket +from depthai_sdk.components.component import Component, ComponentOutput +from depthai_sdk.oak_outputs.fps import FPS +from depthai_sdk.oak_outputs.syncing import TimestampSync +from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, ReplayStream +from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames +from depthai_sdk.record import Record +from depthai_sdk.trigger_action.actions.abstract_action import Action +from depthai_sdk.trigger_action.actions.record_action import RecordAction +from depthai_sdk.trigger_action.trigger_action import TriggerAction +from depthai_sdk.trigger_action.triggers.abstract_trigger import Trigger +from depthai_sdk.visualize.visualizer import Visualizer + + +class BasePacketHandler: + def __init__(self, main_thread=False): + self.fps = FPS() 
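+        # When main_thread=True, packets are buffered in this queue and drained by _poll() on the main thread;
+        # otherwise new_packet() is called directly from a background (non-main) thread.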
+ self.queue = Queue(2) if main_thread else None + self.outputs: List[ComponentOutput] + self.sync = None + + self._packet_names = {} # Check for duplicate packet name, raise error if found (user error) + + @abstractmethod + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + raise NotImplementedError() + + def get_fps(self) -> float: + return self.fps.fps() + + def _new_packet_callback(self, packet: BasePacket): + """ + Callback from XoutBase. Don't override it. Does FPS counting and calls new_packet(). + """ + if self.sync is not None: + packet = self.sync.sync(packet.get_timestamp(), packet.name, packet) + if packet is None: + return + + self.fps.next_iter() + if self.queue: + if self.queue.full(): + self.queue.get() # Remove oldest packet + self.queue.put(packet) + else: + self.new_packet(packet) + + def configure_syncing(self, + enable_sync: bool = True, + threshold_ms: int = 17): + """ + If multiple outputs are used, then PacketHandler can do timestamp syncing of multiple packets + before calling new_packet(). + Args: + enable_sync: If True, then syncing is enabled. + threshold_ms: Maximum time difference between packets in milliseconds. + """ + if enable_sync: + if len(self.outputs) < 2: + logging.error('Syncing requires at least 2 outputs! Skipping syncing.') + return + self.sync = TimestampSync(len(self.outputs), threshold_ms) + + def _poll(self): + """ + Called from main thread. + """ + if self.queue: + try: + packet = self.queue.get_nowait() + self.new_packet(packet) + except Empty: + pass + + @abstractmethod + def new_packet(self, packet): + raise NotImplementedError() + + def close(self): + """ + Used as a cleanup method (eg. close recording), other classes can override it. + """ + pass + + def _save_outputs(self, output: Union[List, ComponentOutput, Component]): + if not isinstance(output, List): + output = [output] + + for i in range(len(output)): + if isinstance(output[i], Component): + # Select default (main) output of the component + output[i] = output[i].out.main + + self.outputs = output + + def _create_xout(self, + pipeline: dai.Pipeline, + xout: XoutBase, + xout_streams: Dict, + custom_callback: Callable = None, + custom_packet_postfix: str = None): + # Check for duplicate packet name, raise error if found (user error) + if custom_packet_postfix: + xout.set_packet_name_postfix(custom_packet_postfix) + + name = xout.get_packet_name() + if name in self._packet_names: + raise ValueError( + f'User specified duplicate packet name "{name}"! Please specify unique names (or leave empty) for each component output.') + self._packet_names[name] = True + + # Assign which callback to call when packet is prepared + xout.new_packet_callback = custom_callback or self._new_packet_callback + + for xstream in xout.xstreams(): + if xstream.name not in xout_streams: + xout_streams[xstream.name] = [] + if not isinstance(xstream, ReplayStream): + xlink = pipeline.createXLinkOut() + xlink.setStreamName(xstream.name) + xstream.stream.link(xlink.input) + xout_streams[xstream.name].append(xout.device_msg_callback) + + +class VisualizePacketHandler(BasePacketHandler): + def __init__(self, + outputs, + visualizer: Visualizer, + callback: Callable = None, + record_path: Optional[str] = None, + main_thread: bool = True, + ): + self._save_outputs(outputs) + + if 1 < len(self.outputs) and record_path is not None: + raise Exception('Recording multiple streams is not supported! 
' + 'Call oak.visualize(out, record_path="vid.mp4") for each stream separately') + + self.callback = callback # Callback that gets called after syncing + self.visualizer = visualizer + self.record_path = record_path + self.recorder = None + # Main thread: if opencv visualizer, then we need to poll it + super().__init__(main_thread) + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + for output in self.outputs: + xout: XoutBase = output(device) + self._create_xout(pipeline, xout, xout_streams) + + def new_packet(self, packet: BasePacket): + # Create visualizer objects for the visualizer. These objects will then be visualized + # by the selected visualizer + packet.prepare_visualizer_objects(self.visualizer) + + if self.callback: + # Add self.visualizer to packet attributes + packet.visualizer = self.visualizer + self.callback(packet) + else: + self.visualizer.show(packet) + + if self.recorder: + self.recorder.write(packet) + + def close(self): + self.visualizer.close() + + +class RecordPacketHandler(BasePacketHandler): + def __init__(self, outputs, recorder: Record): + self._save_outputs(outputs) + self.recorder = recorder + super().__init__() + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + xouts: List[XoutFrames] = [] + for output in self.outputs: + xout = output(device) + xouts.append(xout) + self._create_xout(pipeline, xout, xout_streams) + + self.recorder.start(device, xouts) + + def new_packet(self, packet: BasePacket): + self.recorder.write(packet) + + def close(self): + self.recorder.close() + + +class CallbackPacketHandler(BasePacketHandler): + def __init__(self, outputs, callback: Callable, main_thread=False): + self._save_outputs(outputs) + self.callback = callback + super().__init__(main_thread) + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + for output in self.outputs: + xout = output(device) + self._create_xout(pipeline, xout, xout_streams) + + def new_packet(self, packet): + self.callback(packet) + + +class QueuePacketHandler(BasePacketHandler): + def __init__(self, outputs, max_size: int): + super().__init__() + self._save_outputs(outputs) + self.queue = Queue(max_size) + + def get_queue(self) -> Queue: + return self.queue + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + for output in self.outputs: + xout = output(device) + self._create_xout(pipeline, xout, xout_streams) + + def configure_syncing(self, + enable_sync: bool = True, + threshold_ms: int = 17) -> 'QueuePacketHandler': + """ + If multiple outputs are used, then PacketHandler can do timestamp syncing of multiple packets + before calling new_packet(). + Args: + enable_sync: If True, then syncing is enabled. + threshold_ms: Maximum time difference between packets in milliseconds. + """ + super().configure_syncing(enable_sync, threshold_ms) + return self + + def new_packet(self, packet): + # It won't be called, we just added this function to satisfy the abstract class + pass + + +class RosPacketHandler(BasePacketHandler): + def __init__(self, outputs): + super().__init__() + self._save_outputs(outputs) + + envs = os.environ + if 'ROS_VERSION' not in envs: + raise Exception('ROS installation not found! 
Please install or source the ROS you would like to use.') + + version = envs['ROS_VERSION'] + if version == '1': + raise Exception('ROS1 publsihing is not yet supported!') + from depthai_sdk.integrations.ros.ros1_streaming import Ros1Streaming + self.ros = Ros1Streaming() + elif version == '2': + from depthai_sdk.integrations.ros.ros2_streaming import Ros2Streaming + self.ros = Ros2Streaming() + else: + raise Exception(f"ROS version '{version}' not recognized! Should be either '1' or '2'") + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + xouts = [] + for output in self.outputs: + xout = output(device) + self._create_xout(pipeline, xout, xout_streams) + xouts.append(xout) + + self.ros.update(device, xouts) + + def new_packet(self, packet): + # self.ros.new_msg(name, msg) + # TODO: implement + pass + + # def is_ros1(self) -> bool: + # try: + # import rospy + # return True + # except: + # return False + # + # def is_ros2(self): + # try: + # import rclpy + # return True + # except: + # return False + + +class TriggerActionPacketHandler(BasePacketHandler): + def __init__(self, trigger: Trigger, action: Union[Callable, Action]): + super().__init__() + self.trigger = trigger + self.action = Action(None, action) if isinstance(action, Callable) else action + self.controller = TriggerAction(self.trigger, self.action) + + def setup(self, pipeline: dai.Pipeline, device: dai.Device, xout_streams: Dict[str, List]): + trigger_xout: XoutBase = self.trigger.input(device) + self._create_xout(pipeline=pipeline, + xout=trigger_xout, + xout_streams=xout_streams, + custom_callback=self.controller.new_packet_trigger, + custom_packet_postfix='trigger') + + if isinstance(self.action, Callable): + self._save_outputs([trigger_xout]) + return + + action_xouts = [] + if self.action.inputs: + for output in self.action.inputs: + xout: XoutBase = output(device) + xout.new_packet_callback = self.controller.new_packet_action + self._create_xout(pipeline=pipeline, + xout=xout, + xout_streams=xout_streams, + custom_callback=self.controller.new_packet_action, + custom_packet_postfix='action') + action_xouts.append(xout) + + if isinstance(self.action, RecordAction): + self.action.setup(device, action_xouts) # creates writers for VideoRecorder() + + self._save_outputs([trigger_xout] + action_xouts) + + def new_packet(self, packet): + pass + + +class StreamPacketHandler(BasePacketHandler): + """ + TODO. API: + oak.stream_rtsp([color, left, right], port=8888) + oak.stream_webrtc(color, port=8881) + + Creates a server and just sends forward + the frames. Doesn't use any queues. 
+ + """ + pass diff --git a/depthai_sdk/src/depthai_sdk/classes/packets.py b/depthai_sdk/src/depthai_sdk/classes/packets.py index 0fa36c904..931821b0f 100644 --- a/depthai_sdk/src/depthai_sdk/classes/packets.py +++ b/depthai_sdk/src/depthai_sdk/classes/packets.py @@ -1,58 +1,65 @@ -from typing import Tuple, List, Union, Optional +from abc import ABC, abstractmethod +from datetime import timedelta +from typing import Sequence, Tuple, List, Union, Optional, Dict, Callable import depthai as dai import numpy as np +from depthai_sdk.classes import ImgLandmarks, SemanticSegmentation +from depthai_sdk.classes.nn_results import Detection, TrackingDetection, TwoStageDetection +from depthai_sdk.visualize.bbox import BoundingBox +from depthai_sdk.visualize.configs import StereoColor, TextPosition +from depthai_sdk.visualize.visualizer import Visualizer + try: import cv2 except ImportError: cv2 = None -class _Detection: - # Original ImgDetection - img_detection: dai.ImgDetection - label_str: str - color: Tuple[int, int, int] - - # Normalized bounding box - top_left: Tuple[int, int] - bottom_right: Tuple[int, int] - - def centroid(self) -> Tuple[int, int]: - return ( - int((self.bottom_right[0] + self.top_left[0]) / 2), - int((self.bottom_right[1] + self.top_left[1]) / 2), - ) +class BasePacket(ABC): + """ + Base class for all packets. + """ - def get_bbox(self) -> Tuple[float, float, float, float]: - return self.img_detection.xmin, self.img_detection.ymin, self.img_detection.xmax, self.img_detection.ymax + def __init__(self, name: str): + self.name = name + def prepare_visualizer_objects(self, visualizer: 'Visualizer') -> None: + """ + Prepare visualizer objects (boxes, lines, text, etc.), so visualizer can draw them on the frame. -class _TrackingDetection(_Detection): - tracklet: dai.Tracklet - speed: float = 0.0 # m/s - speed_kmph: float = 0.0 # km/h - speed_mph: float = 0.0 # mph + Args: + visualizer: Visualizer object. + """ + pass + @abstractmethod + def get_timestamp(self) -> timedelta: + raise NotImplementedError() -class _TwoStageDetection(_Detection): - nn_data: dai.NNData + @abstractmethod + def get_sequence_num(self) -> int: + raise NotImplementedError() -class NNDataPacket: +class NNDataPacket(BasePacket): """ Contains only dai.NNData message """ - name: str # NNData stream name - msg: dai.NNData # Original depthai message def __init__(self, name: str, nn_data: dai.NNData): - self.name = name self.msg = nn_data + super().__init__(name) + + def get_timestamp(self) -> timedelta: + return self.msg.getTimestamp() + def get_sequence_num(self) -> int: + return self.msg.getTimestampDevice() -class FramePacket: + +class FramePacket(BasePacket): """ Contains only dai.ImgFrame message and cv2 frame, which is used by visualization logic. 
""" @@ -60,65 +67,268 @@ class FramePacket: def __init__(self, name: str, msg: dai.ImgFrame, - frame: Optional[np.ndarray], - visualizer: 'Visualizer' = None): - self.name = name + ): self.msg = msg - self.frame = frame + self._get_codec = None + self.__frame = None + super().__init__(name) + + @property + def frame(self): + if self.__frame is None: + self.__frame = self.decode() + return self.__frame - self.visualizer = visualizer + def get_timestamp(self) -> timedelta: + return self.msg.getTimestampDevice(dai.CameraExposureOffset.MIDDLE) + def get_sequence_num(self) -> int: + return self.msg.getSequenceNum() -class PointcloudPacket: + def set_decode_codec(self, get_codec: Callable): + self._get_codec = get_codec + + def decode(self) -> Optional[np.ndarray]: + if self._get_codec is None: + return self.msg.getCvFrame() if cv2 else None + + codec = self._get_codec() + if codec is None: + return self.msg.getCvFrame() if cv2 else None + + # PyAV decoding support H264, H265, JPEG and Lossless JPEG + enc_packets = codec.parse(self.msg.getData()) + if len(enc_packets) == 0: + return None + + frames = codec.decode(enc_packets[-1]) + if not frames: + return None + + return frames[0].to_ndarray(format='bgr24') + + def get_size(self) -> Tuple[int, int]: + return self.msg.getWidth(), self.msg.getHeight() + + +class DisparityPacket(FramePacket): def __init__(self, name: str, - points: np.ndarray, - depth_map: dai.ImgFrame, - color_frame: Optional[np.ndarray], - visualizer: 'Visualizer' = None): - self.name = name - self.points = points - self.depth_imgFrame = dai.ImgFrame - self.color_frame = color_frame - self.visualizer = visualizer + img: dai.ImgFrame, + multiplier: float, + disparity_map: Optional[np.ndarray] = None, + colorize: StereoColor = None, + colormap: int = None, + mono_frame: Optional[dai.ImgFrame] = None, + ): + """ + disparity_map might be filtered, eg. 
if WLS filter is enabled
+        """
+        super().__init__(name=name, msg=img)
+        self.mono_frame = mono_frame
+        self.disparity_map = disparity_map
+        self.multiplier = multiplier
+        self.colorize = colorize
+        self.colormap = colormap
+
+    def get_disparity(self) -> np.ndarray:
+        if self.disparity_map is not None:
+            return self.disparity_map
+        else:
+            return self.msg.getFrame()
+
+    def get_colorized_frame(self, visualizer) -> np.ndarray:
+        frame = self.get_disparity()
+        colorized_disp = frame * self.multiplier
+
+        try:
+            mono_frame = self.mono_frame.getCvFrame()
+        except AttributeError:
+            mono_frame = None
+
+        stereo_config = visualizer.config.stereo
+
+        colorize = self.colorize or stereo_config.colorize
+        if self.colormap is not None:
+            colormap = self.colormap
+        else:
+            colormap = stereo_config.colormap
+            colormap[0] = [0, 0, 0]  # Invalidate pixels 0 to be black
+
+        if mono_frame is not None and colorized_disp.ndim == 2 and mono_frame.ndim == 3:
+            colorized_disp = colorized_disp[..., np.newaxis]
+
+        if colorize == StereoColor.GRAY:
+            pass
+        elif colorize == StereoColor.RGB:
+            colorized_disp = cv2.applyColorMap(colorized_disp.astype(np.uint8), colormap)
+        elif colorize == StereoColor.RGBD:
+            colorized_disp = cv2.applyColorMap(
+                (colorized_disp + mono_frame * 0.5).astype(np.uint8), colormap
+            )
+        return colorized_disp


 class DepthPacket(FramePacket):
-    mono_frame: dai.ImgFrame
+    def __init__(self, name: str,
+                 msg: dai.ImgFrame):
+        super().__init__(name, msg)
+        self.depth = msg.getFrame()
+
+
+class DisparityDepthPacket(DisparityPacket):
     def __init__(self,
                  name: str,
                  img_frame: dai.ImgFrame,
-                 mono_frame: Optional[dai.ImgFrame],
-                 depth_map: Optional[np.ndarray] = None,
-                 visualizer: 'Visualizer' = None):
-        super().__init__(name=name,
-                         msg=img_frame,
-                         frame=img_frame.getCvFrame() if cv2 else None,
-                         visualizer=visualizer)
+                 colorize: StereoColor = None,
+                 colormap: int = None,
+                 mono_frame: Optional[dai.ImgFrame] = None,
+                 disp_scale_factor=255 / 95,
+                 ):
+        # DepthPacket.__init__(self, name=name, msg=img_frame)
+        super().__init__(
+            name=name,
+            img=img_frame,
+            disparity_map=None,
+            multiplier=255 / 95,
+            colorize=colorize,
+            colormap=colormap,
+            mono_frame=mono_frame,
+        )
+        self.disp_scale_factor = disp_scale_factor

-        if mono_frame is not None:
-            self.mono_frame = mono_frame
+    def get_disparity(self) -> np.ndarray:
+        with np.errstate(divide='ignore'):
+            disparity = self.disp_scale_factor / self.msg.getFrame()
+        disparity[disparity == np.inf] = 0
+        return disparity

+    # def get_colorized_frame(self, visualizer) -> np.ndarray:
+    #     Convert depth to disparity for nicer visualization
+
+
+class PointcloudPacket(BasePacket):
+    def __init__(self,
+                 name: str,
+                 points: np.ndarray,
+                 depth_map: dai.ImgFrame,
+                 colorize_frame: Optional[dai.ImgFrame]):
+        super().__init__(name=name)
+        self.points = points
+        self.colorize_frame = colorize_frame.getCvFrame() if colorize_frame is not None else None
         self.depth_map = depth_map

-class SpatialBbMappingPacket(FramePacket):
+    def get_sequence_num(self) -> int:
+        return self.depth_map.getSequenceNum()
+
+    def get_timestamp(self) -> timedelta:
+        return self.depth_map.getTimestampDevice()
+
+
+class SpatialBbMappingPacket(DisparityDepthPacket):
     """
     Output from Spatial Detection nodes - depth frame + bounding box mappings. Inherits FramePacket.
""" - spatials: dai.SpatialImgDetections def __init__(self, name: str, msg: dai.ImgFrame, spatials: dai.SpatialImgDetections, - visualizer: 'Visualizer' = None): + disp_scale_factor: float): super().__init__(name=name, - msg=msg, - frame=msg.getFrame() if cv2 else None, - visualizer=visualizer) + img_frame=msg, + disp_scale_factor=disp_scale_factor) self.spatials = spatials + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + # Add detections to packet + for detection in self.spatials.detections: + br = detection.boundingBoxMapping.roi.bottomRight() + tl = detection.boundingBoxMapping.roi.topLeft() + bbox = BoundingBox([tl.x, tl.y, br.x, br.y]) + # Add detections to visualizer + vis.add_bbox( + bbox=bbox, + thickness=3, + color=(0, 0, 0) + ) + vis.add_bbox( + bbox=bbox, + thickness=1, + color=(255, 255, 255) + ) + + +class NnOutputPacket(FramePacket): + """ + NN result + image frame. Inherits FramePacket. + """ + + def __init__(self, + name: str, + msg: dai.ImgFrame, + nn_data: dai.NNData, + bbox: BoundingBox + ): + super().__init__(name=name, + msg=msg) + self.nn_data = nn_data + self.bbox = bbox + + +class ImgLandmarksPacket(NnOutputPacket): + """ + Output from Landmarks Estimation nodes - image frame + landmarks. Inherits NnOutputPacket. + """ + + def __init__(self, + name: str, + msg: dai.ImgFrame, + nn_data: dai.NNData, + landmarks: ImgLandmarks, + bbox: BoundingBox): + super().__init__(name=name, + msg=msg, + nn_data=nn_data, + bbox=bbox) + self.landmarks = landmarks + + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + all_landmarks = self.landmarks.landmarks + all_landmarks_indices = self.landmarks.landmarks_indices + colors = self.landmarks.colors + w, h = self.get_size() + for landmarks, indices in zip(all_landmarks, all_landmarks_indices): + for i, landmark in enumerate(landmarks): + # Map normalized coordinates to frame coordinates + l = [(int(point[0] * w), int(point[1] * h)) for point in landmark] + idx = indices[i] + + vis.add_line(pt1=tuple(l[0]), pt2=tuple(l[1]), color=colors[idx], thickness=4) + vis.add_circle(coords=tuple(l[0]), radius=8, color=colors[idx], thickness=-1) + vis.add_circle(coords=tuple(l[1]), radius=8, color=colors[idx], thickness=-1) + + +class SemanticSegmentationPacket(NnOutputPacket): + """ + Output from Semantic Segmentation nodes - image frame + segmentation mask. Inherits NnOutputPacket. 
+ """ + + def __init__(self, + name: str, + msg: dai.ImgFrame, + nn_data: dai.NNData, + segmentation: SemanticSegmentation, + bbox: BoundingBox): + super().__init__(name=name, + msg=msg, + nn_data=nn_data, + bbox=bbox) + self.segmentation = segmentation + + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + raise NotImplementedError('Semantic segmentation visualization is not implemented yet!') + class DetectionPacket(FramePacket): """ @@ -128,26 +338,44 @@ class DetectionPacket(FramePacket): def __init__(self, name: str, msg: dai.ImgFrame, - img_detections: Union[dai.ImgDetections, dai.SpatialImgDetections], - visualizer: 'Visualizer' = None): + dai_msg: Union[dai.ImgDetections, dai.SpatialImgDetections, dai.NNData], + bbox: BoundingBox, + ): + super().__init__(name=name, - msg=msg, - frame=msg.getCvFrame() if cv2 else None, - visualizer=visualizer) - self.img_detections = img_detections - self.detections = [] + msg=msg) + + self.img_detections = dai_msg + self.bbox = bbox + self.detections: List[Detection] = [] def _is_spatial_detection(self) -> bool: return isinstance(self.img_detections, dai.SpatialImgDetections) - def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, color) -> None: - det = _Detection() - det.img_detection = img_det - det.label_str = txt - det.color = color - det.top_left = (bbox[0], bbox[1]) - det.bottom_right = (bbox[2], bbox[3]) - self.detections.append(det) + def prepare_visualizer_objects(self, vis: Visualizer) -> None: + # Add detections to packet + for detection in self.detections: + # Add detections to visualizer + vis.add_bbox( + bbox=self.bbox.get_relative_bbox(detection.bbox), + # label=detection.label_str, + color=detection.color, + ) + vis.add_text( + f'{detection.label_str} {100 * detection.confidence:.0f}%', + bbox=self.bbox.get_relative_bbox(detection.bbox), + position=TextPosition.TOP_LEFT, + ) + # Spatial coordinates + if type(detection.img_detection) == dai.SpatialImgDetection: + x_meters = detection.img_detection.spatialCoordinates.x / 1000 + y_meters = detection.img_detection.spatialCoordinates.y / 1000 + z_meters = detection.img_detection.spatialCoordinates.z / 1000 + vis.add_text( + f'X: {x_meters:.2f}m\nY: {y_meters:.2f}m\nZ: {z_meters:.2f}m', + bbox=self.bbox.get_relative_bbox(detection.bbox), + position=TextPosition.BOTTOM_LEFT, + ) class TrackerPacket(FramePacket): @@ -159,32 +387,62 @@ def __init__(self, name: str, msg: dai.ImgFrame, tracklets: dai.Tracklets, - visualizer: 'Visualizer' = None): + bbox: BoundingBox, + ): super().__init__(name=name, - msg=msg, - frame=msg.getCvFrame() if cv2 else None, - visualizer=visualizer) - self.detections: List[_TrackingDetection] = [] - self.daiTracklets = tracklets + msg=msg) - def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, color): - det = _TrackingDetection() - det.img_detection = img_det - det.label_str = txt - det.color = color - det.top_left = (bbox[0], bbox[1]) - det.bottom_right = (bbox[2], bbox[3]) - self.detections.append(det) + # int: object_id, list: TrackingDetection + self.tracklets: Dict[int, List[TrackingDetection]] = {} + self.daiTracklets = tracklets + self.bbox = bbox def _is_spatial_detection(self) -> bool: coords = self.daiTracklets.tracklets[0].spatialCoordinates return coords.x != 0.0 or coords.y != 0.0 or coords.z != 0.0 - def _get_spatials(self, det: dai.ImgDetection) -> dai.Point3f: - # Not the cleanest solution, but oh well - for t in self.daiTracklets.tracklets: - if t.srcImgDetection == det: - return 
t.spatialCoordinates + def prepare_visualizer_objects(self, visualizer: Visualizer) -> None: + tracking_config = visualizer.config.tracking + for obj_id, tracking_dets in self.tracklets.items(): + tracking_det = tracking_dets[-1] # Get the last detection + bb = tracking_det.filtered_2d or tracking_det.bbox + visualizer.add_bbox( + bbox=self.bbox.get_relative_bbox(bb), + label=f"[{obj_id}] {tracking_det.label_str}", + color=tracking_det.color, + ) + visualizer.add_text( + f'{tracking_det.label_str} {100 * tracking_det.confidence:.0f}%', + bbox=self.bbox.get_relative_bbox(bb), + position=TextPosition.TOP_LEFT, + ) + if visualizer.config.tracking.show_speed and \ + tracking_det.speed is not None: + visualizer.add_text( + text=f"{tracking_det.speed:.2f} m/s", + color=tracking_det.color, + bbox=self.bbox.get_relative_bbox(bb), + position=TextPosition.BOTTOM_RIGHT, + ) + w, h = self.get_size() + tracklet_length = 0 + for i in reversed(range(len(tracking_dets) - 1)): + p1 = self.bbox.get_relative_bbox(tracking_dets[i].bbox).get_centroid().denormalize((h, w)) + p2 = self.bbox.get_relative_bbox(tracking_dets[i + 1].bbox).get_centroid().denormalize((h, w)) + + if tracking_config.max_length != -1: + tracklet_length += np.linalg.norm(np.array(p1) - np.array(p2)) + if tracking_config.max_length < tracklet_length: + break + + thickness = tracking_config.line_thickness + if tracking_config.fading_tails: + thickness = max(1, int(np.ceil(thickness * i / len(tracking_dets)))) + + visualizer.add_line(pt1=p1, pt2=p2, + color=tracking_dets[i].color, + thickness=thickness + ) class TwoStagePacket(DetectionPacket): @@ -197,18 +455,19 @@ def __init__(self, name: str, img_detections: dai.ImgDetections, nn_data: List[dai.NNData], labels: List[int], - visualizer: 'Visualizer' = None): + bbox: BoundingBox + ): super().__init__(name=name, msg=msg, - img_detections=img_detections, - visualizer=visualizer) - self.frame = self.msg.getCvFrame() if cv2 else None + dai_msg=img_detections, + bbox=bbox + ) self.nnData = nn_data self.labels = labels self._cntr = 0 def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, color): - det = _TwoStageDetection() + det = TwoStageDetection() det.img_detection = img_det det.color = color det.top_left = (bbox[0], bbox[1]) @@ -222,27 +481,57 @@ def _add_detection(self, img_det: dai.ImgDetection, bbox: np.ndarray, txt: str, self.detections.append(det) -class IMUPacket: - def __init__(self, data: List[dai.IMUData]): - self.data = data +class IMUPacket(BasePacket): + def __init__(self, name, packet: dai.IMUPacket, rotation=None): + self.packet = packet + super().__init__(name) + + self.acceleroMeter = packet.acceleroMeter + self.gyroscope = packet.gyroscope + self.magneticField = packet.magneticField + self.rotationVector = rotation if rotation is not None else packet.rotationVector + + # Check which reports are available + self.available_reports: Dict[str, dai.IMUReport] = {} + for i, val in enumerate([self.acceleroMeter, self.gyroscope, self.magneticField, self.rotationVector]): + if (i == 3 and rotation) or val.getTimestampDevice() != timedelta(0): + self.available_reports[val.__class__.__name__] = val + + def get_imu_vals(self) -> Tuple[Sequence, Sequence, Sequence, Sequence]: + """ + Returns imu values in a tuple. 
Returns in format (accelerometer_values, gyroscope_values, quaternion, magnetometer_values) + """ + return ( + [self.acceleroMeter.x, self.acceleroMeter.y, self.acceleroMeter.z], + [self.gyroscope.x, self.gyroscope.y, self.gyroscope.z], + [self.rotationVector.i, self.rotationVector.j, self.rotationVector.k, self.rotationVector.real], + [self.magneticField.x, self.magneticField.y, self.magneticField.z] + ) def __str__(self): - packet_details = [] - - for imu_data in self.data: - # TODO print more details if needed - accelerometer_str = 'Accelerometer [m/s^2]: (x: %.2f, y: %.2f, z: %.2f)' % ( - imu_data.acceleroMeter.x, - imu_data.acceleroMeter.y, - imu_data.acceleroMeter.z - ) + accelerometer_str = 'Accelerometer [m/s^2]: (x: %.2f, y: %.2f, z: %.2f)' % ( + self.packet.acceleroMeter.x, + self.packet.acceleroMeter.y, + self.packet.acceleroMeter.z + ) - gyroscope_str = 'Gyroscope [rad/s]: (x: %.2f, y: %.2f, z: %.2f)' % ( - imu_data.gyroscope.x, - imu_data.gyroscope.y, - imu_data.gyroscope.z - ) + gyroscope_str = 'Gyroscope [rad/s]: (x: %.2f, y: %.2f, z: %.2f)' % ( + self.packet.gyroscope.x, + self.packet.gyroscope.y, + self.packet.gyroscope.z + ) + + return f'IMU Packet: {accelerometer_str} {gyroscope_str}' + + def _get_imu_report(self) -> dai.IMUReport: + """ + Get the first available IMU report + """ + for name, val in self.available_reports.items(): + return val - packet_details.append(f'{accelerometer_str}, {gyroscope_str})') + def get_timestamp(self) -> timedelta: + return self._get_imu_report().getTimestampDevice() - return f'IMU Packet: {packet_details}' + def get_sequence_num(self) -> int: + return self._get_imu_report().getSequenceNum() diff --git a/depthai_sdk/src/depthai_sdk/components/camera_component.py b/depthai_sdk/src/depthai_sdk/components/camera_component.py index d304bfcae..34ae1534a 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_component.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_component.py @@ -2,27 +2,27 @@ from typing import Dict from depthai_sdk.classes.enum import ResizeMode +from depthai_sdk.components.camera_control import CameraControl from depthai_sdk.components.camera_helper import * -from depthai_sdk.components.component import Component -from depthai_sdk.components.parser import parse_resolution, parse_encode, parse_camera_socket +from depthai_sdk.components.component import Component, ComponentOutput +from depthai_sdk.components.parser import parse_resolution, parse_encode, encoder_profile_to_fourcc from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout, ReplayStream from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.oak_outputs.xout.xout_h26x import XoutH26x -from depthai_sdk.oak_outputs.xout.xout_mjpeg import XoutMjpeg from depthai_sdk.replay import Replay -from depthai_sdk.components.camera_control import CameraControl +from depthai_sdk.types import Resolution class CameraComponent(Component): def __init__(self, device: dai.Device, pipeline: dai.Pipeline, - source: Union[str, dai.CameraBoardSocket], + source: dai.CameraBoardSocket, resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, + sensor_type: Optional[dai.CameraSensorType] = None, rotation: Optional[int] = None, replay: Optional[Replay] = None, name: Optional[str] = None, @@ -39,14 +39,19 @@ def __init__(self, resolution (optional): Camera resolution, 
eg. '800p' or '4k' fps (float, optional): Camera FPS encode: Encode streams before sending them to the host. Either True (use default), or mjpeg/h264/h265 + sensor_type: To force color/mono/tof camera rotation (int, optional): Rotate the camera by 90, 180, 270 degrees replay (Replay object): Replay object to use for mocking the camera name (str, optional): Name of the output stream args (Dict): Use user defined arguments when constructing the pipeline """ super().__init__() + # _replay should be set before .out, as it's used in .out + self._replay: Optional[Replay] = replay self.out = self.Out(self) + self._pipeline = pipeline + self._device = device self.node: Optional[Union[dai.node.ColorCamera, dai.node.MonoCamera, dai.node.XLinkIn]] = None self.encoder: Optional[dai.node.VideoEncoder] = None @@ -55,8 +60,13 @@ def __init__(self, self.stream_size: Optional[Tuple[int, int]] = None # Output size self._source = str(source) + if self._source.startswith('CameraBoardSocket.'): + self._source = self._source[len('CameraBoardSocket.'):] + + self._socket = source self._replay: Optional[Replay] = replay self._args: Dict = args + self.name = name if rotation not in [None, 0, 90, 180, 270]: @@ -66,8 +76,14 @@ def __init__(self, self._preview_num_frames_pool = 4 if self.is_replay(): - if source.casefold() not in list(map(lambda x: x.casefold(), self._replay.getStreams())): + stream_name = None + for name, stream in self._replay.streams.items(): + if stream.get_socket() == self._socket: + stream_name = name + break + if stream_name is None: raise Exception(f"{source} stream was not found in specified depthai-recording!") + self._source = stream_name res = self._replay.getShape(self._source) # print('resolution', res) # resize = getResize(res, width=1200) @@ -91,44 +107,22 @@ def __init__(self, # Livestreaming, not replay else: node_type: dai.node = None - if isinstance(source, str): - source = source.upper() - # When sensors can be either color or mono (eg. AR0234), we allow specifying it - if "," in source: # For sensors that support multiple - parts = source.split(',') - source = parts[0] - if parts[1] in ["C", "COLOR"]: - node_type = dai.node.ColorCamera - elif parts[1] in ["M", "MONO"]: - node_type = dai.node.MonoCamera - else: - raise Exception( - "Please specify sensor type with c/color or m/mono after the ','" - " - eg. 
`cam = oak.create_camera('cama,c')`" - ) - elif source in ["COLOR", "RGB"]: - for features in device.getConnectedCameraFeatures(): - if dai.CameraSensorType.COLOR in features.supportedTypes: - source = features.socket - break - if not isinstance(source, dai.CameraBoardSocket): - raise ValueError("Couldn't find a color camera!") - - socket = parse_camera_socket(source) - sensor = [f for f in device.getConnectedCameraFeatures() if f.socket == socket][0] - - if node_type is None: # User specified camera type - type = sensor.supportedTypes[0] - if type == dai.CameraSensorType.COLOR: - node_type = dai.node.ColorCamera - elif type == dai.CameraSensorType.MONO: - node_type = dai.node.MonoCamera - else: - raise Exception(f"{sensor} doesn't support either COLOR or MONO ") + sensors = [f for f in device.getConnectedCameraFeatures() if f.socket == source] + if len(sensors) == 0: + raise Exception(f"No camera found on user-specified socket {source}") + sensor = sensors[0] + + sensor_type = sensor_type or sensor.supportedTypes[0] + if sensor_type == dai.CameraSensorType.COLOR: + node_type = dai.node.ColorCamera + elif sensor_type == dai.CameraSensorType.MONO: + node_type = dai.node.MonoCamera + else: + raise Exception(f"{sensor} doesn't support either COLOR or MONO ") # Create the node, and set the socket self.node = pipeline.create(node_type) - self.node.setBoardSocket(socket) + self.node.setBoardSocket(source) self._resolution_forced: bool = resolution is not None if resolution: @@ -147,13 +141,22 @@ def __init__(self, if not self._resolution_forced: # Find the closest resolution sensor = [f for f in device.getConnectedCameraFeatures() if f.socket == self.node.getBoardSocket()][0] sensor_type = dai.CameraSensorType.COLOR if dai.node.ColorCamera else dai.CameraSensorType.MONO - res = getClosesResolution(sensor, sensor_type, width=1300) + targetWidthRes = 1300 + targetWidthIsp = targetWidthRes + if self._args["defaultResolution"] == "min": + targetWidthRes = 0 + targetWidthIsp = 1300 # Still keep the same target for the ISP + elif self._args["defaultResolution"] == "max": + targetWidthRes = 1000000 # Some big number + targetWidthIsp = targetWidthRes + res = getClosesResolution(sensor, sensor_type, width=targetWidthRes) self.node.setResolution(res) - scale = getClosestIspScale(self.node.getIspSize(), width=1300, videoEncoder=(self.encoder is not None)) + scale = getClosestIspScale(self.node.getIspSize(), width=targetWidthIsp, + videoEncoder=(encode is not None)) self.node.setIspScale(*scale) curr_size = self.node.getVideoSize() - closest = getClosestVideoSize(*curr_size) + closest = getClosestVideoSize(*curr_size, videoEncoder=encode) self.node.setVideoSize(*closest) self.node.setVideoNumFramesPool(2) # We will increase it later if we are streaming to host @@ -197,7 +200,6 @@ def __init__(self, if self._args: self._config_camera_args(self._args) - # Runtime camera control self.control = CameraControl() self._control_xlink_in = None @@ -205,7 +207,8 @@ def __init__(self, self._control_xlink_in = pipeline.create(dai.node.XLinkIn) self._control_xlink_in.setStreamName(f"{self.node.id}_inputControl") self._control_xlink_in.out.link(self.node.inputControl) - self._control_xlink_in.setMaxDataSize(1) # CameraControl message doesn't use any additional data (only metadata) + # CameraControl message doesn't use any additional data (only metadata) + self._control_xlink_in.setMaxDataSize(1) def on_pipeline_started(self, device: dai.Device): if self._control_xlink_in is not None: @@ -223,23 +226,21 @@ def 
_create_rotation_manip(self, pipeline: dai.Pipeline, rotation: int): rot_manip.setMaxOutputFrameSize(w * h * 3) return rot_manip - # Should be mono/color camera agnostic. Also call this from __init__ if args is enabled def config_camera(self, # preview: Union[None, str, Tuple[int, int]] = None, size: Union[None, Tuple[int, int], str] = None, resize_mode: ResizeMode = ResizeMode.CROP, fps: Optional[float] = None, - resolution: Optional[Union[ - str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None + resolution: Optional[Resolution] = None ) -> None: """ Configure resolution, scale, FPS, etc. """ - - # TODO - if fps: self.set_fps(fps) - if resolution: self._set_resolution(resolution) + # TODO: Should be mono/color camera agnostic. Also call this from __init__ if args is enabled + if fps: + self.set_fps(fps) + if resolution: + self._set_resolution(resolution) if size: from .parser import parse_size @@ -255,7 +256,6 @@ def config_camera(self, raise ValueError("Currently only ResizeMode.CROP is supported mode for specifying size!") else: # TODO: Use ImageManip to set mono frame size - raise NotImplementedError("Not yet implemented") def _config_camera_args(self, args: Dict): @@ -294,17 +294,16 @@ def control_with_nn(self, detection_component: 'NNComponent', auto_focus=True, a :param detection_component: NNComponent that will be used to control the camera :param auto_focus: Enable auto focus to the object :param auto_exposure: Enable auto exposure to the object - :param auto_white_balance: auto white balance to the object """ if not auto_focus and not auto_exposure: - logging.error( - 'Attempted to control camera with NN, but both Auto-Focus and Auto-Exposure were disabled! Attempt ignored.' - ) + logging.error('Attempted to control camera with NN, ' + 'but both Auto-Focus and Auto-Exposure were disabled! Attempt ignored.') return + if 'NNComponent' not in str(type(detection_component)): raise ValueError('nn_component must be an instance of NNComponent!') - if not detection_component._is_detector(): + if not detection_component.is_detector(): raise ValueError('nn_component must be a object detection model (YOLO/MobileNetSSD based)!') from depthai_sdk.components.control_camera_with_nn import control_camera_with_nn @@ -315,7 +314,7 @@ def control_with_nn(self, detection_component: 'NNComponent', auto_focus=True, a nn_output=detection_component.node.out, resize_mode=detection_component._ar_resize_mode, resolution=self.node.getResolution(), - nn_size = detection_component._size, + nn_size=detection_component._size, af=auto_focus, ae=auto_exposure, debug=debug @@ -338,17 +337,14 @@ def config_color_camera(self, chroma_denoise: Optional[int] = None, ) -> None: if not self.is_color(): - logging.info( - 'Attempted to configure ColorCamera, but this component doesn\'t have it. Config attempt ignored.' - ) + logging.info('Attempted to configure ColorCamera, ' + 'but this component doesn\'t have it. Config attempt ignored.') return if self.is_replay(): logging.info('Tried configuring ColorCamera, but replaying is enabled. 
Config attempt ignored.') return - self.node: dai.node.ColorCamera - if interleaved is not None: self.node.setInterleaved(interleaved) if color_order: if isinstance(color_order, str): @@ -377,7 +373,12 @@ def config_color_camera(self, def _set_resolution(self, resolution): if not self.is_replay(): - self.node.setResolution(parse_resolution(type(self.node), resolution)) + if isinstance(resolution, str) and resolution.lower() in ['max', 'maximum']: + sensor = [f for f in self._device.getConnectedCameraFeatures() if f.socket == self._socket][0] + resolution = get_max_resolution(type(self.node), sensor) + else: + resolution = parse_resolution(type(self.node), resolution) + self.node.setResolution(resolution) # TODO: support potentially downscaling depthai-recording def is_replay(self) -> bool: @@ -437,17 +438,19 @@ def config_encoder_mjpeg(self, if lossless is not None: self.encoder.setLossless(lossless) - def get_stream_xout(self) -> StreamXout: - if self.is_replay(): - return ReplayStream(self._source) + def get_stream_xout(self, fourcc: Optional[str] = None) -> StreamXout: + if self.encoder is not None and fourcc is not None: + return StreamXout(self.encoder.bitstream, name=self.name or self._source + '_bitstream') + elif self.is_replay(): + return ReplayStream(self.name or self._source) elif self.is_mono(): - return StreamXout(self.node.id, self.stream, name=self.name) + return StreamXout(self.stream, name=self.name or self._source + '_mono') else: # ColorCamera self.node.setVideoNumFramesPool(self._num_frames_pool) self.node.setPreviewNumFramesPool(self._preview_num_frames_pool) # node.video instead of preview (self.stream) was used to reduce bandwidth # consumption by 2 (3bytes/pixel vs 1.5bytes/pixel) - return StreamXout(self.node.id, self.node.video, name=self.name) + return StreamXout(self.node.video, name=self.name or self._source + '_video') def set_num_frames_pool(self, num_frames: int, preview_num_frames: Optional[int] = None): """ @@ -461,61 +464,32 @@ def set_num_frames_pool(self, num_frames: int, preview_num_frames: Optional[int] if preview_num_frames is not None: self._preview_num_frames_pool = preview_num_frames + def get_fourcc(self) -> Optional[str]: + if self.encoder is None: + return None + return encoder_profile_to_fourcc(self._encoder_profile) + """ Available outputs (to the host) of this component """ class Out: + class CameraOut(ComponentOutput): + def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: + return XoutFrames(self._comp.get_stream_xout(fourcc), fourcc).set_comp_out(self) + + class ReplayOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(ReplayStream(self._comp._source)).set_comp_out(self) + + class EncodedOut(CameraOut): + def __call__(self, device: dai.Device) -> XoutBase: + return super().__call__(device, fourcc=self._comp.get_fourcc()) + + def __init__(self, camera_component: 'CameraComponent'): - self._comp = camera_component - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - Default output. Uses either camera(), replay(), or encoded() depending on the component settings. - """ - if self._comp.encoder: - return self.encoded(pipeline, device) - elif self._comp.is_replay(): - return self.replay(pipeline, device) - else: - return self.camera(pipeline, device) - - def camera(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutFrames: - """ - Streams camera output to the OAK camera. Produces FramePacket. 
- """ - out = XoutFrames(self._comp.get_stream_xout(), self._comp.get_fps()) - out.name = self._comp._source - return self._comp._create_xout(pipeline, out) - - def replay(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - If depthai-recording was used, it won't stream anything, but it will instead use frames that were sent to the OAK. - Produces FramePacket. - """ - out = XoutFrames(ReplayStream(self._comp._source), self._comp.get_fps()) - return self._comp._create_xout(pipeline, out) - - def encoded(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - If encoding was enabled, it will stream bitstream from VideoEncoder node to the host. - Produces FramePacket. - """ - if self._comp._encoder_profile == dai.VideoEncoderProperties.Profile.MJPEG: - out = XoutMjpeg( - frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream, name=self._comp.name), - color=self._comp.is_color(), - lossless=self._comp.encoder.getLossless(), - fps=self._comp.encoder.getFrameRate(), - frame_shape=self._comp.stream_size - ) - else: - out = XoutH26x( - frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream, name=self._comp.name), - color=self._comp.is_color(), - profile=self._comp._encoder_profile, - fps=self._comp.encoder.getFrameRate(), - frame_shape=self._comp.stream_size - ) - out.name = self._comp._source - return self._comp._create_xout(pipeline, out) + self.replay = self.ReplayOut(camera_component) + self.camera = self.CameraOut(camera_component) + self.encoded = self.EncodedOut(camera_component) + + self.main = self.replay if camera_component.is_replay() else self.camera diff --git a/depthai_sdk/src/depthai_sdk/components/camera_control.py b/depthai_sdk/src/depthai_sdk/components/camera_control.py index a5c8107ea..fb871c62d 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_control.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_control.py @@ -1,6 +1,7 @@ -import depthai as dai -from itertools import cycle import logging +from itertools import cycle + +import depthai as dai logger = logging.getLogger(__name__) @@ -18,17 +19,22 @@ 'chroma_denoise': (0, 4) } + def clamp(value, min_value, max_value): return max(min(value, max_value), min_value) + class CameraControl: def __init__(self): self.queue = None - self._cycle_awb_mode = cycle([item for name, item in vars(dai.CameraControl.AutoWhiteBalanceMode).items() if name.isupper()]) - self._cycle_ab_mode = cycle([item for name, item in vars(dai.CameraControl.AntiBandingMode).items() if name.isupper()]) + self._cycle_awb_mode = cycle( + [item for name, item in vars(dai.CameraControl.AutoWhiteBalanceMode).items() if name.isupper()]) + self._cycle_ab_mode = cycle( + [item for name, item in vars(dai.CameraControl.AntiBandingMode).items() if name.isupper()]) # self._cycle_effect_mode = cycle([item for name, item in vars(dai.CameraControl.EffectMode).items() if name.isupper()]) - self._cycle_af_mode = cycle([item for name, item in vars(dai.CameraControl.AutoFocusMode).items() if name.isupper()]) + self._cycle_af_mode = cycle( + [item for name, item in vars(dai.CameraControl.AutoFocusMode).items() if name.isupper()]) self._current_vals = { 'exposure_time': 20000, @@ -76,7 +82,9 @@ def exposure_time_up(self, step=500): logger.error(f'Exposure time cannot be greater than {LIMITS["exposure"][1]}') return self._current_vals['exposure_time'] += step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + 
{'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + def exposure_time_down(self, step=500): """ Decrease exposure time by step. @@ -87,7 +95,8 @@ def exposure_time_down(self, step=500): logger.error(f'Exposure time cannot be less than {LIMITS["exposure"][0]}') return self._current_vals['exposure_time'] -= step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + {'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) def sensitivity_up(self, step=50): """ @@ -99,7 +108,8 @@ def sensitivity_up(self, step=50): logger.error(f'Sensitivity cannot be greater than {LIMITS["gain"][1]}') return self._current_vals['sensitivity'] += step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + {'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) def sensitivity_down(self, step=50): """ @@ -111,7 +121,8 @@ def sensitivity_down(self, step=50): logger.error(f'Sensitivity cannot be less than {LIMITS["gain"][0]}') return self._current_vals['sensitivity'] -= step - self.send_controls({'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) + self.send_controls( + {'exposure': {'manual': [self._current_vals['exposure_time'], self._current_vals['sensitivity']]}}) def focus_up(self, step=3): """ @@ -297,7 +308,8 @@ def send_controls(self, controls: dict = None): if controls.get('white-balance', None) is not None: if controls['white-balance'].get('mode', None) is not None: if isinstance(controls["focus"]["mode"], str): - controls["white-balance"]["mode"] = getattr(dai.CameraControl.AutoFocusMode, controls["white-balance"]["mode"]) + controls["white-balance"]["mode"] = getattr(dai.CameraControl.AutoFocusMode, + controls["white-balance"]["mode"]) logger.info(f'Setting white balance mode to {controls["white-balance"]["mode"]}.') ctrl.setAutoWhiteBalanceMode(controls["white-balance"]["mode"]) if controls['white-balance'].get('lock', None) is not None: diff --git a/depthai_sdk/src/depthai_sdk/components/camera_helper.py b/depthai_sdk/src/depthai_sdk/components/camera_helper.py index 2fc802319..1f4215084 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_helper.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_helper.py @@ -1,19 +1,17 @@ import math -import depthai as dai -from typing import * -import numpy as np -import cv2 +from typing import List, Tuple, Dict, Any, Optional, Union +import depthai as dai -monoResolutions: Dict[dai.MonoCameraProperties.SensorResolution, Tuple[int,int]] = { - dai.MonoCameraProperties.SensorResolution.THE_1200_P: (1920, 1200), # Monochrome AR0234 - dai.MonoCameraProperties.SensorResolution.THE_800_P: (1280, 800), # OV9282 +monoResolutions: Dict[dai.MonoCameraProperties.SensorResolution, Tuple[int, int]] = { + dai.MonoCameraProperties.SensorResolution.THE_1200_P: (1920, 1200), # Monochrome AR0234 + dai.MonoCameraProperties.SensorResolution.THE_800_P: (1280, 800), # OV9282 dai.MonoCameraProperties.SensorResolution.THE_720_P: (1280, 720), - dai.MonoCameraProperties.SensorResolution.THE_480_P: (640, 480), # OV7251 + dai.MonoCameraProperties.SensorResolution.THE_480_P: (640, 480), # OV7251 dai.MonoCameraProperties.SensorResolution.THE_400_P: (640, 400), } -colorResolutions: 
Dict[dai.ColorCameraProperties.SensorResolution, Tuple[int,int]] = { +colorResolutions: Dict[dai.ColorCameraProperties.SensorResolution, Tuple[int, int]] = { dai.ColorCameraProperties.SensorResolution.THE_5312X6000: (5312, 6000), # IMX582 cropped dai.ColorCameraProperties.SensorResolution.THE_13_MP: (4208, 3120), # AR214 dai.ColorCameraProperties.SensorResolution.THE_12_MP: (4056, 3040), # IMX378, IMX477, IMX577 @@ -27,10 +25,11 @@ dai.ColorCameraProperties.SensorResolution.THE_720_P: (1280, 720), } -sensorResolutions: Dict[Any, Tuple[int,int]] = [] +sensorResolutions: Dict[Any, Tuple[int, int]] = [] sensorResolutions.extend(monoResolutions) sensorResolutions.extend(colorResolutions) + def availableIspScales() -> List[Tuple[int, Tuple[int, int]]]: """ Calculates all supported @@ -47,19 +46,17 @@ def availableIspScales() -> List[Tuple[int, Tuple[int, int]]]: lst.sort(reverse=True) return lst -def getClosestVideoSize(width: int, height: int, videoEncoder: bool=False) -> Tuple[int, int]: + +def getClosestVideoSize(width: int, height: int, videoEncoder: bool = False) -> Tuple[int, int]: """ For colorCamera.video output """ - while True: - if width % 2 == 0: # YUV420/NV12 width needs to be an even number to be convertible to BGR on host using cv2 - if not videoEncoder or width % 32 == 0: # VideoEncoder HW limitation - width must be divisible by 32 - break - width -= 1 - while True: - if height % 2 == 0: # YUV420/NV12 height needs to be an even number to be convertible to BGR on host using cv2 - break - height -= 1 + width_divider = 32 if videoEncoder else 2 + width = (width // width_divider) * width_divider + + height_divider = 8 if videoEncoder else 2 + height = (height // height_divider) * height_divider + return (width, height) @@ -82,10 +79,10 @@ def getClosestIspScale(camResolution: Tuple[int, int], """ if width and height: raise ValueError("You have to specify EITHER width OR height to calculate desired ISP scaling options!") - if not width and not height: + if width is None and height is None: raise ValueError("You have to provide width or height calculate desired ISP scaling options!") - minError = 99999 + minError = 999999 ispScale: List[int] = None for ratio, (n, d) in availableIspScales(): newW = int((camResolution[0] * n - 1) / d + 1) @@ -102,7 +99,7 @@ def getClosestIspScale(camResolution: Tuple[int, int], if newW % 2 != 0 or newH % 2 != 0: continue - err = abs((newW - width) if width else (newH - height)) + err = abs((newW - width) if width is not None else (newH - height)) if err < minError: ispScale = [n, d, n, d] minError = err @@ -166,11 +163,28 @@ def setCameraControl(control: dai.CameraControl, # TODO: Add contrast, exposure compensation, brightness, manual exposure, and saturation -def get_sensor_resolution(type: dai.CameraSensorType, width: int, height: int) -> Tuple[Union[dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution], Tuple[int,int]]: - def get_res(resolutions: Dict[Any, Tuple[int,int]]): +def get_max_resolution(node: dai.node, sensor: dai.CameraFeatures) -> Union[ + dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution]: + max_res = None + max_num = 0 + for conf in sensor.configs: + if node == dai.node.ColorCamera and conf.type != dai.CameraSensorType.COLOR: + continue + if node == dai.node.MonoCamera and conf.type != dai.CameraSensorType.MONO: + continue + (res, size) = get_sensor_resolution(conf.type, conf.width, conf.height) + if size[0] * size[1] > max_num: + max_num = size[0] * size[1] + 
max_res = res + return max_res + + +def get_sensor_resolution(type: dai.CameraSensorType, width: int, height: int) -> Tuple[ + Union[dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution], Tuple[int, int]]: + def get_res(resolutions: Dict[Any, Tuple[int, int]]): for res, (w, h) in resolutions.items(): if width == w and height == h: - return (res, (w,h)) + return (res, (w, h)) if type == dai.CameraSensorType.COLOR: return get_res(colorResolutions) @@ -179,11 +193,12 @@ def get_res(resolutions: Dict[Any, Tuple[int,int]]): else: raise Exception('Camera sensor type unknown!', type) + def get_resolution_size( resolution: Union[ dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]) -> Tuple[int,int]: + ]) -> Tuple[int, int]: if resolution in colorResolutions: return colorResolutions[resolution] elif resolution in monoResolutions: @@ -191,18 +206,19 @@ def get_resolution_size( else: raise Exception('Camera sensor resolution unknown!', resolution) + def getClosesResolution(sensor: dai.CameraFeatures, type: dai.CameraSensorType, width: Optional[int] = None, height: Optional[int] = None, ): if width and height: raise ValueError("You have to specify EITHER width OR height to calculate desired ISP scaling options!") - if not width and not height: + if width is None and height is None: raise ValueError("You have to provide width or height calculate desired ISP scaling options!") - minError = 99999 + minError = 999999 closestRes = None - desired, i = (width, 0) if width else (height, 1) + desired, i = (width, 0) if width is not None else (height, 1) resolutions = [get_sensor_resolution(type, conf.width, conf.height) for conf in sensor.configs if conf.type == type] @@ -215,11 +231,11 @@ def getClosesResolution(sensor: dai.CameraFeatures, def getResize(size: Tuple[int, int], - width: Optional[int] = None, - height: Optional[int] = None) -> Tuple[int, int]: + width: Optional[int] = None, + height: Optional[int] = None) -> Tuple[int, int]: if width and height: raise ValueError("You have to specify EITHER width OR height to calculate desired ISP scaling options!") - if not width and not height: + if width is None and height is None: raise ValueError("You have to provide width or height calculate desired ISP scaling options!") if width: diff --git a/depthai_sdk/src/depthai_sdk/components/component.py b/depthai_sdk/src/depthai_sdk/components/component.py index f49e4bb76..972e74f70 100644 --- a/depthai_sdk/src/depthai_sdk/components/component.py +++ b/depthai_sdk/src/depthai_sdk/components/component.py @@ -1,24 +1,12 @@ -from abc import ABC, abstractmethod -from typing import Optional - +from abc import ABC import depthai as dai -from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, ReplayStream - class Component(ABC): """ SDK component is used as an abstraction to the current DepthAI API node or group of nodes. """ - def forced_openvino_version(self) -> Optional[dai.OpenVINO.Version]: - """ - Checks whether the component forces a specific OpenVINO version. Only used by NNComponent (which overrides this - method). This function is called after Camera has been configured and right before we connect to the OAK camera. - @return: Forced OpenVINO version (optional). - """ - return None - def on_pipeline_started(self, device: dai.Device) -> None: """ This function gets called after the pipeline has been started. It is called from the main thread. 
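For reference, the refactored getClosestVideoSize() in camera_helper.py above now simply rounds each dimension down to the nearest valid multiple (width divisible by 32 and height by 8 when a VideoEncoder is attached, otherwise just even). A minimal standalone sketch of that rounding rule, using a hypothetical helper name and the divisor values taken from the diff above:

def round_video_size(width: int, height: int, video_encoder: bool = False):
    # VideoEncoder HW needs width % 32 == 0; NV12/YUV420 only needs even dimensions.
    width_divider = 32 if video_encoder else 2
    # VideoEncoder prefers height % 8 == 0; otherwise even is enough.
    height_divider = 8 if video_encoder else 2
    return (width // width_divider) * width_divider, (height // height_divider) * height_divider

print(round_video_size(1921, 1081))                      # -> (1920, 1080)
print(round_video_size(1902, 1074, video_encoder=True))  # -> (1888, 1072)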
@@ -26,23 +14,61 @@ def on_pipeline_started(self, device: dai.Device) -> None: """ pass - def _stream_name_ok(self, pipeline: dai.Pipeline, name: str) -> bool: - # Check if there's already an XLinkOut stream with this name - for node in pipeline.getAllNodes(): - if isinstance(node, dai.node.XLinkOut) and node.getStreamName() == name: - return False - return True - def _create_xout(self, pipeline: dai.Pipeline, xout: XoutBase) -> XoutBase: - for xstream in xout.xstreams(): - if not self._stream_name_ok(pipeline, xstream.name): - continue + # So users can use: + # packets: Dict[Packet] = q.get() + # depthPacket = packets['depth'] + # depthPacket = packets[stereoComp] + def __str__(self): + return self.out.main.__str__() + + def __hash__(self): + return self.__str__().__hash__() + + def __eq__(self, other): + if isinstance(other, Component): + return str(self) == str(other) + elif isinstance(other, str): + return str(self) == other + else: + return False + +class ComponentOutput(ABC): + """ + Output of a component + """ + def __init__(self, component: Component): + """ + If user hasn't specified component's output name, we will + generate one in Xout class + """ + self.name = None + self._comp = component + + def set_name(self, name: str) -> 'ComponentOutput': + """ + Name component's output, which will be used for packet names. If not specified, it + will be generated automatically after pipeline is started (after `oak.start()`) by + combining all Xout Stream names (eg. "6_out;3_out"). + """ + self.name = name + return self + + # So users can use: + # packets: Dict[Packet] = q.get() + # depthPacket = packets['depth'] + # depthPacket = packets[stereoComp.out.depth] + def __str__(self): + return self.name - if isinstance(xstream, ReplayStream): - continue + def __hash__(self): + return self.__str__().__hash__() - xlink = pipeline.createXLinkOut() - xlink.setStreamName(xstream.name) - xstream.stream.link(xlink.input) + def __eq__(self, other): + if isinstance(other, ComponentOutput): + return str(self) == str(other) + elif isinstance(other, str): + return str(self) == other + else: + return False - return xout diff --git a/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py b/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py index f804f4974..2ec76015b 100644 --- a/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py +++ b/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py @@ -1,9 +1,12 @@ +from pathlib import Path +from string import Template +from typing import Union, Tuple + import depthai as dai + from depthai_sdk.classes.enum import ResizeMode -from typing import Union, Tuple from depthai_sdk.components.camera_helper import get_resolution_size -from pathlib import Path -from string import Template + def control_camera_with_nn( pipeline: dai.Pipeline, @@ -15,7 +18,7 @@ def control_camera_with_nn( af: bool, ae: bool, debug: bool - ): +): sensor_resolution = get_resolution_size(resolution) # width / height (old ar) sensor_ar = sensor_resolution[0] / sensor_resolution[1] @@ -34,10 +37,9 @@ def control_camera_with_nn( init = f"xmin = 0; ymin = {-cropping}; xmax = 1; ymax = {1 + cropping}" else: init = f"xmin = {cropping}; ymin = 0; xmax = {1 - cropping}; ymax = 1" - else: # Stretch + else: # Stretch init = f"xmin=0; ymin=0; xmax=1; ymax=1" - resize_str = f"new_xmin=xmin+width*det.xmin; new_ymin=ymin+height*det.ymin; new_xmax=xmin+width*det.xmax; new_ymax=ymin+height*det.ymax;" denormalize = f"startx=int(new_xmin*{sensor_resolution[0]}); 
starty=int(new_ymin*{sensor_resolution[1]}); new_width=int((new_xmax-new_xmin)*{sensor_resolution[0]}); new_height=int((new_ymax-new_ymin)*{sensor_resolution[1]});" control_str = '' @@ -46,7 +48,6 @@ def control_camera_with_nn( if af: control_str += f"control.setAutoFocusRegion(startx, starty, new_width, new_height);" - script_node = pipeline.create(dai.node.Script) script_node.setProcessor(dai.ProcessorType.LEON_CSS) # More stable diff --git a/depthai_sdk/src/depthai_sdk/components/imu_component.py b/depthai_sdk/src/depthai_sdk/components/imu_component.py index 34451f703..ec3a63398 100644 --- a/depthai_sdk/src/depthai_sdk/components/imu_component.py +++ b/depthai_sdk/src/depthai_sdk/components/imu_component.py @@ -2,7 +2,7 @@ import depthai as dai -from depthai_sdk.components.component import Component, XoutBase +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_imu import XoutIMU @@ -14,9 +14,15 @@ def __init__(self, self.out = self.Out(self) super().__init__() + + self.imu_name: str = device.getConnectedIMU() self.node = pipeline.createIMU() + self.fps = 100 self.config_imu() # Default settings, component won't work without them + def get_imu_name(self) -> str: + return self.imu_name + def config_imu(self, sensors: List[dai.IMUSensor] = None, report_rate: int = 100, @@ -43,18 +49,13 @@ def config_imu(self, self.node.setMaxBatchReports(maxBatchReports=max_batch_reports) self.node.enableFirmwareUpdate(enable_firmware_update) + self.fps = report_rate + class Out: + class ImuOut(ComponentOutput): + def __call__(self, device: dai.Device): + return XoutIMU(StreamXout(self._comp.node.out, name='imu'), self._comp.fps).set_comp_out(self) + def __init__(self, imu_component: 'IMUComponent'): - self._comp = imu_component - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - Default output. Uses either camera(), replay(), or encoded() depending on the component settings. 
- """ - return self.text(pipeline, device) - - def text(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - out = self._comp.node.out - out = StreamXout(self._comp.node.id, out) - imu_out = XoutIMU(out) - return self._comp._create_xout(pipeline, imu_out) + self.main = self.ImuOut(imu_component) + self.text = self.main diff --git a/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py b/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py index 183bea93c..f3dcafe45 100644 --- a/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py +++ b/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py @@ -2,9 +2,10 @@ from pathlib import Path from string import Template from typing import Tuple, Optional, List -from depthai_sdk.classes.enum import ResizeMode + import depthai as dai +from depthai_sdk.classes.enum import ResizeMode from depthai_sdk.types import GenericNeuralNetwork @@ -45,7 +46,7 @@ def __init__(self, self.init = f"xmin = 0; ymin = {-cropping}; xmax = 1; ymax = {1 + cropping}" else: self.init = f"xmin = {cropping}; ymin = 0; xmax = {1 - cropping}; ymax = 1" - else: # Stretch + else: # Stretch self.init = f"xmin=0; ymin=0; xmax=1; ymax=1" self.script: dai.node.Script = pipeline.create(dai.node.Script) @@ -55,7 +56,7 @@ def __init__(self, detection_node.out.link(self.script.inputs['detections']) high_res_frames.link(self.script.inputs['frames']) - self.configure() # User might later call this again with different parameters + self.configure() # User might later call this again with different parameters self.manip: dai.node.ImageManip = pipeline.create(dai.node.ImageManip) self.manip.initialConfig.setResize(size) @@ -66,13 +67,18 @@ def __init__(self, self.script.outputs['manip_img'].link(self.manip.inputImage) self.out: dai.Node.Output = self.manip.out + self.whitelist_labels: Optional[List[int]] = None + self.scale_bb: Optional[Tuple[int, int]] = None + def configure(self, debug: bool = False, whitelist_labels: Optional[List[int]] = None, scale_bb: Optional[Tuple[int, int]] = None) -> None: """ Args: - config (MultiStageConfig, optional): Configuration object. Defaults to None. + debug (bool, optional): Enable debug mode. Defaults to False. + whitelist_labels (Optional[List[int]], optional): List of labels to keep. Defaults to None. + scale_bb (Optional[Tuple[int, int]], optional): Scale bounding box. Defaults to None. 
""" # Used later for visualization self.whitelist_labels = whitelist_labels diff --git a/depthai_sdk/src/depthai_sdk/components/nn_component.py b/depthai_sdk/src/depthai_sdk/components/nn_component.py index 5126d9f46..6449f9841 100644 --- a/depthai_sdk/src/depthai_sdk/components/nn_component.py +++ b/depthai_sdk/src/depthai_sdk/components/nn_component.py @@ -5,6 +5,9 @@ from pathlib import Path from typing import Callable, Union, List, Dict +from depthai_sdk.types import NNNode +from depthai_sdk.visualize.bbox import BoundingBox + try: import blobconverter except ImportError: @@ -12,17 +15,17 @@ from depthai_sdk.classes.nn_config import Config from depthai_sdk.components.camera_component import CameraComponent -from depthai_sdk.components.component import Component +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.integrations.roboflow import RoboflowIntegration from depthai_sdk.components.multi_stage_nn import MultiStageNN from depthai_sdk.components.nn_helper import * from depthai_sdk.classes.enum import ResizeMode from depthai_sdk.components.parser import * from depthai_sdk.components.stereo_component import StereoComponent +from depthai_sdk.visualize.visualizer_helper import depth_to_disp_factor from depthai_sdk.oak_outputs.xout.xout_base import StreamXout, XoutBase from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames from depthai_sdk.oak_outputs.xout.xout_nn import XoutTwoStage, XoutNnResults, XoutSpatialBbMappings, XoutNnData -from depthai_sdk.oak_outputs.xout.xout_nn_encoded import XoutNnMjpeg, XoutNnH26x from depthai_sdk.oak_outputs.xout.xout_tracker import XoutTracker from depthai_sdk.replay import Replay @@ -36,10 +39,9 @@ def __init__(self, nn_type: Optional[str] = None, # Either 'yolo' or 'mobilenet' decode_fn: Optional[Callable] = None, tracker: bool = False, # Enable object tracker - only for Object detection models - spatial: Union[None, bool, StereoComponent] = None, + spatial: Optional[StereoComponent] = None, replay: Optional[Replay] = None, args: Dict = None, # User defined args - name: Optional[str] = None ) -> None: """ Neural Network component abstracts: @@ -56,32 +58,27 @@ def __init__(self, spatial (bool, default False): Enable getting Spatial coordinates (XYZ), only for Obj detectors. 
Yolo/SSD use on-device spatial calc, others on-host (gen2-calc-spatials-on-host) replay (Replay object): Replay args (Any, optional): Use user defined arguments when constructing the pipeline - name (str, optional): Name of the output stream """ super().__init__() - self.name = name self.out = self.Out(self) self.triggers = defaultdict(list) - self.node: Optional[ - dai.node.NeuralNetwork, - dai.node.MobileNetDetectionNetwork, - dai.node.MobileNetSpatialDetectionNetwork, - dai.node.YoloDetectionNetwork, - dai.node.YoloSpatialDetectionNetwork] = None + self.node: Optional[NNNode] = None # ImageManip used to resize the input to match the expected NN input size self.image_manip: Optional[dai.node.ImageManip] = None self.x_in: Optional[dai.node.XLinkIn] = None # Used for multi-stage pipeline + # Tracker: self.tracker = pipeline.createObjectTracker() if tracker else None - self.apply_tracking_filter = False + self.apply_tracking_filter = True # Enable by default + self.calculate_speed = True self.forget_after_n_frames = None - self.calculate_speed = False # Private properties self._ar_resize_mode: ResizeMode = ResizeMode.LETTERBOX # Default - self._input: Union[CameraComponent, 'NNComponent', dai.Node.Output] = input # Input to the NNComponent node passed on initialization + # Input to the NNComponent node passed on initialization + self._input: Union[CameraComponent, 'NNComponent', dai.Node.Output] = input self._stream_input: dai.Node.Output # Node Output that will be used as the input for this NNComponent self._blob: Optional[dai.OpenVINO.Blob] = None @@ -97,7 +94,7 @@ def __init__(self, self._input_queue = Optional[None] # Input queue for multi-stage pipeline - self._spatial: Optional[Union[bool, StereoComponent]] = spatial + self._spatial: Optional[StereoComponent] = spatial self._replay: Optional[Replay] = replay # Replay module # For visualizer @@ -141,11 +138,8 @@ def __init__(self, # Creates ImageManip node that resizes the input to match the expected NN input size. # DepthAI uses CHW (Planar) channel layout and BGR color order convention. self.image_manip = pipeline.createImageManip() - self.image_manip.setFrameType(dai.RawImgFrame.Type.BGR888p) - self.image_manip.setMaxOutputFrameSize(self._size[0] * self._size[1] * 3) - self.image_manip.inputImage.setBlocking(False) - self.image_manip.inputImage.setQueueSize(2) - self._ar_resize_mode = ResizeMode.LETTERBOX # Default + # Configures ImageManip node. Letterbox by default + self._change_resize_mode(ResizeMode.LETTERBOX) if isinstance(self._input, CameraComponent): self._stream_input = self._input.stream @@ -157,15 +151,15 @@ def __init__(self, self._stream_input.link(self.image_manip.inputImage) # Link ImageManip output to NN node self.image_manip.out.link(self.node.input) - elif self._is_multi_stage(): + elif self.is_multi_stage(): # Here, ImageManip will only crop the high-res frame to correct aspect ratio # (without resizing!) and it also acts as a buffer (by default, its pool size is set to 20). self.image_manip = pipeline.createImageManip() self.image_manip.setNumFramesPool(20) self._input._stream_input.link(self.image_manip.inputImage) - frame_full_size = self._input._input.stream_size + frame_full_size = self._get_input_frame_size() - if self._input._is_detector(): + if self._input.is_detector(): self.image_manip.setMaxOutputFrameSize(frame_full_size[0] * frame_full_size[1] * 3) # Create script node, get HQ frames from input. 
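Since `spatial` is now typed as `Optional[StereoComponent]` rather than also accepting a bool, the caller is expected to pass an explicit stereo component. A short usage sketch, assuming `OakCamera.create_nn` forwards the `spatial` argument to this NNComponent as in the SDK examples (treat it as an illustration, not a tested snippet):

from depthai_sdk import OakCamera

with OakCamera() as oak:
    color = oak.create_camera('color')
    stereo = oak.create_stereo()  # depth source used for spatial (XYZ) coordinates
    nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo)
    oak.visualize(nn, fps=True)
    oak.start(blocking=True)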
@@ -208,23 +202,15 @@ def __init__(self, ) if self._spatial: - if isinstance(self._spatial, bool): # Create new StereoComponent - self._spatial = StereoComponent(device, pipeline, args=self._args, replay=self._replay) - if isinstance(self._spatial, StereoComponent): - self._stereo_node: dai.node.StereoDepth = self._spatial.node - self._spatial.depth.link(self.node.inputDepth) - self._spatial.config_stereo(align=self._input) + self._stereo_node: dai.node.StereoDepth = self._spatial.node + self._spatial.depth.link(self.node.inputDepth) + self._spatial.config_stereo(align=self._input) # Configure Spatial Detection Network if self._args: - if self._is_spatial(): + if self.is_spatial(): self._config_spatials_args(self._args) - def forced_openvino_version(self) -> Optional[dai.OpenVINO.Version]: - # TODO: remove this once 2.23 is released, and just reset the ImageManip. - self._change_resize_mode(self._ar_resize_mode) - return None - def get_name(self): model = self._config.get('model', None) if model is not None: @@ -274,9 +260,9 @@ def _parse_node_type(self, nn_type: str) -> None: self._node_type = dai.node.NeuralNetwork if nn_type: if nn_type.upper() == 'YOLO': - self._node_type = dai.node.YoloSpatialDetectionNetwork if self._is_spatial() else dai.node.YoloDetectionNetwork + self._node_type = dai.node.YoloSpatialDetectionNetwork if self.is_spatial() else dai.node.YoloDetectionNetwork elif nn_type.upper() == 'MOBILENET': - self._node_type = dai.node.MobileNetSpatialDetectionNetwork if self._is_spatial() else dai.node.MobileNetDetectionNetwork + self._node_type = dai.node.MobileNetSpatialDetectionNetwork if self.is_spatial() else dai.node.MobileNetDetectionNetwork def _config_spatials_args(self, args): if not isinstance(args, Dict): @@ -353,24 +339,27 @@ def _blob_from_config(self, model: Dict, version: Union[None, str, dai.OpenVINO. Gets the blob from the config file. """ if isinstance(version, dai.OpenVINO.Version): - vals = str(version).split('_') - version = f"{vals[1]}.{vals[2]}" + version = str(version) + if isinstance(version, str): + if version.startswith('VERSION_'): + version = version[8:] + if '_' in version: + vals = version.split('_') + version = f'{vals[0]}.{vals[1]}' if 'model_name' in model: # Use blobconverter to download the model zoo_type = model.get("zoo", 'intel') return blobconverter.from_zoo(model['model_name'], zoo_type=zoo_type, shaves=6, # TODO: Calculate ideal shave amount - version=version - ) + version=version) if 'xml' in model and 'bin' in model: return blobconverter.from_openvino(xml=model['xml'], bin=model['bin'], data_type="FP16", # Myriad X shaves=6, # TODO: Calculate ideal shave amount - version=version - ) + version=version) raise ValueError("Specified `model` values in json config files are incorrect!") @@ -381,14 +370,17 @@ def _change_resize_mode(self, mode: ResizeMode) -> None: Args: mode (ResizeMode): Resize mode to use """ - if self._is_multi_stage(): + if self.is_multi_stage(): return # We need high-res frames for multi-stage NN, so we can crop them later self._ar_resize_mode = mode - # TODO: uncomment this when depthai 2.21.3 is released. In some cases (eg. - # setting first crop, then letterbox), the last config isn't used. 
- # self.image_manip.initialConfig.set(dai.RawImageManipConfig()) + # Reset ImageManip node config + self.image_manip.initialConfig.set(dai.RawImageManipConfig()) + self.image_manip.setFrameType(dai.RawImgFrame.Type.BGR888p) + self.image_manip.setMaxOutputFrameSize(self._size[0] * self._size[1] * 3) + self.image_manip.inputImage.setBlocking(False) + self.image_manip.inputImage.setQueueSize(2) if self._ar_resize_mode == ResizeMode.CROP: self.image_manip.initialConfig.setResize(self._size) @@ -417,7 +409,7 @@ def config_multistage_nn(self, scale_bb (Tuple[int, int], optional): Scale detection bounding boxes (x, y) before cropping the frame. In %. num_frame_pool (int, optional): Number of frames to pool for inference. If None, will use the default value. """ - if not self._is_multi_stage(): + if not self.is_multi_stage(): logging.warning("Input to this model was not a NNComponent, so 2-stage NN inferencing isn't possible!" "This configuration attempt will be ignored.") return @@ -456,7 +448,7 @@ def config_tracker(self, apply_tracking_filter: Optional[bool] = None, forget_after_n_frames: Optional[int] = None, calculate_speed: Optional[bool] = None - ): + ) -> None: """ Configure Object Tracker node (if it's enabled). @@ -503,11 +495,11 @@ def config_tracker(self, if calculate_speed is not None: self.calculate_speed = calculate_speed - def config_yolo_from_metadata(self, metadata: Dict): + def config_yolo_from_metadata(self, metadata: Dict) -> None: """ Configures (Spatial) Yolo Detection Network node with a dictionary. Calls config_yolo(). """ - return self.config_yolo( + self.config_yolo( num_classes=metadata['classes'], coordinate_size=metadata['coordinates'], anchors=metadata['anchors'], @@ -527,7 +519,7 @@ def config_yolo(self, """ Configures (Spatial) Yolo Detection Network node. """ - if not self._is_yolo(): + if not self.is_yolo(): logging.warning('This is not a YOLO detection network! This configuration attempt will be ignored.') return @@ -545,7 +537,8 @@ def config_yolo(self, def config_nn(self, conf_threshold: Optional[float] = None, - resize_mode: Union[ResizeMode, str] = None): + resize_mode: Union[ResizeMode, str] = None + ) -> None: """ Configures the Detection Network node. @@ -555,17 +548,17 @@ def config_nn(self, """ if resize_mode: self._ar_resize_mode = ResizeMode.parse(resize_mode) - # TODO: After 2.23 is released, uncomment this - # self._change_resize_mode(self._ar_resize_mode) + self._change_resize_mode(self._ar_resize_mode) - if conf_threshold is not None and self._is_detector(): + if conf_threshold is not None and self.is_detector(): self.node.setConfidenceThreshold(conf_threshold) def config_spatial(self, bb_scale_factor: Optional[float] = None, lower_threshold: Optional[int] = None, upper_threshold: Optional[int] = None, - calc_algo: Optional[dai.SpatialLocationCalculatorAlgorithm] = None): + calc_algo: Optional[dai.SpatialLocationCalculatorAlgorithm] = None + ) -> None: """ Configures the Spatial Detection Network node. @@ -575,7 +568,7 @@ def config_spatial(self, upper_threshold (int, optional): Specifies upper threshold in depth units (millimeter by default) for depth values which will used to calculate spatial data calc_algo (dai.SpatialLocationCalculatorAlgorithm, optional): Specifies spatial location calculator algorithm: Average/Min/Max """ - if not self._is_spatial(): + if not self.is_spatial(): logging.warning('This is not a Spatial Detection network! 
This configuration attempt will be ignored.') return @@ -588,247 +581,218 @@ def config_spatial(self, if calc_algo: self.node.setSpatialCalculationAlgorithm(calc_algo) - def _update_config(self): + def _update_config(self) -> None: if self.node is None or self._config is None: return nn_config = self._config.get("nn_config", {}) meta = nn_config.get('NN_specific_metadata', None) - if self._is_yolo() and meta: + if self.is_yolo() and meta: self.config_yolo_from_metadata(metadata=meta) self.config_nn(conf_threshold=nn_config.get('conf_threshold', None)) + def _get_camera_comp(self) -> CameraComponent: + if self.is_multi_stage(): + return self._input._get_camera_comp() + return self._input + + def _get_input_frame_size(self) -> Tuple[int, int]: + # TODO: if user passes node output as the NN input (eg. examples/mixed/switch_between_models.py), + # this function will fail + return self._get_camera_comp().stream_size + + # + def get_bbox(self) -> BoundingBox: + if self.is_multi_stage(): + return self._input.get_bbox() + else: + try: + stream_size = self._get_input_frame_size() + old_ar = stream_size[0] / stream_size[1] + new_ar = self._size[0] / self._size[1] + return BoundingBox().resize_to_aspect_ratio(old_ar, new_ar, self._ar_resize_mode) + except (AttributeError, ZeroDivisionError, ValueError): + return BoundingBox() + """ Available outputs (to the host) of this component """ class Out: - def __init__(self, nn_component: 'NNComponent'): - self._comp = nn_component - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: + class MainOut(ComponentOutput): """ Default output. Streams NN results and high-res frames that were downscaled and used for inferencing. Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). """ - if self._comp._is_multi_stage(): - input_nn = self._comp._input - if input_nn._input.encoder: - return self.encoded(pipeline=pipeline, device=device) - elif self._comp._input.encoder: - return self.encoded(pipeline=pipeline, device=device) - - if self._comp._is_multi_stage(): - det_nn_out = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.out, - name=self._comp._input.name) - second_nn_out = StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - - out = XoutTwoStage(det_nn=self._comp._input, - second_nn=self._comp, - frames=self._comp._input._input.get_stream_xout(), - det_out=det_nn_out, - second_nn_out=second_nn_out, - device=device, - input_queue_name="input_queue" if self._comp.x_in else None) - else: - det_nn_out = StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - input_stream = self._comp._stream_input - out = XoutNnResults(det_nn=self._comp, - frames=StreamXout(id=input_stream.getParent().id, - out=input_stream, - name=self._comp.name), - nn_results=det_nn_out) - return self._comp._create_xout(pipeline, out) - - def passthrough(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - """ - Default output. Streams NN results and passthrough frames (frames used for inferencing) - Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). 
- """ - if self._comp._is_multi_stage(): - det_nn_out = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.out, - name=self._comp._input.name) - frames = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.passthrough, - name=self._comp.name) - second_nn_out = StreamXout(self._comp.node.id, self._comp.node.out, name=self._comp.name) - - out = XoutTwoStage(det_nn=self._comp._input, - second_nn=self._comp, - frames=frames, - det_out=det_nn_out, - second_nn_out=second_nn_out, + def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: + if self._comp.is_multi_stage(): + det_nn_out = StreamXout(out=self._comp._input.node.out) + second_nn_out = StreamXout(out=self._comp.node.out) + + return XoutTwoStage(det_nn=self._comp._input, + second_nn=self._comp, + frames=self._comp._input._input.get_stream_xout(), + det_out=det_nn_out, + second_nn_out=second_nn_out, + device=device, + input_queue_name="input_queue" if self._comp.x_in else None, + bbox=self._comp.get_bbox()).set_fourcc(fourcc).set_comp_out(self) + else: + # TODO: refactor. This is a bit hacky, as we want to support passing node output as the input + # to the NNComponent. In such case, we don't have access to VideoEnc (inside CameraComponent) + det_nn_out = StreamXout(out=self._comp.node.out) + input_stream = self._comp._stream_input + if fourcc is None: + frame_stream = StreamXout(out=input_stream) + else: + frame_stream = self._comp._get_camera_comp().get_stream_xout(fourcc) + return XoutNnResults(det_nn=self._comp, + frames=frame_stream, + nn_results=det_nn_out, + bbox=self._comp.get_bbox()).set_fourcc(fourcc).set_comp_out(self) + + class PassThroughOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + """ + Default output. Streams NN results and passthrough frames (frames used for inferencing) + Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). + """ + if self._comp.is_multi_stage(): + return XoutTwoStage(det_nn=self._comp._input, + second_nn=self._comp, + frames=StreamXout(out=self._comp._input.node.passthrough), + det_out=StreamXout(out=self._comp._input.node.out), + second_nn_out=StreamXout(self._comp.node.out), + device=device, + input_queue_name="input_queue" if self._comp.x_in else None, + bbox=self._comp.get_bbox()).set_comp_out(self) + else: + return XoutNnResults(det_nn=self._comp, + frames=StreamXout(out=self._comp.node.passthrough), + nn_results=StreamXout(out=self._comp.node.out), + bbox=BoundingBox() + ).set_comp_out(self) + + class ImgManipOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(out=self._comp.image_manip.out)).set_comp_out(self) + + class InputOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(out=self._comp._stream_input)).set_comp_out(self) + + class SpatialOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutSpatialBbMappings: + """ + Streams depth and bounding box mappings (``SpatialDetectionNework.boundingBoxMapping``). Produces SpatialBbMappingPacket. 
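Minimal usage sketch (model name illustrative; assumes OakCamera.create_stereo() builds the stereo pair when none is given):

    from depthai_sdk import OakCamera

    with OakCamera() as oak:
        color = oak.create_camera('color')
        stereo = oak.create_stereo()
        # Spatial detector; depth + bounding-box mappings are exposed via out.spatials
        nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo)
        oak.visualize(nn.out.spatials)
        oak.start(blocking=True)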
+ """ + if not self._comp.is_spatial(): + raise Exception('SDK tried to output spatial data (depth + bounding box mappings),' + 'but this is not a Spatial Detection network!') + + return XoutSpatialBbMappings( + device=device, + stereo=self._comp._stereo_node, + frames=StreamXout(out=self._comp.node.passthroughDepth), + configs=StreamXout(out=self._comp.node.out), + dispScaleFactor=depth_to_disp_factor(device, self._comp._stereo_node), + bbox=self._comp.get_bbox() + ).set_comp_out(self) + + class TwoStageOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutFrames: + """ + Streams 2. stage cropped frames to the host. Produces FramePacket. + """ + if not self._comp.is_multi_stage(): + raise Exception( + 'SDK tried to output TwoStage crop frames, but this is not a Two-Stage NN component!') + + return XoutFrames(frames=StreamXout(out=self._comp._multi_stage_nn.manip.out)).set_comp_out(self) + + class TrackerOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutTracker: + """ + Streams ObjectTracker tracklets and high-res frames that were downscaled and used for inferencing. Produces TrackerPacket. + """ + if not self._comp.is_tracker(): + raise Exception('Tracker was not enabled! Enable with cam.create_nn("[model]", tracker=True)!') + + self._comp.node.passthrough.link(self._comp.tracker.inputDetectionFrame) + self._comp.node.out.link(self._comp.tracker.inputDetections) + + # TODO: add support for full frame tracking + self._comp.node.passthrough.link(self._comp.tracker.inputTrackerFrame) + + return XoutTracker(det_nn=self._comp, + frames=self._comp._input.get_stream_xout(), # CameraComponent device=device, - input_queue_name="input_queue" if self._comp.x_in else None) - else: - det_nn_out = StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - frames = StreamXout(id=self._comp.node.id, out=self._comp.node.passthrough, name=self._comp.name) - - out = XoutNnResults(det_nn=self._comp, - frames=frames, - nn_results=det_nn_out) - - return self._comp._create_xout(pipeline, out) + tracklets=StreamXout(self._comp.tracker.out), + bbox=self._comp.get_bbox(), + apply_kalman=self._comp.apply_tracking_filter, + forget_after_n_frames=self._comp.forget_after_n_frames, + calculate_speed=self._comp.calculate_speed, + ).set_comp_out(self) + + class EncodedOut(MainOut): + def __call__(self, device: dai.Device) -> XoutNnResults: + """ + Streams NN results and encoded frames (frames used for inferencing) + Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). 
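Usage sketch, assuming the input camera was created with an encoder (model name illustrative):

    from depthai_sdk import OakCamera

    with OakCamera() as oak:
        color = oak.create_camera('color', encode='mjpeg')
        nn = oak.create_nn('mobilenet-ssd', color)
        # Streams encoded frames together with the NN results
        oak.visualize(nn.out.encoded)
        oak.start(blocking=True)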
+ """ + # A bit hacky, maybe we can remove this alltogether + return super().__call__(device, fourcc=self._comp._get_camera_comp().get_fourcc()) + + class NnDataOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutNnData: + node_output = self._comp.node.out if \ + type(self._comp.node) == dai.node.NeuralNetwork else \ + self._comp.node.outNetwork + + return XoutNnData(xout=StreamXout(node_output)).set_comp_out(self) - def image_manip(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - out = XoutFrames(frames=StreamXout(id=self._comp.image_manip.id, - out=self._comp.image_manip.out, - name=self._comp.name)) - return self._comp._create_xout(pipeline, out) - - def input(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - out = XoutFrames(frames=StreamXout(id=self._comp._input.node.id, - out=self._comp._stream_input, - name=self._comp.name)) - return self._comp._create_xout(pipeline, out) - - def spatials(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutSpatialBbMappings: - """ - Streams depth and bounding box mappings (``SpatialDetectionNework.boundingBoxMapping``). Produces SpatialBbMappingPacket. - """ - if not self._comp._is_spatial(): - raise Exception('SDK tried to output spatial data (depth + bounding box mappings),' - 'but this is not a Spatial Detection network!') - - out = XoutSpatialBbMappings( - device=device, - stereo=self._comp._stereo_node, - frames=StreamXout(id=self._comp.node.id, out=self._comp.node.passthroughDepth, name=self._comp.name), - configs=StreamXout(id=self._comp.node.id, out=self._comp.node.out, name=self._comp.name) - ) - - return self._comp._create_xout(pipeline, out) - - def twostage_crops(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutFrames: - """ - Streams 2. stage cropped frames to the host. Produces FramePacket. - """ - if not self._comp._is_multi_stage(): - raise Exception('SDK tried to output TwoStage crop frames, but this is not a Two-Stage NN component!') - - out = XoutFrames(frames=StreamXout(id=self._comp._multi_stage_nn.manip.id, - out=self._comp._multi_stage_nn.manip.out, - name=self._comp.name)) - - return self._comp._create_xout(pipeline, out) - - def tracker(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutTracker: - """ - Streams ObjectTracker tracklets and high-res frames that were downscaled and used for inferencing. Produces TrackerPacket. - """ - if not self._comp._is_tracker(): - raise Exception('Tracker was not enabled! Enable with cam.create_nn("[model]", tracker=True)!') - - self._comp.node.passthrough.link(self._comp.tracker.inputDetectionFrame) - self._comp.node.out.link(self._comp.tracker.inputDetections) - - # TODO: add support for full frame tracking - self._comp.node.passthrough.link(self._comp.tracker.inputTrackerFrame) - - out = XoutTracker(det_nn=self._comp, - frames=self._comp._input.get_stream_xout(), # CameraComponent - device=device, - tracklets=StreamXout(self._comp.tracker.id, self._comp.tracker.out), - apply_kalman=self._comp.apply_tracking_filter, - forget_after_n_frames=self._comp.forget_after_n_frames, - calculate_speed=self._comp.calculate_speed) - - return self._comp._create_xout(pipeline, out) - - def encoded(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutNnResults: - """ - Streams NN results and encoded frames (frames used for inferencing) - Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). 
- """ - if self._comp._is_multi_stage(): - input_nn = self._comp._input - - if input_nn._input.encoder is None: - raise Exception('Encoder not enabled for the input') - - det_nn_out = StreamXout(id=self._comp._input.node.id, - out=self._comp._input.node.out, - name=self._comp._input.name) - frames = StreamXout(id=input_nn._input.encoder.id, - out=input_nn._input.encoder.bitstream, - name=self._comp.name) - second_nn_out = StreamXout(self._comp.node.id, self._comp.node.out, name=self._comp.name) - - out = XoutTwoStage(det_nn=self._comp._input, - second_nn=self._comp, - frames=frames, - det_out=det_nn_out, - second_nn_out=second_nn_out, - device=device, - input_queue_name="input_queue" if self._comp.x_in else None) - - return self._comp._create_xout(pipeline, out) - - if self._comp._input.encoder is None: - raise Exception('Encoder not enabled for the input') - - if self._comp._input._encoder_profile == dai.VideoEncoderProperties.Profile.MJPEG: - out = XoutNnMjpeg( - det_nn=self._comp, - frames=StreamXout(self._comp._input.encoder.id, self._comp._input.encoder.bitstream), - nn_results=StreamXout(self._comp.node.id, self._comp.node.out), - color=self._comp._input.is_color(), - lossless=self._comp._input.encoder.getLossless(), - fps=self._comp._input.encoder.getFrameRate(), - frame_shape=self._comp._input.stream_size - ) - else: - out = XoutNnH26x( - det_nn=self._comp, - frames=StreamXout(self._comp._input.node.id, self._comp._input.encoder.bitstream), - nn_results=StreamXout(self._comp.node.id, self._comp.node.out), - color=self._comp._input.is_color(), - profile=self._comp._input._encoder_profile, - fps=self._comp._input.encoder.getFrameRate(), - frame_shape=self._comp._input.stream_size - ) - - return self._comp._create_xout(pipeline, out) - - def nn_data(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutNnData: - if type(self._comp.node) == dai.node.NeuralNetwork: - out = XoutNnData(xout=StreamXout(self._comp.node.id, self._comp.node.out)) - else: - out = XoutNnData(xout=StreamXout(self._comp.node.id, self._comp.node.outNetwork)) - return self._comp._create_xout(pipeline, out) + def __init__(self, nn_component: 'NNComponent'): + self.main = self.MainOut(nn_component) + self.passthrough = self.PassThroughOut(nn_component) + self.image_manip = self.ImgManipOut(nn_component) + self.input = self.InputOut(nn_component) + self.spatials = self.SpatialOut(nn_component) + self.twostage_crops = self.TwoStageOut(nn_component) + self.tracker = self.TrackerOut(nn_component) + self.encoded = self.EncodedOut(nn_component) + self.nn_data = self.NnDataOut(nn_component) # Checks - def _is_spatial(self) -> bool: + def is_spatial(self) -> bool: return self._spatial is not None # todo fix if spatial is bool and equals to False - def _is_tracker(self) -> bool: + def is_tracker(self) -> bool: # Currently, only object detectors are supported - return self._is_detector() and self.tracker is not None + return self.is_detector() and self.tracker is not None - def _is_yolo(self) -> bool: + def is_yolo(self) -> bool: return ( self._node_type == dai.node.YoloDetectionNetwork or self._node_type == dai.node.YoloSpatialDetectionNetwork ) - def _is_mobile_net(self) -> bool: + def is_mobile_net(self) -> bool: return ( self._node_type == dai.node.MobileNetDetectionNetwork or self._node_type == dai.node.MobileNetSpatialDetectionNetwork ) - def _is_detector(self) -> bool: + def is_detector(self) -> bool: """ Currently these 2 object detectors are supported """ - return self._is_yolo() or self._is_mobile_net() + return 
self.is_yolo() or self.is_mobile_net() - def _is_multi_stage(self): + def is_multi_stage(self): if not isinstance(self._input, type(self)): return False diff --git a/depthai_sdk/src/depthai_sdk/components/nn_helper.py b/depthai_sdk/src/depthai_sdk/components/nn_helper.py index 5b82be46e..ef2ebbb18 100644 --- a/depthai_sdk/src/depthai_sdk/components/nn_helper.py +++ b/depthai_sdk/src/depthai_sdk/components/nn_helper.py @@ -1,9 +1,9 @@ import importlib -from pathlib import Path import os -from typing import Dict, Union, Optional, Tuple +from pathlib import Path +from typing import Dict, Union + import requests -import depthai as dai BLOBS_PATH = Path.home() / Path('.cache/blobs') diff --git a/depthai_sdk/src/depthai_sdk/components/parser.py b/depthai_sdk/src/depthai_sdk/components/parser.py index ed41296f6..fc1c2f018 100644 --- a/depthai_sdk/src/depthai_sdk/components/parser.py +++ b/depthai_sdk/src/depthai_sdk/components/parser.py @@ -30,6 +30,22 @@ def rgb_resolution(resolution: Union[ return dai.ColorCameraProperties.SensorResolution.THE_1080_P +def encoder_profile_to_fourcc(profile: dai.VideoEncoderProperties.Profile) -> str: + """ + Converts encoder profile to fourcc string + """ + if profile == dai.VideoEncoderProperties.Profile.MJPEG: + return 'mjpeg' + elif profile == dai.VideoEncoderProperties.Profile.H265_MAIN: + return 'hevc' + elif profile in [dai.VideoEncoderProperties.Profile.H264_BASELINE, + dai.VideoEncoderProperties.Profile.H264_HIGH, + dai.VideoEncoderProperties.Profile.H264_MAIN + ]: + return 'h264' + raise ValueError(f'Unknown encoder profile: {profile}') + + def mono_resolution(resolution: Union[ None, str, dai.MonoCameraProperties.SensorResolution]) -> dai.MonoCameraProperties.SensorResolution: """ @@ -70,6 +86,14 @@ def parse_bool(value: str) -> bool: raise ValueError(f"Couldn't parse '{value}' to bool!") +def get_first_color_cam(device: dai.Device) -> dai.CameraBoardSocket: + for cam in device.getConnectedCameraFeatures(): + if cam.supportedTypes[0] == dai.CameraSensorType.COLOR: + return cam.socket + # Default + return None + + def parse_camera_socket(value: Union[str, dai.CameraBoardSocket]) -> dai.CameraBoardSocket: if isinstance(value, dai.CameraBoardSocket): return value @@ -94,6 +118,7 @@ def parse_camera_socket(value: Union[str, dai.CameraBoardSocket]) -> dai.CameraB else: raise ValueError(f"Camera socket name '{value}' not supported!") + def parse_usb_speed(speed: Union[None, str, dai.UsbSpeed]) -> Optional[dai.UsbSpeed]: if speed is None: return None diff --git a/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py b/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py index 8ed847272..7e806ffc7 100644 --- a/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py +++ b/depthai_sdk/src/depthai_sdk/components/pointcloud_component.py @@ -1,12 +1,9 @@ -import logging -import warnings -from typing import Optional, Union, Any, Dict, Tuple +from typing import Optional, Union, Any -import cv2 import depthai as dai -import numpy as np + from depthai_sdk.components.camera_component import CameraComponent -from depthai_sdk.components.component import Component +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.components.stereo_component import StereoComponent from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout from depthai_sdk.oak_outputs.xout.xout_pointcloud import XoutPointcloud @@ -21,34 +18,23 @@ def __init__(self, stereo: Union[None, StereoComponent, dai.node.StereoDepth, 
dai.Node.Output] = None, colorize: Optional[CameraComponent] = None, replay: Optional[Replay] = None, - args: Any = None, - name: Optional[str] = None): + args: Any = None): """ Args: pipeline (dai.Pipeline): DepthAI pipeline replay (Replay object, optional): Replay args (Any, optional): Use user defined arguments when constructing the pipeline - name (str, optional): Name of the output stream """ super().__init__() self.out = self.Out(self) self.stereo_depth_node: dai.node.StereoDepth - self.depth: dai.Node.Output # Depth node output + self.depth: dai.Node.Output # Depth node output self.colorize_comp: Optional[CameraComponent] = colorize - self.name = name - self._replay: Optional[Replay] = replay - # Colorization aspect - if colorize is None: - self.colorize_comp = CameraComponent(device, pipeline, source='color', replay=replay, args=args) - - if isinstance(self.colorize_comp, CameraComponent): - self.colorize_comp.config_color_camera(isp_scale=(2,5)) - # Depth aspect if stereo is None: stereo = StereoComponent(device, pipeline, replay=replay, args=args) @@ -62,8 +48,8 @@ def __init__(self, config.postProcessing.spatialFilter.enable = True config.postProcessing.spatialFilter.holeFillingRadius = 2 config.postProcessing.spatialFilter.numIterations = 1 - config.postProcessing.thresholdFilter.minRange = 400 # 40cm - config.postProcessing.thresholdFilter.maxRange = 20000 # 20m + config.postProcessing.thresholdFilter.minRange = 400 # 40cm + config.postProcessing.thresholdFilter.maxRange = 20000 # 20m config.postProcessing.decimationFilter.decimationFactor = 2 config.postProcessing.decimationFilter.decimationMode = dai.RawStereoDepthConfig.PostProcessing.DecimationFilter.DecimationMode.NON_ZERO_MEDIAN stereo.node.initialConfig.set(config) @@ -82,31 +68,22 @@ def __init__(self, self.stereo_depth_node = stereo.getParent() self.depth = stereo - - def config_postprocessing(self, - ) -> None: + def config_postprocessing(self) -> None: """ Configures postprocessing options. 
- - Args: """ - pass + raise NotImplementedError("config_postprocessing() not yet implemented") class Out: + class PointcloudOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + colorize = None + if self._comp.colorize_comp is not None: + colorize = StreamXout(self._comp.colorize_comp.stream, name="Color") + return XoutPointcloud(device, + StreamXout(self._comp.depth), + color_frames=colorize).set_comp_out(self) + def __init__(self, component: 'PointcloudComponent'): - self._comp = component - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - return self.pointcloud(pipeline, device) - - def pointcloud(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - colorize = None - if self._comp.colorize_comp is not None: - colorize = StreamXout(self._comp.colorize_comp.node.id, self._comp.colorize_comp.stream, name="Color") - - out = XoutPointcloud(device, - StreamXout(self._comp.stereo_depth_node.id, self._comp.depth, name=self._comp.name), - color_frames=colorize, - fps=30 - ) - return self._comp._create_xout(pipeline, out) + self.pointcloud = self.PointcloudOut(component) + self.main = self.pointcloud diff --git a/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py b/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py index 7e3cac42f..56903452a 100644 --- a/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py +++ b/depthai_sdk/src/depthai_sdk/components/pointcloud_helper.py @@ -1,5 +1,6 @@ -import numpy as np import depthai as dai +import numpy as np + def create_xyz(device: dai.Device, width: int, height: int): calibData = device.readCalibration() @@ -29,4 +30,3 @@ def create_xyz(device: dai.Device, width: int, height: int): xyz = np.stack([x_coord, y_coord], axis=-1) return np.pad(xyz, ((0, 0), (0, 0), (0, 1)), "constant", constant_values=1.0) - diff --git a/depthai_sdk/src/depthai_sdk/components/stereo_component.py b/depthai_sdk/src/depthai_sdk/components/stereo_component.py index 7367c34b5..2b9d7de72 100644 --- a/depthai_sdk/src/depthai_sdk/components/stereo_component.py +++ b/depthai_sdk/src/depthai_sdk/components/stereo_component.py @@ -7,17 +7,15 @@ import depthai as dai import numpy as np -from depthai_sdk.components.camera_component import CameraComponent +from depthai_sdk.components.camera_component import CameraComponent, ComponentOutput from depthai_sdk.components.component import Component -from depthai_sdk.components.parser import parse_median_filter, parse_encode +from depthai_sdk.components.parser import parse_median_filter, parse_encode, encoder_profile_to_fourcc from depthai_sdk.components.stereo_control import StereoControl from depthai_sdk.components.undistort import _get_mesh from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout -from depthai_sdk.oak_outputs.xout.xout_depth import XoutDepth +from depthai_sdk.oak_outputs.xout.xout_depth import XoutDisparityDepth from depthai_sdk.oak_outputs.xout.xout_disparity import XoutDisparity from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.oak_outputs.xout.xout_h26x import XoutH26x -from depthai_sdk.oak_outputs.xout.xout_mjpeg import XoutMjpeg from depthai_sdk.replay import Replay from depthai_sdk.visualize.configs import StereoColor from depthai_sdk.visualize.visualizer_helper import depth_to_disp_factor @@ -45,25 +43,20 @@ def disparity(self) -> dai.Node.Output: def __init__(self, device: dai.Device, pipeline: dai.Pipeline, - resolution: Union[None, str, dai.MonoCameraProperties.SensorResolution] = 
None, - fps: Optional[float] = None, - left: Union[None, CameraComponent, dai.node.MonoCamera] = None, # Left mono camera - right: Union[None, CameraComponent, dai.node.MonoCamera] = None, # Right mono camera + left: Union[CameraComponent, dai.node.MonoCamera], # Left stereo camera + right: Union[CameraComponent, dai.node.MonoCamera], # Right stereo camera replay: Optional[Replay] = None, args: Any = None, - name: Optional[str] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None): """ Args: - pipeline (dai.Pipeline): DepthAI pipeline - resolution (str/SensorResolution): If monochrome cameras aren't already passed, create them and set specified resolution - fps (float): If monochrome cameras aren't already passed, create them and set specified FPS - left (None / dai.None.Output / CameraComponent): Left mono camera source. Will get handled by Camera object. - right (None / dai.None.Output / CameraComponent): Right mono camera source. Will get handled by Camera object. - replay (Replay object, optional): Replay - args (Any, optional): Use user defined arguments when constructing the pipeline - name (str, optional): Name of the output stream - encode (str/bool/Profile, optional): Encode the output stream + device (dai.Device): DepthAI device. + pipeline (dai.Pipeline): DepthAI pipeline. + left (dai.None.Output / CameraComponent): Left mono camera source. Will get handled by Camera object. + right (dai.None.Output / CameraComponent): Right mono camera source. Will get handled by Camera object. + replay (Replay object, optional): Replay object to use for playback. + args (Any, optional): Use user defined arguments when constructing the pipeline. + encode (str/bool/Profile, optional): Encode the output stream. """ super().__init__() self.out = self.Out(self) @@ -78,10 +71,7 @@ def __init__(self, self._device = device self._replay: Optional[Replay] = replay - self._resolution: Optional[Union[str, dai.MonoCameraProperties.SensorResolution]] = resolution - self._fps: Optional[float] = fps self._args: Dict = args - self.name = name self.left = left self.right = right @@ -120,25 +110,6 @@ def __init__(self, if len(device.getCameraSensorNames()) == 1: raise Exception('OAK-1 camera does not have Stereo camera pair!') - # If not specified, default to 400P resolution for faster processing - self._resolution = self._resolution or dai.MonoCameraProperties.SensorResolution.THE_400_P - - # Always use 1200p for OAK-D-LR and OAK-D-SR - if self._device.getDeviceName() == 'OAK-D-LR': - self._resolution = dai.MonoCameraProperties.SensorResolution.THE_1200_P - - if not self.left: # Should never happen - self.left = CameraComponent(device, pipeline, 'left', self._resolution, self._fps, replay=self._replay) - if not self.right: - self.right = CameraComponent(device, pipeline, 'right', self._resolution, self._fps, - replay=self._replay) - - # AR0234 outputs 1200p, so we need to resize it to 800p on RVC2 - if self._device.getDeviceName() == 'OAK-D-LR': - if isinstance(self.left, CameraComponent) and isinstance(self.right, CameraComponent): - self.left.config_color_camera(isp_scale=(2, 3)) - self.right.config_color_camera(isp_scale=(2, 3)) - if self._get_ir_drivers(): laser = self._args.get('irDotBrightness', None) laser = laser if laser is not None else 800 @@ -158,6 +129,38 @@ def __init__(self, self._left_stream = self._get_output_stream(self.left) self._right_stream = self._get_output_stream(self.right) + # Check whether input stereo pairs are larger than 1280 pixels in width 
(limitation of the RVC2/RVC3). + # If that's the case, create ImageManip to downscale the streams. + downscale_manips = [] + if isinstance(self.left, CameraComponent): + # Check whether input size width is larger than 1280 + w, h = self.left.stream_size + if w > 1280: + manip = pipeline.create(dai.node.ImageManip) + new_h = int(h * (1280 / w)) + manip.setResize(1280, new_h) + logging.info(f'Input frame size to stereo component was {w}x{h}, added downscalling to 1280x{new_h}') + manip.setMaxOutputFrameSize(1280 * new_h) + # Stereo works on GRAY8 frames + manip.setFrameType(dai.ImgFrame.Type.GRAY8) + self._left_stream.link(manip.inputImage) + self._left_stream = manip.out + downscale_manips.append(manip) + if isinstance(self.right, CameraComponent): + # Check whether input size width is larger than 1280 + w, h = self.right.stream_size + if w > 1280: + manip = pipeline.create(dai.node.ImageManip) + new_h = int(h * (1280 / w)) + manip.setResize(1280, new_h) + logging.info(f'Input frame size to stereo component was {w}x{h}, added downscalling to 1280x{new_h}') + manip.setMaxOutputFrameSize(1280 * new_h) + # Stereo works on GRAY8 frames + manip.setFrameType(dai.ImgFrame.Type.GRAY8) + self._right_stream.link(manip.inputImage) + self._right_stream = manip.out + downscale_manips.append(manip) + if self._replay: # Replay self._replay.initStereoDepth(self.node, left_name=self.left._source, right_name=self.right._source) else: @@ -176,14 +179,14 @@ def __init__(self, self.node.setRectifyEdgeFillColor(0) if self._undistortion_offset is not None: - calibData = self._replay._calibData if self._replay else device.readCalibration() + calib_data = self._replay._calibData if self._replay else device.readCalibration() w_frame, h_frame = self._get_stream_size(self.left) - mapX_left, mapY_left, mapX_right, mapY_right = self._get_maps(w_frame, h_frame, calibData) + mapX_left, mapY_left, mapX_right, mapY_right = self._get_maps(w_frame, h_frame, calib_data) mesh_l = _get_mesh(mapX_left, mapY_left) mesh_r = _get_mesh(mapX_right, mapY_right) - meshLeft = list(mesh_l.tobytes()) - meshRight = list(mesh_r.tobytes()) - self.node.loadMeshData(meshLeft, meshRight) + mesh_left = list(mesh_l.tobytes()) + mesh_right = list(mesh_r.tobytes()) + self.node.loadMeshData(mesh_left, mesh_right) if self._args: self._config_stereo_args(self._args) @@ -192,8 +195,8 @@ def __init__(self, self._control_xlink_in = pipeline.create(dai.node.XLinkIn) self._control_xlink_in.setStreamName(f"{self.node.id}_inputControl") self._control_xlink_in.out.link(self.node.inputConfig) - self._control_xlink_in.setMaxDataSize( - 1) # CameraControl message doesn't use any additional data (only metadata) + # CameraControl message doesn't use any additional data (only metadata) + self._control_xlink_in.setMaxDataSize(1) def on_pipeline_started(self, device: dai.Device): if self._control_xlink_in is not None: @@ -261,7 +264,7 @@ def config_stereo(self, if confidence is not None: self.node.initialConfig.setConfidenceThreshold(confidence) if align is not None: self._align_component = align - self.node.setDepthAlign(align.node.getBoardSocket()) + self.node.setDepthAlign(align._socket) if median is not None: self.node.setMedianFilter(parse_median_filter(median)) if extended is not None: self.node.initialConfig.setExtendedDisparity(extended) if subpixel is not None: self.node.initialConfig.setSubpixel(subpixel) @@ -389,12 +392,12 @@ def _get_disparity_factor(self, device: dai.Device) -> float: calib = device.readCalibration() baseline = 
calib.getBaselineDistance(useSpecTranslation=True) * 10 # mm intrinsics = calib.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, self.right.getResolutionSize()) - focalLength = intrinsics[0][0] + focal_length = intrinsics[0][0] disp_levels = self.node.getMaxDisparity() / 95 - return baseline * focalLength * disp_levels + return baseline * focal_length * disp_levels def _get_maps(self, width: int, height: int, calib: dai.CalibrationHandler): - imageSize = (width, height) + image_size = (width, height) M1 = np.array(calib.getCameraIntrinsics(calib.getStereoLeftCameraId(), width, height)) M2 = np.array(calib.getCameraIntrinsics(calib.getStereoRightCameraId(), width, height)) d1 = np.array(calib.getDistortionCoefficients(calib.getStereoLeftCameraId())) @@ -413,98 +416,78 @@ def _get_maps(self, width: int, height: int, calib: dai.CalibrationHandler): M2[0][0] += self._undistortion_offset M2[1][1] += self._undistortion_offset - mapX_l, mapY_l = cv2.initUndistortRectifyMap(M1, d1, R1, M2, imageSize, cv2.CV_32FC1) - mapX_r, mapY_r = cv2.initUndistortRectifyMap(M2, d2, R2, M2, imageSize, cv2.CV_32FC1) + mapX_l, mapY_l = cv2.initUndistortRectifyMap(M1, d1, R1, M2, image_size, cv2.CV_32FC1) + mapX_r, mapY_r = cv2.initUndistortRectifyMap(M2, d2, R2, M2, image_size, cv2.CV_32FC1) return mapX_l, mapY_l, mapX_r, mapY_r + def get_fourcc(self) -> Optional[str]: + if self.encoder is None: + return None + return encoder_profile_to_fourcc(self._encoderProfile) + """ Available outputs (to the host) of this component """ + def _mono_frames(self): + """ + Create mono frames output if WLS filter is enabled or colorize is set to RGBD + """ + mono_frames = None + if self.wls_config['enabled'] or self._colorize == StereoColor.RGBD: + mono_frames = StreamXout(self._right_stream) + return mono_frames + class Out: + class DepthOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutDisparityDepth( + device=device, + frames=StreamXout(self._comp.depth), + dispScaleFactor=depth_to_disp_factor(device, self._comp.node), + mono_frames=self._comp._mono_frames(), + colorize=self._comp._colorize, + colormap=self._comp._postprocess_colormap, + ir_settings=self._comp.ir_settings + ).set_comp_out(self) + + class DisparityOut(ComponentOutput): + def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: + return XoutDisparity( + device=device, + frames=StreamXout(self._comp.encoder.bitstream) if fourcc else + StreamXout(self._comp.disparity), + disp_factor=255.0 / self._comp.node.getMaxDisparity(), + mono_frames=self._comp._mono_frames(), + colorize=self._comp._colorize, + colormap=self._comp._postprocess_colormap, + wls_config=self._comp.wls_config, + ir_settings=self._comp.ir_settings, + ).set_comp_out(self) + + class RectifiedLeftOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(self._comp.node.rectifiedLeft, 'Rectified left')).set_comp_out(self) + + class RectifiedRightOut(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(StreamXout(self._comp.node.rectifiedRight, 'Rectified right')).set_comp_out(self) + + class EncodedOut(DisparityOut): + def __call__(self, device: dai.Device) -> XoutBase: + if not self._comp.encoder: + raise RuntimeError('Encoder not enabled, cannot output encoded frames') + if self._comp.wls_config['enabled']: + warnings.warn('WLS filter is enabled, but cannot be applied to encoded frames.') + + return super().__call__(device, 
fourcc=self._comp.get_fourcc()) + def __init__(self, stereo_component: 'StereoComponent'): self._comp = stereo_component - def _mono_frames(self): - """ - Create mono frames output if WLS filter is enabled or colorize is set to RGBD - """ - mono_frames = None - if self._comp.wls_config['enabled'] or self._comp._colorize == StereoColor.RGBD: - mono_frames = StreamXout(self._comp.node.id, self._comp._right_stream, name=self._comp.name) - return mono_frames - - def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - # By default, we want to show disparity - return self.depth(pipeline, device) - - def disparity(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - - out = XoutDisparity( - device=device, - frames=StreamXout(self._comp.node.id, self._comp.disparity, name=self._comp.name), - disp_factor=255.0 / self._comp.node.getMaxDisparity(), - fps=fps, - mono_frames=self._mono_frames(), - colorize=self._comp._colorize, - colormap=self._comp._postprocess_colormap, - wls_config=self._comp.wls_config, - ir_settings=self._comp.ir_settings, - ) - - return self._comp._create_xout(pipeline, out) - - def rectified_left(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - out = XoutFrames( - frames=StreamXout(self._comp.node.id, self._comp.node.rectifiedLeft), - fps=fps) - out.name = 'Rectified left' - return self._comp._create_xout(pipeline, out) - - def rectified_right(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - out = XoutFrames( - frames=StreamXout(self._comp.node.id, self._comp.node.rectifiedRight), - fps=fps) - out.name = 'Rectified right' - return self._comp._create_xout(pipeline, out) - - def depth(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - fps = self._comp.left.get_fps() if self._comp._replay is None else self._comp._replay.get_fps() - - out = XoutDepth( - device=device, - frames=StreamXout(self._comp.node.id, self._comp.depth, name=self._comp.name), - dispScaleFactor=depth_to_disp_factor(device, self._comp.node), - fps=fps, - mono_frames=self._mono_frames(), - colorize=self._comp._colorize, - colormap=self._comp._postprocess_colormap, - wls_config=self._comp.wls_config, - ir_settings=self._comp.ir_settings - ) - return self._comp._create_xout(pipeline, out) - - def encoded(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: - if not self._comp.encoder: - raise RuntimeError('Encoder not enabled, cannot output encoded frames') - - if self._comp.wls_config['enabled']: - warnings.warn('WLS filter is enabled, but cannot be applied to encoded frames.') - - if self._comp._encoderProfile == dai.VideoEncoderProperties.Profile.MJPEG: - out = XoutMjpeg(frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream), - color=self._comp.colormap is not None, - lossless=self._comp.encoder.getLossless(), - fps=self._comp.encoder.getFrameRate(), - frame_shape=(1200, 800)) - else: - out = XoutH26x(frames=StreamXout(self._comp.encoder.id, self._comp.encoder.bitstream), - color=self._comp.colormap is not None, - profile=self._comp._encoderProfile, - fps=self._comp.encoder.getFrameRate(), - frame_shape=(1200, 800)) - - return self._comp._create_xout(pipeline, out) + self.depth = self.DepthOut(stereo_component) + 
self.rectified_left = self.RectifiedLeftOut(stereo_component) + self.rectified_right = self.RectifiedRightOut(stereo_component) + self.disparity = self.DisparityOut(stereo_component) + self.encoded = self.EncodedOut(stereo_component) + self.main = self.depth diff --git a/depthai_sdk/src/depthai_sdk/components/stereo_control.py b/depthai_sdk/src/depthai_sdk/components/stereo_control.py index 372493d66..cf52e688b 100644 --- a/depthai_sdk/src/depthai_sdk/components/stereo_control.py +++ b/depthai_sdk/src/depthai_sdk/components/stereo_control.py @@ -14,9 +14,11 @@ 'illumination_led': (0, 1500), } + def clamp(value, min_value, max_value): return max(min(value, max_value), min_value) + class StereoControl: def __init__(self, device: dai.Device): self.queue = None diff --git a/depthai_sdk/src/depthai_sdk/components/undistort.py b/depthai_sdk/src/depthai_sdk/components/undistort.py index 56a2c2618..53d302d90 100644 --- a/depthai_sdk/src/depthai_sdk/components/undistort.py +++ b/depthai_sdk/src/depthai_sdk/components/undistort.py @@ -1,33 +1,33 @@ import numpy as np + def _get_mesh(mapX: np.ndarray, mapY: np.ndarray): - meshCellSize = 16 + mesh_cell_size = 16 mesh0 = [] - # print(mapX.shape) # Creates subsampled mesh which will be loaded on to device to undistort the image for y in range(mapX.shape[0] + 1): # iterating over height of the image - if y % meshCellSize == 0: - rowLeft = [] + if y % mesh_cell_size == 0: + row_left = [] for x in range(mapX.shape[1] + 1): # iterating over width of the image - if x % meshCellSize == 0: + if x % mesh_cell_size == 0: if y == mapX.shape[0] and x == mapX.shape[1]: - rowLeft.append(mapY[y - 1, x - 1]) - rowLeft.append(mapX[y - 1, x - 1]) + row_left.append(mapY[y - 1, x - 1]) + row_left.append(mapX[y - 1, x - 1]) elif y == mapX.shape[0]: - rowLeft.append(mapY[y - 1, x]) - rowLeft.append(mapX[y - 1, x]) + row_left.append(mapY[y - 1, x]) + row_left.append(mapX[y - 1, x]) elif x == mapX.shape[1]: - rowLeft.append(mapY[y, x - 1]) - rowLeft.append(mapX[y, x - 1]) + row_left.append(mapY[y, x - 1]) + row_left.append(mapX[y, x - 1]) else: - rowLeft.append(mapY[y, x]) - rowLeft.append(mapX[y, x]) - if (mapX.shape[1] % meshCellSize) % 2 != 0: - rowLeft.append(0) - rowLeft.append(0) + row_left.append(mapY[y, x]) + row_left.append(mapX[y, x]) + if (mapX.shape[1] % mesh_cell_size) % 2 != 0: + row_left.append(0) + row_left.append(0) - mesh0.append(rowLeft) + mesh0.append(row_left) mesh0 = np.array(mesh0) # mesh = list(map(tuple, mesh0)) - return mesh0 \ No newline at end of file + return mesh0 diff --git a/depthai_sdk/src/depthai_sdk/integrations/roboflow.py b/depthai_sdk/src/depthai_sdk/integrations/roboflow.py index 1bc6fe60f..40a6e3919 100644 --- a/depthai_sdk/src/depthai_sdk/integrations/roboflow.py +++ b/depthai_sdk/src/depthai_sdk/integrations/roboflow.py @@ -1,7 +1,7 @@ import json import logging from pathlib import Path -from typing import Dict +from typing import Dict, Optional from zipfile import ZipFile import depthai as dai @@ -19,7 +19,7 @@ def __init__(self, config: Dict): self.config = config - def _file_with_ext(self, folder: Path, ext: str) -> Path: + def _file_with_ext(self, folder: Path, ext: str) -> Optional[Path]: files = list(folder.glob(f"*{ext}")) if 0 == len(files): return None @@ -33,9 +33,9 @@ def device_update(self, device: dai.Device) -> Path: name = self.config['model'].replace('/', '_') # '/' isn't valid folder name model_folder = ROBOFLOW_MODELS / name - jsonFile = self._file_with_ext(model_folder, '.json') - if jsonFile: - return jsonFile + 
json_file = self._file_with_ext(model_folder, '.json') + if json_file: + return json_file json_res = response.json() if "error" in json_res: @@ -52,18 +52,18 @@ def device_update(self, device: dai.Device) -> Path: logging.info(f"Downloading '{ret['name']}' model from Roboflow server") - zipFileReq = requests.get(ret['model']) - zipFileReq.raise_for_status() + zip_file_req = requests.get(ret['model']) + zip_file_req.raise_for_status() (ROBOFLOW_MODELS / name).mkdir(parents=True, exist_ok=True) - zipFilePath = str(ROBOFLOW_MODELS / 'roboflow.zip') + zip_file_path = str(ROBOFLOW_MODELS / 'roboflow.zip') # Download the .zip where our model is - with open(zipFilePath, 'wb') as f: - f.write(zipFileReq.content) + with open(zip_file_path, 'wb') as f: + f.write(zip_file_req.content) - logging.info(f"Downloaded the model to {zipFilePath}") + logging.info(f"Downloaded the model to {zip_file_path}") - with ZipFile(zipFilePath, 'r') as zObject: # Extract the zip + with ZipFile(zip_file_path, 'r') as zObject: # Extract the zip zObject.extractall(str(ROBOFLOW_MODELS / name)) # Rename bin/xml files @@ -72,21 +72,20 @@ def device_update(self, device: dai.Device) -> Path: # Rename bin/xml paths inside the json new_json_name = str(model_folder / (name + ".json")) - jsonConf = json.load(self._file_with_ext(model_folder, ".json").open()) - jsonConf['model']['xml'] = name + ".xml" - jsonConf['model']['bin'] = name + ".bin" + json_conf = json.load(self._file_with_ext(model_folder, ".json").open()) + json_conf['model']['xml'] = name + ".xml" + json_conf['model']['bin'] = name + ".bin" # For some reason, Roboflow server provides incorrect json file, so we have to edit it - if 'output_format' in jsonConf: - jsonConf['nn_config']['output_format'] = jsonConf['output_format'] - del jsonConf['output_format'] - if 'NN_family' in jsonConf: - jsonConf['nn_config']['NN_family'] = jsonConf['NN_family'] - del jsonConf['NN_family'] + if 'output_format' in json_conf: + json_conf['nn_config']['output_format'] = json_conf['output_format'] + del json_conf['output_format'] + if 'NN_family' in json_conf: + json_conf['nn_config']['NN_family'] = json_conf['NN_family'] + del json_conf['NN_family'] # Overwrite the json file, and rename it - self._file_with_ext(model_folder, ".json").rename(new_json_name).open("w").write(json.dumps(jsonConf)) - - Path(zipFilePath).unlink() # Delete .zip + self._file_with_ext(model_folder, ".json").rename(new_json_name).open("w").write(json.dumps(json_conf)) + Path(zip_file_path).unlink() # Delete .zip return Path(new_json_name) diff --git a/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py b/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py index f809dd9b9..6688b5f01 100644 --- a/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py +++ b/depthai_sdk/src/depthai_sdk/integrations/ros/imu_interpolation.py @@ -1,7 +1,8 @@ from enum import Enum from typing import List -import numpy as np + import depthai as dai +import numpy as np class ImuSyncMethod(Enum): diff --git a/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py b/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py index e05909495..f8d4a1a97 100644 --- a/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py +++ b/depthai_sdk/src/depthai_sdk/integrations/ros/ros2_streaming.py @@ -1,8 +1,10 @@ import logging +from queue import Queue from threading import Thread from typing import Dict, Any -from queue import Queue + import rclpy + from depthai_sdk.integrations.ros.ros_base 
import RosBase diff --git a/depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/config.json b/depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/config.json similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/config.json rename to depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/config.json diff --git a/depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/handler.py b/depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/handler.py similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/deeplabv3_person/handler.py rename to depthai_sdk/src/depthai_sdk/nn_models/_deeplabv3_person/handler.py diff --git a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/config.json b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/config.json similarity index 66% rename from depthai_sdk/src/depthai_sdk/nn_models/openpose2/config.json rename to depthai_sdk/src/depthai_sdk/nn_models/_openpose2/config.json index c5867bf80..e3c99219a 100644 --- a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/config.json +++ b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/config.json @@ -1,8 +1,10 @@ { + "model":{ + "model_name": "openpose2" + }, "nn_config": { "output_format" : "raw" }, - "openvino_version": "2020_4", "handler": "handler.py", "version": 1 } diff --git a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/handler.py b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/handler.py similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/openpose2/handler.py rename to depthai_sdk/src/depthai_sdk/nn_models/_openpose2/handler.py diff --git a/depthai_sdk/src/depthai_sdk/nn_models/openpose2/model.yml b/depthai_sdk/src/depthai_sdk/nn_models/_openpose2/model.yml similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/openpose2/model.yml rename to depthai_sdk/src/depthai_sdk/nn_models/_openpose2/model.yml diff --git a/depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/config.json b/depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/config.json similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/config.json rename to depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/config.json diff --git a/depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/handler.py b/depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/handler.py similarity index 100% rename from depthai_sdk/src/depthai_sdk/nn_models/road-segmentation-adas-0001/handler.py rename to depthai_sdk/src/depthai_sdk/nn_models/_road-segmentation-adas-0001/handler.py diff --git a/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json b/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json index 969eb5bce..781101bdd 100644 --- a/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json +++ b/depthai_sdk/src/depthai_sdk/nn_models/yolo-v3-tf/config.json @@ -22,6 +22,7 @@ "confidence_threshold" : 0.5 } }, + "openvino_version": "2021.4", "mappings": { "labels": diff --git a/depthai_sdk/src/depthai_sdk/oak_camera.py b/depthai_sdk/src/depthai_sdk/oak_camera.py index 562f3aebc..d1e9fab51 100644 --- a/depthai_sdk/src/depthai_sdk/oak_camera.py +++ b/depthai_sdk/src/depthai_sdk/oak_camera.py @@ -1,4 +1,3 @@ -import copy import logging import time import warnings @@ -6,6 +5,7 @@ from typing import Dict, Any, Optional, List, Union, Callable from depthai_sdk import CV2_HAS_GUI_SUPPORT +from depthai_sdk.types import Resolution from 
depthai_sdk.visualize.visualizer import Visualizer try: @@ -17,19 +17,33 @@ from depthai_sdk.trigger_action.actions.abstract_action import Action from depthai_sdk.args_parser import ArgsParser -from depthai_sdk.classes.output_config import BaseConfig, RecordConfig, OutputConfig, SyncConfig, RosStreamConfig, TriggerActionConfig +from depthai_sdk.classes.packet_handlers import ( + BasePacketHandler, + QueuePacketHandler, + RosPacketHandler, + TriggerActionPacketHandler, + RecordPacketHandler, + CallbackPacketHandler, + VisualizePacketHandler +) +# RecordConfig, OutputConfig, SyncConfig, RosStreamConfig, TriggerActionConfig from depthai_sdk.components.camera_component import CameraComponent -from depthai_sdk.components.component import Component +from depthai_sdk.components.component import Component, ComponentOutput from depthai_sdk.components.imu_component import IMUComponent from depthai_sdk.components.nn_component import NNComponent -from depthai_sdk.components.parser import parse_usb_speed, parse_camera_socket +from depthai_sdk.components.parser import ( + parse_usb_speed, + parse_camera_socket, + get_first_color_cam, + parse_open_vino_version +) from depthai_sdk.components.stereo_component import StereoComponent from depthai_sdk.components.pointcloud_component import PointcloudComponent -from depthai_sdk.oak_device import OakDevice from depthai_sdk.record import RecordType, Record from depthai_sdk.replay import Replay from depthai_sdk.trigger_action.triggers.abstract_trigger import Trigger -from depthai_sdk.utils import configPipeline, report_crash_dump +from depthai_sdk.utils import report_crash_dump + class UsbWarning(UserWarning): @@ -49,7 +63,7 @@ class OakCamera: def __init__(self, device: Optional[str] = None, usb_speed: Union[None, str, dai.UsbSpeed] = None, # Auto by default - replay: Optional[str] = None, + replay: Union[None, str, Path] = None, rotation: int = 0, config: dai.Device.Config = None, args: Union[bool, Dict] = True @@ -70,20 +84,26 @@ def __init__(self, self.pipeline = dai.Pipeline() self._args: Optional[Dict[str, Any]] = None # User defined arguments - self._oak = OakDevice() + self._pipeine_graph = None if args: if isinstance(args, bool): self._args = ArgsParser.parseArgs() - # Set up the OakCamera - if self._args.get('recording', None): - replay = self._args.get('recording', None) - if self._args.get('deviceId', None): - device = self._args.get('deviceId', None) - if self._args.get('usbSpeed', None): - usb_speed = parse_usb_speed(self._args.get('usbSpeed', None)) else: # Already parsed self._args = args + # Set up the OakCamera + if self._args.get('recording', None): + replay = self._args.get('recording', None) + if self._args.get('deviceId', None): + device = self._args.get('deviceId', None) + if self._args.get('usbSpeed', None): + usb_speed = parse_usb_speed(self._args.get('usbSpeed', None)) + + self.config_pipeline( + xlink_chunk=self._args.get('xlinkChunkSize', None), + tuning_blob=self._args.get('cameraTuning', None), + openvino_version=self._args.get('openvinoVersion', None), + ) if config is None: config = dai.Device.Config() @@ -101,23 +121,22 @@ def __init__(self, self._polling = [] self._components: List[Component] = [] # List of components - self._out_templates: List[BaseConfig] = [] + self._packet_handlers: List[BasePacketHandler] = [] self._rotation = rotation - if replay is not None: self.replay = Replay(replay) self.replay.initPipeline(self.pipeline) logging.info(f'Available streams from recording: {self.replay.getStreams()}') - + 
self._calibration = self._init_calibration() + def camera(self, source: Union[str, dai.CameraBoardSocket], resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, - name: Optional[str] = None, ) -> CameraComponent: """ Creates Camera component. This abstracts ColorCamera/MonoCamera nodes and supports mocking the camera when @@ -129,36 +148,83 @@ def camera(self, resolution (str/SensorResolution): Sensor resolution of the camera. fps (float): Sensor FPS encode (bool/str/Profile): Whether we want to enable video encoding (accessible via cameraComponent.out_encoded). If True, it will use MJPEG - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. """ - socket = source + sensor_type = None if isinstance(source, str): - socket = parse_camera_socket(source.split(",")[0]) + if "," in source: # For sensors that support multiple + parts = source.lower().split(',') + source = parts[0] + if parts[1] in ["c", "color"]: + sensor_type = dai.CameraSensorType.COLOR + elif parts[1] in ["m", "mono"]: + sensor_type = dai.CameraSensorType.MONO + else: + raise Exception( + "Please specify sensor type with c/color or m/mono after the ','" + " - eg. `cam = oak.create_camera('cama,c')`" + ) + + if source == 'left': + source = self._calibration.getStereoLeftCameraId() + elif source == 'right': + source = self._calibration.getStereoRightCameraId() + elif source in ['color', 'rgb']: + source = get_first_color_cam(self.device) + else: + source = parse_camera_socket(source) + + if source in [None, dai.CameraBoardSocket.AUTO]: + return None # There's no camera on this socket + for comp in self._components: - if isinstance(comp, CameraComponent) and comp.node.getBoardSocket() == socket: + if isinstance(comp, CameraComponent) and comp._socket == source: return comp - comp = CameraComponent(self._oak.device, + comp = CameraComponent(self.device, self.pipeline, source=source, resolution=resolution, fps=fps, encode=encode, + sensor_type=sensor_type, rotation=self._rotation, replay=self.replay, - name=name, args=self._args) self._components.append(comp) return comp + def _init_device(self, + config: dai.Device.Config, + device_str: Optional[str] = None, + ) -> None: + + """ + Connect to the OAK camera + """ + self.device = None + if device_str is not None: + device_info = dai.DeviceInfo(device_str) + else: + (found, device_info) = dai.Device.getFirstAvailableDevice() + if not found: + raise Exception("No OAK device found to connect to!") + + self.device = dai.Device( + config=config, + deviceInfo=device_info, + ) + + # TODO test with usb3 (SUPER speed) + if config.board.usb.maxSpeed != dai.UsbSpeed.HIGH and self.device.getUsbSpeed() == dai.UsbSpeed.HIGH: + warnings.warn("Device connected in USB2 mode! This might cause some issues. 
" + "In such case, please try using a (different) USB3 cable, " + "or force USB2 mode 'with OakCamera(usb_speed='usb2') as oak:'", UsbWarning) + def create_camera(self, source: Union[str, dai.CameraBoardSocket], - resolution: Optional[Union[ - str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + resolution: Optional[Resolution] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, - name: Optional[str] = None, ) -> CameraComponent: """ Deprecated, use camera() instead. @@ -172,14 +238,13 @@ def create_camera(self, resolution (str/SensorResolution): Sensor resolution of the camera. fps (float): Sensor FPS encode (bool/str/Profile): Whether we want to enable video encoding (accessible via cameraComponent.out_encoded). If True, it will use MJPEG - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. """ - return self.camera(source, resolution, fps, encode, name) + return self.camera(source, resolution, fps, encode) def all_cameras(self, resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> List[CameraComponent]: @@ -193,16 +258,19 @@ def all_cameras(self, """ components: List[CameraComponent] = [] # Loop over all available camera sensors - for cam_sensor in self._oak.device.getConnectedCameraFeatures(): - comp = CameraComponent(self._oak.device, + if self.replay: + sources = self.replay.getStreams() # TODO handle in case the stream is not from a camera + else: + sources = [cam_sensor.socket for cam_sensor in self.device.getConnectedCameraFeatures()] + for source in sources: + comp = CameraComponent(self.device, self.pipeline, - source=cam_sensor.socket, + source=source, resolution=resolution, fps=fps, encode=encode, rotation=self._rotation, replay=self.replay, - name=None, args=self._args) components.append(comp) @@ -211,9 +279,9 @@ def all_cameras(self, def create_all_cameras(self, resolution: Optional[Union[ - str, dai.ColorCameraProperties.SensorResolution, + str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution - ]] = None, + ]] = None, fps: Optional[float] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> List[CameraComponent]: @@ -236,7 +304,6 @@ def create_nn(self, tracker: bool = False, # Enable object tracker - only for Object detection models spatial: Union[None, bool, StereoComponent] = None, decode_fn: Optional[Callable] = None, - name: Optional[str] = None ) -> NNComponent: """ Creates Neural Network component. @@ -248,9 +315,11 @@ def create_nn(self, tracker: Enable object tracker, if model is object detector (yolo/mobilenet) spatial: Calculate 3D spatial coordinates, if model is object detector (yolo/mobilenet) and depth stream is available decode_fn: Custom decoding function for the model's output - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. 
""" - comp = NNComponent(self._oak.device, + if spatial and type(spatial) == bool: + spatial = self.stereo() + + comp = NNComponent(self.device, self.pipeline, model=model, input=input, @@ -259,8 +328,7 @@ def create_nn(self, spatial=spatial, decode_fn=decode_fn, replay=self.replay, - args=self._args, - name=name) + args=self._args) self._components.append(comp) return comp @@ -269,7 +337,6 @@ def stereo(self, fps: Optional[float] = None, left: Union[None, dai.Node.Output, CameraComponent] = None, # Left mono camera right: Union[None, dai.Node.Output, CameraComponent] = None, # Right mono camera - name: Optional[str] = None, encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None ) -> StereoComponent: """ @@ -280,23 +347,22 @@ def stereo(self, fps (float): If monochrome cameras aren't already passed, create them and set specified FPS left (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. right (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. encode (bool/str/Profile): Whether we want to enable video encoding (accessible via StereoComponent.out.encoded). If True, it will use h264 codec. """ if left is None: - left = self.camera(source=dai.CameraBoardSocket.LEFT, resolution=resolution, fps=fps) + left = self.camera(source="left", resolution=resolution, fps=fps) if right is None: - right = self.camera(source=dai.CameraBoardSocket.RIGHT, resolution=resolution, fps=fps) + right = self.camera(source="right", resolution=resolution, fps=fps) + + if right is None or left is None: + return None - comp = StereoComponent(self._oak.device, + comp = StereoComponent(self.device, self.pipeline, - resolution=resolution, - fps=fps, left=left, right=right, replay=self.replay, args=self._args, - name=name, encode=encode) self._components.append(comp) return comp @@ -319,23 +385,21 @@ def create_stereo(self, fps (float): If monochrome cameras aren't already passed, create them and set specified FPS left (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. right (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. - name (str): Name used to identify the X-out stream. This name will also be associated with the frame in the callback function. encode (bool/str/Profile): Whether we want to enable video encoding (accessible via StereoComponent.out.encoded). If True, it will use h264 codec. 
""" - return self.stereo(resolution, fps, left, right, name, encode) + return self.stereo(resolution, fps, left, right, encode) def create_imu(self) -> IMUComponent: """ Create IMU component """ - comp = IMUComponent(self._oak.device, self.pipeline) + comp = IMUComponent(self.device, self.pipeline) self._components.append(comp) return comp def create_pointcloud(self, stereo: Union[None, StereoComponent, dai.node.StereoDepth, dai.Node.Output] = None, colorize: Union[None, CameraComponent, dai.node.MonoCamera, dai.node.ColorCamera, dai.Node.Output, bool] = None, - name: Optional[str] = None, ) -> PointcloudComponent: if colorize is None: @@ -349,43 +413,16 @@ def create_pointcloud(self, colorize = component comp = PointcloudComponent( - self._oak.device, + self.device, self.pipeline, stereo=stereo, colorize=colorize, replay=self.replay, args=self._args, - name=name ) self._components.append(comp) return comp - def _init_device(self, - config: dai.Device.Config, - device_str: Optional[str] = None, - ) -> None: - - """ - Connect to the OAK camera - """ - if device_str is not None: - device_info = dai.DeviceInfo(device_str) - else: - (found, device_info) = dai.Device.getFirstAvailableDevice() - if not found: - raise Exception("No OAK device found to connect to!") - - self._oak.device = dai.Device( - config=config, - deviceInfo=device_info, - ) - - # TODO test with usb3 (SUPER speed) - if config.board.usb.maxSpeed != dai.UsbSpeed.HIGH and self._oak.device.getUsbSpeed() == dai.UsbSpeed.HIGH: - warnings.warn("Device connected in USB2 mode! This might cause some issues. " - "In such case, please try using a (different) USB3 cable, " - "or force USB2 mode 'with OakCamera(usbSpeed='usb2') as oak:'", UsbWarning) - def config_pipeline(self, xlink_chunk: Optional[int] = None, calib: Optional[dai.CalibrationHandler] = None, @@ -399,23 +436,38 @@ def config_pipeline(self, @param tuning_blob: Camera tuning blob @param openvino_version: Force specific OpenVINO version """ - configPipeline(self.pipeline, xlink_chunk, calib, tuning_blob, openvino_version) + if xlink_chunk is not None: + self.pipeline.setXLinkChunkSize(xlink_chunk) + if calib is not None: + self.pipeline.setCalibrationData(calib) + if tuning_blob is not None: + self.pipeline.setCameraTuningBlobPath(tuning_blob) + ov_version = parse_open_vino_version(openvino_version) + if ov_version is not None: + self.pipeline.setOpenVINOVersion(ov_version) def __enter__(self): return self def __exit__(self, exc_type, exc_value, tb): + self.close() + + def close(self): logging.info("Closing OAK camera") if self.replay: - logging.info("Closing replay") self.replay.close() - if self._oak.device is not None: - self._oak.device.close() - for out in self._out_templates: - if isinstance(out, RecordConfig): - out.rec.close() - self._oak.close() + for handler in self._packet_handlers: + handler.close() + + self.device.close() + + def _new_oak_msg(self, q_name: str, msg): + if self._stop: + return + if q_name in self._new_msg_callbacks: + for callback in self._new_msg_callbacks[q_name]: + callback(q_name, msg) def start(self, blocking=False): """ @@ -423,48 +475,51 @@ def start(self, blocking=False): Args: blocking (bool): Continuously loop and call oak.poll() until program exits """ - self.build() - - # Remove unused nodes. There's a better way though. - # self._pipeline. 
- # schema = self._pipeline.serializeToJson()['pipeline'] - # used_nodes = [] - # for conn in schema['connections']: - # print() - # used_nodes.append(conn["node1Id"]) - # used_nodes.append(conn["node2Id"]) - # - # for node in self._pipeline.getAllNodes(): - # if node.id not in used_nodes: - # print(f"Removed node {node} (id: {node.id}) from the pipeline as it hasn't been used!") - # self._pipeline.remove(node) - - self._oak.device.startPipeline(self.pipeline) - - self._oak.init_callbacks(self.pipeline) + self._new_msg_callbacks = {} + for node in self.pipeline.getAllNodes(): + if isinstance(node, dai.node.XLinkOut): + self._new_msg_callbacks[node.getStreamName()] = [] + + for handler in self._packet_handlers: + # Setup PacketHandlers. This will: + # - Initialize all submodules (eg. Recording, Trigger/Actions, Visualizer) + # - Create XLinkIn nodes for all components/streams + handler.setup(self.pipeline, self.device, self._new_msg_callbacks) + + # Upload the pipeline to the device and start it + self.device.startPipeline(self.pipeline) + + for xlink_name in self._new_msg_callbacks: + try: + self.device.getOutputQueue(xlink_name, maxSize=4, blocking=False).addCallback(self._new_oak_msg) + # TODO: make this nicer, have self._new_msg_callbacks know whether it's replay or not + except Exception as e: + if self.replay: + self.replay._add_callback(xlink_name, self._new_oak_msg) + else: + raise e + + # Append callbacks to be called from main thread + # self._polling.append() + if self._pipeine_graph is not None: + self._pipeine_graph.create_graph(self.pipeline.serializeToJson()['pipeline'], self.device) + logging.info('Pipeline graph process started') # Call on_pipeline_started() for each component for comp in self._components: - comp.on_pipeline_started(self._oak.device) - - # Start FPS counters - for xout in self._oak.oak_out_streams: # Start FPS counters - xout.start_fps() + comp.on_pipeline_started(self.device) if self.replay: - self.replay.createQueues(self._oak.device) + self.replay.createQueues(self.device) + self.replay.start() # Called from Replay module on each new frame sent to the device. - self.replay.start(self._oak.new_msg) # Check if callbacks (sync/non-sync are set) if blocking: # Constant loop: get messages, call callbacks while self.running(): - time.sleep(0.001) self.poll() - cv2.destroyAllWindows() - def running(self) -> bool: """ Check if camera is running. @@ -479,18 +534,18 @@ def poll(self) -> Optional[int]: Returns: key pressed from cv2.waitKey, or None if """ + # if self._stop: + # return if CV2_HAS_GUI_SUPPORT: key = cv2.waitKey(1) if key == ord('q'): self._stop = True return key else: + time.sleep(0.001) key = -1 # TODO: check if components have controls enabled and check whether key == `control` - - self._oak.check_sync() - if self.replay: if key == ord(' '): self.replay.toggle_pause() @@ -508,183 +563,133 @@ def poll(self) -> Optional[int]: return key - def build(self) -> dai.Pipeline: - """ - Connect to the device and build the pipeline based on previously provided configuration. Configure XLink queues, - upload the pipeline to the device. This function must only be called once! build() is also called by start(). 
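With the new start() above, every XLinkOut queue is wired to a single device callback and each packet handler is set up before the pipeline is uploaded, so `oak.build()` is gone. A non-blocking main loop therefore only needs running()/poll():

from depthai_sdk import OakCamera

with OakCamera() as oak:
    color = oak.camera('color')
    oak.visualize(color, fps=True)
    oak.start(blocking=False)   # start() uploads and starts the pipeline itself
    while oak.running():
        oak.poll()              # services main-thread handlers (e.g. OpenCV windows)
        # ... application logic here ...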
- Return: - Built dai.Pipeline - """ - if self._built: - return - self._built = True - - # First go through each component to check whether any is forcing an OpenVINO version - # TODO: check each component's SHAVE usage - for c in self._components: - ov = c.forced_openvino_version() - if ov: - if self.pipeline.getRequiredOpenVINOVersion() and self.pipeline.getRequiredOpenVINOVersion() != ov: - raise Exception( - 'Two components forced two different OpenVINO version!' - 'Please make sure that all your models are compiled using the same OpenVINO version.' - ) - self.pipeline.setOpenVINOVersion(ov) - - if self.pipeline.getRequiredOpenVINOVersion() is None: - # Force 2021.4 as it's better supported (blobconverter, compile tool) for now. - self.pipeline.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4) - - - # Create XLinkOuts based on visualizers/callbacks enabled - - # TODO: clean this up and potentially move elsewhere - names = [] - for out in self._out_templates: - xouts = out.setup(self.pipeline, self._oak.device, names) - self._oak.oak_out_streams.extend(xouts) - - # User-defined arguments - if self._args: - self.config_pipeline( - xlink_chunk=self._args.get('xlinkChunkSize', None), - tuning_blob=self._args.get('cameraTuning', None), - openvino_version=self._args.get('openvinoVersion', None), - ) - - return self.pipeline - - def _get_component_outputs(self, output: Union[List, Callable, Component]) -> List[Callable]: - if not isinstance(output, List): - output = [output] - - for i in range(len(output)): - if isinstance(output[i], Component): - # Select default (main) output of the component - output[i] = output[i].out.main - return output - - def sync(self, outputs: Union[Callable, List[Callable]], callback: Callable, visualize=False): - """ - Synchronize multiple components outputs forward them to the callback. - Args: - outputs: Component output(s) - callback: Where to send synced streams - visualize: Whether to draw on the frames (like with visualize()) - """ - if isinstance(outputs, Callable): - outputs = [outputs] # to list - - self._out_templates.append(SyncConfig(outputs, callback)) + def sync(self, outputs: Union[ComponentOutput, List[ComponentOutput]], callback: Callable, visualize=False): + raise DeprecationWarning('sync() is deprecated. Use callback() instead.') def record(self, - outputs: Union[Callable, List[Callable]], + outputs: Union[ComponentOutput, List[ComponentOutput]], path: str, - record_type: RecordType = RecordType.VIDEO): + record_type: RecordType = RecordType.VIDEO + ) -> RecordPacketHandler: """ Record component outputs. This handles syncing multiple streams (eg. left, right, color, depth) and saving them to the computer in desired format (raw, mp4, mcap, bag..). + Args: - outputs (Component/Component output): Component output(s) to be recorded - path: Folder path where to save these streams - record_type: Record type + outputs (Component/Component output): Component output(s) to be recorded. + path: Folder path where to save these streams. + record_type: Record type. """ - record = Record(Path(path).resolve(), record_type) - self._out_templates.append(RecordConfig(self._get_component_outputs(outputs), record)) - return record + handler = RecordPacketHandler(outputs, Record(Path(path).resolve(), record_type)) + self._packet_handlers.append(handler) + return handler def show_graph(self): """ - Shows DepthAI Pipeline graph, which can be useful when debugging. Builds the pipeline (oak.build()). + Shows DepthAI Pipeline graph, which can be useful when debugging. 
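record() above now returns a RecordPacketHandler instead of the old RecordConfig, and sync() only raises a DeprecationWarning. Usage stays close to the previous API; a sketch with an illustrative output path:

from depthai_sdk import OakCamera
from depthai_sdk.record import RecordType

with OakCamera() as oak:
    color = oak.camera('color', encode='h265')
    # Record the encoded stream; returns a RecordPacketHandler
    oak.record(color.out.encoded, './recordings', RecordType.VIDEO)
    oak.start(blocking=True)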
You must call this BEFORE calling the oak.start()! """ - self.build() - from depthai_pipeline_graph.pipeline_graph import \ - PipelineGraph - - p = PipelineGraph() - p.create_graph(self.pipeline.serializeToJson()['pipeline'], self.device) - self._polling.append(p.update) - logging.info('Process started') + from depthai_pipeline_graph.pipeline_graph import PipelineGraph + self._pipeine_graph = PipelineGraph() + self._polling.append(self._pipeine_graph.update) def visualize(self, - output: Union[List, Callable, Component], + output: Union[List, ComponentOutput, Component], record_path: Optional[str] = None, scale: float = None, fps=False, - callback: Callable = None): + callback: Callable = None, + visualizer: str = 'opencv' + ) -> Visualizer: """ Visualize component output(s). This handles output streaming (OAK->host), message syncing, and visualizing. + Args: - output (Component/Component output): Component output(s) to be visualized. If component is passed, SDK will visualize its default output (out()) - record_path: Path where to store the recording (visualization window name gets appended to that path), supported formats: mp4, avi - scale: Scale the output window by this factor - fps: Whether to show FPS on the output window - callback: Instead of showing the frame, pass the Packet to the callback function, where it can be displayed - """ - if record_path and isinstance(output, List): - if len(output) > 1: - raise ValueError('Recording visualizer is only supported for a single output.') - output = output[0] - - visualizer = Visualizer(scale, fps) - return self._callback(output, callback, visualizer, record_path) - - def _callback(self, - output: Union[List, Callable, Component], - callback: Callable, - visualizer: Visualizer = None, - record_path: Optional[str] = None): - if isinstance(output, List): - for element in output: - self._callback(element, callback, visualizer, record_path) - return visualizer - - if isinstance(output, Component): - output = output.out.main - - visualizer_enabled = visualizer is not None - if visualizer_enabled: - config = visualizer.config - visualizer = copy.deepcopy(visualizer) or Visualizer() - visualizer.config = config if config else visualizer.config - - self._out_templates.append(OutputConfig(output, callback, visualizer, visualizer_enabled, record_path)) - return visualizer - - def callback(self, output: Union[List, Callable, Component], callback: Callable, enable_visualizer: bool = False): + output (Component/Component output): Component output(s) to be visualized. If component is passed, SDK will visualize its default output (out()). + record_path: Path where to store the recording (visualization window name gets appended to that path), supported formats: mp4, avi. + scale: Scale the output window by this factor. + fps: Whether to show FPS on the output window. + callback: Instead of showing the frame, pass the Packet to the callback function, where it can be displayed. + visualizer: Which visualizer to use. Options: 'opencv', 'depthai-viewer', 'robothub'. 
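The new `visualizer` argument documented above selects the backend at call time; 'opencv' stays the default and runs on the main thread, while 'robothub' is not implemented yet. A short sketch of switching to the DepthAI viewer backend:

from depthai_sdk import OakCamera

with OakCamera() as oak:
    color = oak.camera('color')
    # 'opencv' (default) or 'depthai-viewer'; the viewer backend does not need the main thread
    oak.visualize(color, fps=True, visualizer='depthai-viewer')
    oak.start(blocking=True)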
+ """ + main_thread = False + visualizer = visualizer.lower() + if visualizer in ['opencv', 'cv2']: + from depthai_sdk.visualize.visualizers.opencv_visualizer import OpenCvVisualizer + vis = OpenCvVisualizer(scale, fps) + main_thread = True # OpenCV's imshow() requires to be called from the main thread + elif visualizer in ['depthai-viewer', 'depthai_viewer', 'viewer', 'depthai']: + from depthai_sdk.visualize.visualizers.viewer_visualizer import DepthaiViewerVisualizer + vis = DepthaiViewerVisualizer(scale, fps) + elif visualizer in ['robothub', 'rh']: + raise NotImplementedError('Robothub visualizer is not implemented yet') + else: + raise ValueError(f"Unknown visualizer: {visualizer}. Options: 'opencv'") + + handler = VisualizePacketHandler(output, + vis, + callback=callback, record_path=record_path, + main_thread=main_thread) + self._packet_handlers.append(handler) + + if main_thread: + self._polling.append(handler._poll) + + return vis + + def queue(self, output: Union[ComponentOutput, Component, List], max_size: int = 30) -> QueuePacketHandler: """ - Create a callback for the component output(s). This handles output streaming (OAK->Host) and message syncing. + Create a queue for the component output(s). This handles output streaming (OAK->Host) and message syncing. + Args: output: Component output(s) to be visualized. If component is passed, SDK will visualize its default output. - callback: Handler function to which the Packet will be sent. - enable_visualizer: Whether to enable visualizer for this output. + max_size: Maximum queue size for this output. """ - self._callback(output, callback, Visualizer() if enable_visualizer else None) + handler = QueuePacketHandler(output, max_size) + self._packet_handlers.append(handler) + return handler - def ros_stream(self, output: Union[List, Callable, Component]): - self._out_templates.append(RosStreamConfig(self._get_component_outputs(output))) - - def trigger_action(self, trigger: Trigger, action: Union[Action, Callable]): - self._out_templates.append(TriggerActionConfig(trigger, action)) - - def set_max_queue_size(self, size: int): + def callback(self, + output: Union[List, Callable, Component], + callback: Callable, + main_thread=False + ) -> CallbackPacketHandler: """ - Set maximum queue size for all outputs. This is the maximum number of frames that can be stored in the queue. + Create a callback for the component output(s). This handles output streaming (OAK->Host) and message syncing. + Args: - size: Maximum queue size for all outputs. + output: Component output(s) to be visualized. If component is passed, SDK will visualize its default output. + callback: Handler function to which the Packet will be sent. + main_thread: Whether to run the callback in the main thread. If False, it will call the callback in a separate thread, so some functions (eg. cv2.imshow) won't work. """ - self._oak.set_max_queue_size(size) + handler = CallbackPacketHandler(output, callback=callback, main_thread=main_thread) + if main_thread: + self._polling.append(handler._poll) + self._packet_handlers.append(handler) + return handler - @property - def device(self) -> dai.Device: + def ros_stream(self, output: Union[List, ComponentOutput, Component]) -> RosPacketHandler: """ - Returns dai.Device object. oak.built() has to be called before querying this property! + Publish component output(s) to ROS streams. 
""" - return self._oak.device + handler = RosPacketHandler(output) + self._packet_handlers.append(handler) + return handler + + def trigger_action(self, trigger: Trigger, action: Union[Action, Callable]) -> None: + self._packet_handlers.append(TriggerActionPacketHandler(trigger, action)) @property def sensors(self) -> List[dai.CameraBoardSocket]: """ Returns list of all sensors added to the pipeline. """ - return self._oak.image_sensors + return self.device.getConnectedCameraFeatures() + + def _init_calibration(self) -> dai.CalibrationHandler: + if self.replay: + calibration = self.pipeline.getCalibrationData() + else: + calibration = self.device.readCalibration() + if calibration is None: + logging.warning("No calibration data found on the device or in replay") + return calibration diff --git a/depthai_sdk/src/depthai_sdk/oak_device.py b/depthai_sdk/src/depthai_sdk/oak_device.py deleted file mode 100644 index 235d345ea..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_device.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import List, Optional - -import depthai as dai - -from depthai_sdk.oak_outputs.xout.xout_base import XoutBase - - -class OakDevice: - def __init__(self): - self.device: Optional[dai.Device] = None - # fpsHandlers: Dict[str, FPS] = dict() - self.oak_out_streams: List[XoutBase] = [] - self.max_queue_size = 4 - - @property - def image_sensors(self) -> List[dai.CameraBoardSocket]: - """ - Available imageSensors available on the camera - """ - return self.device.getConnectedCameras() - - @property - def info(self) -> dai.DeviceInfo: - return self.device.getDeviceInfo() - - def init_callbacks(self, pipeline: dai.Pipeline): - for node in pipeline.getAllNodes(): - if isinstance(node, dai.node.XLinkOut): - stream_name = node.getStreamName() - # self.fpsHandlers[name] = FPS() - self.device.getOutputQueue(stream_name, maxSize=self.max_queue_size, blocking=False).addCallback( - lambda name, msg: self.new_msg(name, msg) - ) - - def new_msg(self, name, msg): - for sync in self.oak_out_streams: - sync.new_msg(name, msg) - - def check_sync(self): - """ - Checks whether there are new synced messages, non-blocking. - """ - for sync in self.oak_out_streams: - sync.check_queue(block=False) # Don't block! 
- - def set_max_queue_size(self, size: int): - self.max_queue_size = size - - def close(self): - for stream in self.oak_out_streams: - stream.close() diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py b/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py index addcac847..6318e5cda 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py @@ -1,13 +1,14 @@ import threading -from typing import Dict, List, Any, Optional +from datetime import timedelta +from typing import Dict, Any, Optional class SequenceNumSync: """ - msgs = {seqNum: {name: message}} + self.msgs = {seqNum: {name: message}} Example: - msgs = { + self.msgs = { '1': { 'rgb': dai.Frame(), 'dets': dai.ImgDetections(), @@ -47,3 +48,44 @@ def sync(self, seq_num: int, name: str, msg) -> Optional[Dict]: return ret return None + + +class TimestampSync: + def __init__(self, stream_num: int, ms_threshold: int): + self.msgs: Dict[str, Any] = dict() + self.stream_num: int = stream_num + self.ms_threshold = ms_threshold + + def sync(self, timestamp, name: str, msg): + if name not in self.msgs: + self.msgs[name] = [] + + self.msgs[name].append((timestamp, msg)) + + synced = {} + for name, arr in self.msgs.items(): + # Go through all stored messages and calculate the time difference to the target msg. + # Then sort these self.msgs to find a msg that's closest to the target time, and check + # whether it's below 17ms which is considered in-sync. + diffs = [] + for i, (msg_ts, msg) in enumerate(arr): + diffs.append(abs(msg_ts - timestamp)) + if len(diffs) == 0: + break + diffs_sorted = diffs.copy() + diffs_sorted.sort() + dif = diffs_sorted[0] + + if dif < timedelta(milliseconds=self.ms_threshold): + synced[name] = diffs.index(dif) + + if len(synced) == self.stream_num: # We have all synced streams + # Remove older self.msgs + for name, i in synced.items(): + self.msgs[name] = self.msgs[name][i:] + ret = {} + for name, arr in self.msgs.items(): + ts, synced_msg = arr.pop(0) + ret[name] = synced_msg + return ret + return None diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py index b896e7159..e69de29bb 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/__init__.py @@ -1,15 +0,0 @@ -try: - import cv2 -except ImportError: - cv2 = None - - -class Clickable: - def __init__(self, decay_step: int = 30): - super().__init__() - self.buffer = None - self.decay_step = decay_step - - def on_click_callback(self, event, x, y, flags, param) -> None: - if event == cv2.EVENT_MOUSEMOVE: - self.buffer = ([0, param[0][y, x], [x, y]]) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py index 4ca3fa270..b4fabad61 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_base.py @@ -1,21 +1,20 @@ -import traceback -import warnings from abc import ABC, abstractmethod -from queue import Empty, Queue -from typing import List, Callable, Optional +from typing import List, Optional, Callable import depthai as dai -from depthai_sdk.oak_outputs.fps import FPS +from depthai_sdk.classes.packets import FramePacket +from depthai_sdk.components.component 
import ComponentOutput class StreamXout: - def __init__(self, id: int, out: dai.Node.Output, name: Optional[str] = None): + def __init__(self, out: dai.Node.Output, name: Optional[str] = None): self.stream = out if name is not None: - self.name = f'{name}_{str(out.name)}' + self.name = name else: - self.name = f"{str(id)}_{out.name}" + node = out.getParent() + self.name = f"{str(node.id)}_{out.name}" class ReplayStream(StreamXout): @@ -26,90 +25,73 @@ def __init__(self, name: str): class XoutBase(ABC): def __init__(self) -> None: self._streams = [xout.name for xout in self.xstreams()] - self._visualizer = None - self._visualizer_enabled = False self._packet_name = None - self._fps = None - self.queue = None - self.callback = None + self._packet_name_postfix = None + + # It will get assigned later inside the BasePacketHandler class + self.new_packet_callback: Callable = lambda x: None def get_packet_name(self) -> str: if self._packet_name is None: self._packet_name = ";".join([xout.name for xout in self.xstreams()]) - return self._packet_name + return self._packet_name + (f'_{self._packet_name_postfix}' if self._packet_name_postfix else '') + + def set_packet_name_postfix(self, postfix: str) -> None: + """ + Set postfix to packet name. + """ + self._packet_name_postfix = postfix + + def set_comp_out(self, comp_out: ComponentOutput) -> 'XoutBase': + """ + Set ComponentOutput to Xout. + """ + if comp_out.name is None: + # If user hasn't specified component's output name, generate one + comp_out.name = self.get_packet_name() + else: + # Otherwise, set packet name to user-specified one + self._packet_name = comp_out.name + return self @abstractmethod def xstreams(self) -> List[StreamXout]: raise NotImplementedError() - def setup_base(self, callback: Callable): - # Gets called when initializing - self.queue = Queue(maxsize=10) - self.callback = callback - - def start_fps(self): - self._fps = FPS() + def device_msg_callback(self, name, dai_message) -> None: + """ + This is the (first) callback that gets called on a device message. Don't override it. + It will call `new_msg` and `on_callback` methods. If `new_msg` returns a packet, it will call + `new_packet` method. + """ + # self._fps_counter[name].next_iter() + packet = self.new_msg(name, dai_message) + if packet is not None: + # If not list, convert to list. + # Some Xouts create multiple packets from a single message (example: IMU) + if not isinstance(packet, list): + packet = [packet] + + for p in packet: + # In case we have encoded frames, we need to set the codec + if isinstance(p, FramePacket) and \ + hasattr(self, 'get_codec') and \ + self._fourcc is not None: + p.set_decode_codec(self.get_codec) + + self.on_callback(p) + self.new_packet_callback(p) @abstractmethod def new_msg(self, name: str, msg) -> None: raise NotImplementedError() - @abstractmethod - def visualize(self, packet) -> None: - raise NotImplementedError() - def on_callback(self, packet) -> None: """ Hook called when `callback` or `self.visualize` are used. """ pass - def on_record(self, packet) -> None: - """ - Hook called when `record_path` is used. - """ - pass - - def close(self) -> None: - """ - Hook that will be called when exiting the context manager. - """ - pass - - # This approach is used as some functions (eg. imshow()) need to be called from - # main thread, and calling them from callback thread wouldn't work. - def check_queue(self, block=False) -> None: - """ - Checks queue for any available messages. If available, call callback. Non-blocking by default. 
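Returning to the TimestampSync helper added to syncing.py earlier in this diff: it keeps a per-stream buffer and releases one message per stream once every stream has a message within ms_threshold of the reference timestamp. A minimal host-side sketch with two hypothetical streams and string payloads:

from datetime import datetime, timedelta
from depthai_sdk.oak_outputs.syncing import TimestampSync

sync = TimestampSync(stream_num=2, ms_threshold=17)

now = datetime.now()
# Feed messages as (timestamp, stream name, payload); only one stream so far -> not synced yet
assert sync.sync(now, 'color', 'color-frame-0') is None
# Second stream arrives 5 ms later, within the 17 ms threshold -> both messages are released
synced = sync.sync(now + timedelta(milliseconds=5), 'depth', 'depth-frame-0')
if synced is not None:
    print(synced['color'], synced['depth'])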
- """ - try: - packet = self.queue.get(block=block) - - if packet is not None: - self._fps.next_iter() - - self.on_callback(packet) - - if self._visualizer_enabled: - try: - self.visualize(packet) - except Exception as e: - warnings.warn(f'An error occurred while visualizing: {e}') - traceback.print_exc() - else: - # User defined callback - try: - self.callback(packet) - except Exception as e: - warnings.warn(f'An error occurred while calling callback: {e}') - traceback.print_exc() - - # Record after processing, so that user can modify the frame - self.on_record(packet) - - except Empty: # Queue empty - pass - def fourcc(self) -> str: if self.is_mjpeg(): return 'mjpeg' @@ -124,25 +106,22 @@ def fourcc(self) -> str: return None def is_h265(self) -> bool: - if type(self).__name__ == 'XoutH26x': - # XoutH26x class has profile attribute - return self.profile == dai.VideoEncoderProperties.Profile.H265_MAIN - return False + fourcc = getattr(self, '_fourcc', None) + return fourcc is not None and fourcc.lower() == 'hevc' def is_h264(self) -> bool: - if type(self).__name__ == 'XoutH26x': - # XoutH26x class has profile attribute - return self.profile != dai.VideoEncoderProperties.Profile.H265_MAIN - return False - - def is_h26x(self) -> bool: - return type(self).__name__ == 'XoutH26x' + fourcc = getattr(self, '_fourcc', None) + return fourcc is not None and fourcc.lower() == 'h264' def is_mjpeg(self) -> bool: - return type(self).__name__ == 'XoutMjpeg' + fourcc = getattr(self, '_fourcc', None) + return fourcc is not None and fourcc.lower() == 'mjpeg' + + def is_h26x(self) -> bool: + return self.is_h264() or self.is_h265() def is_raw(self) -> bool: - return type(self).__name__ == 'XoutFrames' + return type(self).__name__ == 'XoutFrames' and self._fourcc is None def is_depth(self) -> bool: return type(self).__name__ == 'XoutDepth' diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py index cf774ac50..fe65f4b2a 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_depth.py @@ -1,50 +1,40 @@ -from typing import Optional +from typing import Optional, Dict import depthai as dai -import numpy as np -from depthai_sdk.classes.packets import DepthPacket +from depthai_sdk.classes.packets import DisparityDepthPacket from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_disparity import XoutDisparity from depthai_sdk.visualize.configs import StereoColor -try: - import cv2 -except ImportError: - cv2 = None - -class XoutDepth(XoutDisparity): +class XoutDisparityDepth(XoutDisparity): def __init__(self, device: dai.Device, frames: StreamXout, dispScaleFactor: float, - fps: float, mono_frames: Optional[StreamXout], colorize: StereoColor = None, colormap: int = None, - wls_config: dict = None, ir_settings: dict = None): self.name = 'Depth' super().__init__(device=device, frames=frames, disp_factor=255 / 95, - fps=fps, mono_frames=mono_frames, colorize=colorize, colormap=colormap, - wls_config=wls_config, ir_settings=ir_settings) self.disp_scale_factor = dispScaleFactor - def visualize(self, packet: DepthPacket): - # Convert depth to disparity for nicer visualization - packet.depth_map = packet.frame.copy() - with np.errstate(divide='ignore'): - disp = self.disp_scale_factor / packet.frame - - disp[disp == np.inf] = 0 - - packet.frame = np.round(disp).astype(np.uint8) - super().visualize(packet) + def package(self, msgs: 
Dict) -> DisparityDepthPacket: + mono_frame = msgs[self.mono_frames.name] if self.mono_frames else None + return DisparityDepthPacket( + self.get_packet_name(), + msgs[self.frames.name], + colorize=self.colorize, + colormap=self.colormap, + mono_frame=mono_frame, + disp_scale_factor=self.disp_scale_factor, + ) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py index e6e8239aa..80bf7f28f 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py @@ -2,15 +2,15 @@ import logging import warnings from collections import defaultdict -from typing import List, Optional +from typing import List, Optional, Dict import depthai as dai import numpy as np -from depthai_sdk.classes.packets import DepthPacket -from depthai_sdk.oak_outputs.xout import Clickable +from depthai_sdk.classes.packets import DisparityPacket from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames +from depthai_sdk.oak_outputs.xout.xout_seq_sync import XoutSeqSync from depthai_sdk.visualize.configs import StereoColor try: @@ -19,26 +19,23 @@ cv2 = None -class XoutDisparity(XoutFrames, Clickable): +class XoutDisparity(XoutSeqSync, XoutFrames): def __init__(self, device: dai.Device, frames: StreamXout, disp_factor: float, - fps: float, mono_frames: Optional[StreamXout], colorize: StereoColor = None, colormap: int = None, wls_config: dict = None, ir_settings: dict = None): self.mono_frames = mono_frames - self.multiplier = disp_factor - self.fps = fps self.name = 'Disparity' + self.multiplier = disp_factor self.device = device self.colorize = colorize self.colormap = colormap - self.use_wls_filter = wls_config['enabled'] self.ir_settings = ir_settings self._dot_projector_brightness = 0 # [0, 1200] @@ -56,20 +53,20 @@ def __init__(self, self._X, self._y = [], [] # Prefer to use WLS level if set, otherwise use lambda and sigma - wls_level = wls_config['level'] - if wls_level and self.use_wls_filter: + self.use_wls_filter = wls_config['enabled'] if wls_config else False + if self.use_wls_filter: + wls_level = wls_config['level'] logging.debug( f'Using WLS level: {wls_level.name} (lambda: {wls_level.value[0]}, sigma: {wls_level.value[1]})' ) - self.wls_lambda = wls_level.value[0] - self.wls_sigma = wls_level.value[1] - else: - self.wls_lambda = wls_config['lambda'] - self.wls_sigma = wls_config['sigma'] + self.wls_lambda = wls_level.value[0] or wls_config['lambda'] + self.wls_sigma = wls_level.value[1] or wls_config['sigma'] - if self.use_wls_filter: try: self.wls_filter = cv2.ximgproc.createDisparityWLSFilterGeneric(False) + self.wls_filter.setLambda(self.wls_lambda) + self.wls_filter.setSigmaColor(self.wls_sigma) + except AttributeError: warnings.warn( 'OpenCV version does not support WLS filter. Disabling WLS filter. 
' @@ -78,108 +75,48 @@ def __init__(self, ) self.use_wls_filter = False - self.msgs = dict() - - XoutFrames.__init__(self, frames=frames, fps=fps) - Clickable.__init__(self, decay_step=int(self.fps)) + XoutFrames.__init__(self, frames=frames) + XoutSeqSync.__init__(self, [frames, mono_frames]) def on_callback(self, packet) -> None: if self.ir_settings['auto_mode']: - self._auto_ir_search(packet.frame) - - def visualize(self, packet: DepthPacket): - frame = packet.frame - disparity_frame = (frame * self.multiplier).astype(np.uint8) - try: - mono_frame = packet.mono_frame.getCvFrame() - except AttributeError: - mono_frame = None - - stereo_config = self._visualizer.config.stereo - - if self.use_wls_filter or stereo_config.wls_filter: - self.wls_filter.setLambda(self.wls_lambda or stereo_config.wls_lambda) - self.wls_filter.setSigmaColor(self.wls_sigma or stereo_config.wls_sigma) - disparity_frame = self.wls_filter.filter(disparity_frame, mono_frame) - - colorize = self.colorize or stereo_config.colorize - if self.colormap is not None: - colormap = self.colormap - else: - colormap = stereo_config.colormap - colormap[0] = [0, 0, 0] # Invalidate pixels 0 to be black - - if mono_frame is not None and disparity_frame.ndim == 2 and mono_frame.ndim == 3: - disparity_frame = disparity_frame[..., np.newaxis] - - if colorize == StereoColor.GRAY: - packet.frame = disparity_frame - elif colorize == StereoColor.RGB: - packet.frame = cv2.applyColorMap(disparity_frame, colormap) - elif colorize == StereoColor.RGBD: - packet.frame = cv2.applyColorMap( - (disparity_frame * 1.0 + mono_frame * 0.5).astype(np.uint8), colormap - ) - - if self._visualizer.config.output.clickable: - cv2.namedWindow(self.name) - cv2.setMouseCallback(self.name, self.on_click_callback, param=[disparity_frame]) - - if self.buffer: - x, y = self.buffer[2] - text = f'{self.buffer[1]}' # Disparity value - if packet.depth_map is not None: - text = f"{packet.depth_map[y, x] / 1000 :.2f} m" - - self._visualizer.add_circle(coords=(x, y), radius=3, color=(255, 255, 255), thickness=-1) - self._visualizer.add_text(text=text, coords=(x, y - 10)) - - super().visualize(packet) + self._auto_ir_search(packet.msg.getFrame()) def xstreams(self) -> List[StreamXout]: if self.mono_frames is None: return [self.frames] return [self.frames, self.mono_frames] - def new_msg(self, name: str, msg: dai.Buffer) -> None: - if name not in self._streams: - return # From Replay modules. TODO: better handling? - - # TODO: what if msg doesn't have sequence num? 
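package() above now ships the (optionally WLS-filtered) disparity in DisparityPacket.disparity_map instead of drawing it inside the Xout, so any packet handler can pick it up on the host. A sketch using a callback; `stereo.out.disparity` is assumed from the StereoComponent outputs, and the raw disparity levels may need scaling before display:

import cv2
from depthai_sdk import OakCamera

def on_disparity(packet):
    # disparity_map is filled in by XoutDisparity.package() (WLS-filtered when enabled)
    if packet.disparity_map is not None:
        cv2.imshow('disparity', packet.disparity_map)

with OakCamera() as oak:
    stereo = oak.stereo(resolution='400p')
    oak.callback(stereo.out.disparity, callback=on_disparity, main_thread=True)
    oak.start(blocking=True)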
- seq = str(msg.getSequenceNum()) + def package(self, msgs: Dict) -> DisparityPacket: + img_frame = msgs[self.frames.name] + mono_frame = msgs[self.mono_frames.name] if self.mono_frames else None + # TODO: refactor the mess below + packet = DisparityPacket( + self.get_packet_name(), + img_frame, + self.multiplier, + disparity_map=None, + colorize=self.colorize, + colormap=self.colormap, + mono_frame=mono_frame, + ) + packet._get_codec = self.get_codec + + if self._fourcc is None: + disparity_frame = img_frame.getFrame() + else: + disparity_frame = packet.decode() + if disparity_frame is None: + return None - if seq not in self.msgs: - self.msgs[seq] = dict() + if mono_frame and self.use_wls_filter: + # Perform WLS filtering + # If we have wls enabled, it means CV2 is installed + disparity_frame = self.wls_filter.filter(disparity_frame, mono_frame.getCvFrame()) - if name == self.frames.name: - self.msgs[seq][name] = msg - elif name == self.mono_frames.name: - self.msgs[seq][name] = msg - else: - raise ValueError('Message from unknown stream name received by TwoStageSeqSync!') - - if len(self.msgs[seq]) == len(self.xstreams()): - # Frames synced! - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - mono_frame = None - if self.mono_frames is not None: - mono_frame = self.msgs[seq][self.mono_frames.name] - - packet = DepthPacket( - self.get_packet_name(), - img_frame=self.msgs[seq][self.frames.name], - mono_frame=mono_frame, - visualizer=self._visualizer - ) - self.queue.put(packet, block=False) + packet.disparity_map = disparity_frame - new_msgs = {} - for name, msg in self.msgs.items(): - if int(name) > int(seq): - new_msgs[name] = msg - self.msgs = new_msgs + return packet def _auto_ir_search(self, frame: np.ndarray): # Perform neighbourhood search if we got worse metric values @@ -202,7 +139,7 @@ def _ir_grid_search_iteration(self, frame: np.array, candidate_pairs: list = Non fill_rate = np.count_nonzero(frame) / frame.size self._metrics_buffer['fill_rate'].append(fill_rate) - if len(self._metrics_buffer['fill_rate']) < max(self.fps, 30): + if len(self._metrics_buffer['fill_rate']) < 30: return False if candidate_idx >= len(candidate_pairs): @@ -225,7 +162,7 @@ def _ir_grid_search_iteration(self, frame: np.array, candidate_pairs: list = Non return False # Skip first half second of frames to allow for auto exposure to settle down - fill_rate_avg = np.mean(self._metrics_buffer['fill_rate'][int(self.fps // 2):]) + fill_rate_avg = np.mean(self._metrics_buffer['fill_rate'][15:]) self._X.append([self._dot_projector_brightness, self._flood_brightness]) self._y.append(fill_rate_avg) @@ -237,7 +174,7 @@ def _check_consistency(self, frame): fill_rate = np.count_nonzero(frame) / frame.size self._metrics_buffer['fill_rate'].append(fill_rate) - if len(self._metrics_buffer['fill_rate']) < max(self.fps, 30): + if len(self._metrics_buffer['fill_rate']) < 30: return fill_rate_avg = np.mean(self._metrics_buffer['fill_rate']) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py index 4f8971e85..38f17f8b2 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_frames.py @@ -1,13 +1,7 @@ -from typing import Tuple, List - -import numpy as np +from typing import List, Optional from depthai_sdk.classes.packets import FramePacket from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout -from 
depthai_sdk.recorders.video_recorder import VideoRecorder -from depthai_sdk.recorders.video_writers import AvWriter -from depthai_sdk.visualize.configs import TextPosition -from depthai_sdk.visualize.visualizer import Platform, Visualizer try: import cv2 @@ -20,83 +14,44 @@ class XoutFrames(XoutBase): Stream of frames. Single message, no syncing required. """ - def __init__(self, frames: StreamXout, fps: float = 30, frame_shape: Tuple[int, ...] = None): + def __init__(self, + frames: StreamXout, + fourcc: Optional[str] = None, # 'mjpeg', 'h264', 'hevc' + ): """ Args: frames: StreamXout object. - fps: Frames per second for the output stream. - frame_shape: Shape of the frame. If not provided, it will be inferred from the first frame. + fourcc: Codec to use for encoding. If None, no encoding will be done. """ self.frames = frames self.name = frames.name - - self.fps = fps - self._video_recorder = None - self._is_recorder_enabled = None - self._frame_shape = frame_shape + self._fourcc = fourcc + self._codec = None super().__init__() - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None - ) -> None: - self._visualizer = visualizer - self._visualizer_enabled = visualizer_enabled - self.name = name or self.name - - def setup_recorder(self, recorder: VideoRecorder) -> None: - self._video_recorder = recorder - - def visualize(self, packet: FramePacket) -> None: - # Frame shape may be 1D, that means it's an encoded frame - if self._visualizer.frame_shape is None or np.array(self._visualizer.frame_shape).ndim == 1: - if self._frame_shape is not None: - self._visualizer.frame_shape = self._frame_shape - else: - self._visualizer.frame_shape = packet.frame.shape - - if self._visualizer.config.output.show_fps: - self._visualizer.add_text( - text=f'FPS: {self._fps.fps():.1f}', - position=TextPosition.TOP_LEFT - ) - - if self.callback: # Don't display frame, call the callback - self.callback(packet) - else: - packet.frame = self._visualizer.draw(packet.frame) - # Draw on the frame - if self._visualizer.platform == Platform.PC: - cv2.imshow(self.name, packet.frame) - else: - pass - - def on_record(self, packet) -> None: - if self._video_recorder: - if isinstance(self._video_recorder[self.name], AvWriter): - self._video_recorder.write(self.name, packet.msg) - else: - self._video_recorder.write(self.name, packet.frame) + def set_fourcc(self, fourcc: str) -> 'XoutFrames': + self._fourcc = fourcc + return self def xstreams(self) -> List[StreamXout]: return [self.frames] - def new_msg(self, name: str, msg) -> None: + def new_msg(self, name: str, msg): if name not in self._streams: return - - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - packet = FramePacket(self.name or name, - msg, - msg.getCvFrame() if cv2 else None, - self._visualizer) - - self.queue.put(packet, block=False) - - def close(self) -> None: - if self._video_recorder is not None: - self._video_recorder.close() + return FramePacket(self.get_packet_name(), msg) + + def get_codec(self): + # No codec, frames are NV12/YUV/BGR, so we can just use imgFrame.getCvFrame() + if self._fourcc is None: + return None + + if self._codec is None: + try: + import av + except ImportError: + raise ImportError('Attempted to decode an encoded frame, but av is not installed.' 
+ ' Please install it with `pip install av`') + self._codec = av.CodecContext.create(self._fourcc, "r") + return self._codec diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_h26x.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_h26x.py deleted file mode 100644 index 5ace19d6e..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_h26x.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import Tuple - -import depthai as dai - -from depthai_sdk.classes import FramePacket -from depthai_sdk.oak_outputs.xout.xout_base import StreamXout -from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames - -try: - import av -except ImportError: - av = None - - -class XoutH26x(XoutFrames): - def __init__(self, - frames: StreamXout, - color: bool, - profile: dai.VideoEncoderProperties.Profile, - fps: float, - frame_shape: Tuple[int, ...]): - super().__init__(frames) - self.name = 'H26x Stream' - self.color = color - self.profile = profile - self.fps = fps - self._frame_shape = frame_shape - fourcc = 'hevc' if profile == dai.VideoEncoderProperties.Profile.H265_MAIN else 'h264' - self.codec = av.CodecContext.create(fourcc, "r") if av else None - - def decode_frame(self, packet: FramePacket): - if not self.codec: - raise ImportError('av is not installed. Please install it with `pip install av`') - - enc_packets = self.codec.parse(packet.msg.getData()) - if len(enc_packets) == 0: - return None - - frames = self.codec.decode(enc_packets[-1]) - if not frames: - return None - - frame = frames[0].to_ndarray(format='bgr24') - - # If it's Mono, squeeze from 3 planes (height, width, 3) to single plane (height, width) - if not self.color: - frame = frame[:, :, 0] - - return frame - - def visualize(self, packet: FramePacket): - decoded_frame = self.decode_frame(packet) - if decoded_frame is None: - return - - packet.frame = decoded_frame - super().visualize(packet) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py index cf9d016f5..2f9d7b650 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_imu.py @@ -2,128 +2,37 @@ import depthai as dai import numpy as np +from ahrs.filters import Mahony from depthai_sdk.classes import IMUPacket from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout -from depthai_sdk.visualize.visualizer import Visualizer - -try: - import cv2 -except ImportError: - cv2 = None class XoutIMU(XoutBase): - def __init__(self, imu_xout: StreamXout): + def __init__(self, imu_xout: StreamXout, fps: int): self.imu_out = imu_xout - self.packets = [] - self.start_time = 0.0 - - self.fig = None - self.axes = None - self.acceleration_lines = [] - self.gyroscope_lines = [] - - self.acceleration_buffer = [] - self.gyroscope_buffer = [] + self._ahrs = Mahony(frequency=fps) + self._ahrs.Q = np.array([1, 0, 0, 0], dtype=np.float64) super().__init__() self.name = 'IMU' - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None, _=None): - from matplotlib import pyplot as plt - - self._visualizer = visualizer - self._visualizer_enabled = visualizer_enabled - self.name = name or self.name - - self.fig, self.axes = plt.subplots(2, 1, figsize=(10, 10), constrained_layout=True) - labels = ['x', 'y', 'z'] - colors = ['r', 'g', 'b'] - - for i in range(3): - self.acceleration_lines.append(self.axes[0].plot([], [], label=f'Acceleration {labels[i]}', color=colors[i])[0]) - 
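get_codec() above lazily creates a PyAV CodecContext from the stream's fourcc, which is what the deleted XoutH26x used to do eagerly in its constructor. For reference, the same decode path as a standalone sketch; `raw_bytes` stands in for ImgFrame.getData(), and the codec context is reused across frames just as the SDK does:

import av  # pip install av

# One context per stream: 'hevc' for H.265, 'h264' for H.264, 'mjpeg' for MJPEG
codec = av.CodecContext.create('hevc', 'r')

def decode_frame(raw_bytes: bytes, mono: bool = False):
    """Decode one encoded ImgFrame payload, mirroring the removed XoutH26x.decode_frame()."""
    enc_packets = codec.parse(raw_bytes)
    if not enc_packets:
        return None
    frames = codec.decode(enc_packets[-1])
    if not frames:
        return None
    frame = frames[0].to_ndarray(format='bgr24')
    # Mono streams decode to 3 identical planes; keep a single one
    return frame[:, :, 0] if mono else frame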
self.axes[0].set_ylabel('Acceleration (m/s^2)') - self.axes[0].set_xlabel('Time (s)') - self.axes[0].legend() - - for i in range(3): - self.gyroscope_lines.append(self.axes[1].plot([], [], label=f'Gyroscope {labels[i]}', color=colors[i])[0]) - self.axes[1].set_ylabel('Gyroscope (rad/s)') - self.axes[1].set_xlabel('Time (s)') - self.axes[1].legend() - - def visualize(self, packet: IMUPacket): - if self.start_time == 0.0: - self.start_time = packet.data[0].acceleroMeter.timestamp.get() - - acceleration_x = [el.acceleroMeter.x for el in packet.data] - acceleration_y = [el.acceleroMeter.y for el in packet.data] - acceleration_z = [el.acceleroMeter.z for el in packet.data] - - t_acceleration = [(el.acceleroMeter.timestamp.get() - self.start_time).total_seconds() for el in packet.data] - - # Keep only last 100 values - if len(self.acceleration_buffer) > 100: - self.acceleration_buffer.pop(0) - - self.acceleration_buffer.append([t_acceleration, acceleration_x, acceleration_y, acceleration_z]) - - gyroscope_x = [el.gyroscope.x for el in packet.data] - gyroscope_y = [el.gyroscope.y for el in packet.data] - gyroscope_z = [el.gyroscope.z for el in packet.data] - - t_gyroscope = [(el.gyroscope.timestamp.get() - self.start_time).total_seconds() for el in packet.data] - - # Keep only last 100 values - if len(self.gyroscope_buffer) > 100: - self.gyroscope_buffer.pop(0) - - self.gyroscope_buffer.append([t_gyroscope, gyroscope_x, gyroscope_y, gyroscope_z]) - - # Plot acceleration - for i in range(3): - self.acceleration_lines[i].set_xdata([el[0] for el in self.acceleration_buffer]) - self.acceleration_lines[i].set_ydata([el[i + 1] for el in self.acceleration_buffer]) - - self.axes[0].set_xlim(self.acceleration_buffer[0][0][0], t_acceleration[-1]) - self.axes[0].set_ylim(-20, 20) - - # Plot gyroscope - for i in range(3): - self.gyroscope_lines[i].set_xdata([el[0] for el in self.gyroscope_buffer]) - self.gyroscope_lines[i].set_ydata([el[i + 1] for el in self.gyroscope_buffer]) - - self.axes[1].set_xlim(self.gyroscope_buffer[0][0][0], t_acceleration[-1]) - self.axes[1].set_ylim(-20, 20) - - self.fig.canvas.draw() - - # Convert plot to numpy array - img = np.fromstring(self.fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') - img = img.reshape(self.fig.canvas.get_width_height()[::-1] + (3,)) - img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) - - packet.frame = img - - if self.callback: # Don't display frame, call the callback - self.callback(packet) - else: - cv2.imshow(self.name, packet.frame) - def xstreams(self) -> List[StreamXout]: return [self.imu_out] - def new_msg(self, name: str, msg: dai.IMUData) -> None: + def new_msg(self, name: str, msg: dai.IMUData): if name not in self._streams: return - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - packet = IMUPacket(msg.packets) - - self.queue.put(packet, block=False) + arr = [] + for packet in msg.packets: + gyro_vals = np.array([packet.gyroscope.z, packet.gyroscope.x, packet.gyroscope.y]) + accelero_vals = np.array([packet.acceleroMeter.z, packet.acceleroMeter.x, packet.acceleroMeter.y]) + self._ahrs.Q = self._ahrs.updateIMU(self._ahrs.Q, gyro_vals, accelero_vals) + rotation = dai.IMUReportRotationVectorWAcc() + rotation.i = self._ahrs.Q[0] + rotation.j = self._ahrs.Q[1] + rotation.k = self._ahrs.Q[2] + rotation.real = self._ahrs.Q[3] + arr.append(IMUPacket(self.get_packet_name(), packet, rotation=rotation)) + return arr diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_mjpeg.py 
b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_mjpeg.py deleted file mode 100644 index b2611c566..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_mjpeg.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import Tuple - -import numpy as np - -from depthai_sdk.classes.packets import FramePacket -from depthai_sdk.oak_outputs.xout.xout_base import StreamXout -from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames - -try: - import cv2 -except ImportError: - cv2 = None - - -class XoutMjpeg(XoutFrames): - name: str = "MJPEG Stream" - - def __init__(self, frames: StreamXout, color: bool, lossless: bool, fps: float, frame_shape: Tuple[int, ...]): - super().__init__(frames) - # We could use cv2.IMREAD_UNCHANGED, but it produces 3 planes (RGB) for mono frame instead of a single plane - self.flag = cv2.IMREAD_COLOR if color else cv2.IMREAD_GRAYSCALE - self.lossless = lossless - self.fps = fps - self._frame_shape = frame_shape - - if lossless and self._visualizer: - raise ValueError('Visualizing Lossless MJPEG stream is not supported!') - - def decode_frame(self, packet: FramePacket) -> np.ndarray: - return cv2.imdecode(packet.msg.getData(), self.flag) - - def visualize(self, packet: FramePacket): - packet.frame = self.decode_frame(packet) - super().visualize(packet) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py index 64641493c..db04f30fe 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py @@ -2,47 +2,41 @@ from typing import List, Union, Dict, Any, Optional, Tuple import depthai as dai -import numpy as np from depthai_sdk.classes import Detections, ImgLandmarks, SemanticSegmentation from depthai_sdk.classes.enum import ResizeMode from depthai_sdk.classes.packets import ( - _Detection, DetectionPacket, TrackerPacket, SpatialBbMappingPacket, TwoStagePacket, NNDataPacket + Detection, + DetectionPacket, + ImgLandmarksPacket, + NnOutputPacket, + SemanticSegmentationPacket, + SpatialBbMappingPacket, + TwoStagePacket, + NNDataPacket ) -from depthai_sdk.classes.enum import ResizeMode +from depthai_sdk.oak_outputs.syncing import SequenceNumSync from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout +from depthai_sdk.oak_outputs.xout.xout_depth import XoutDisparityDepth from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames from depthai_sdk.oak_outputs.xout.xout_seq_sync import XoutSeqSync -from depthai_sdk.visualize.visualizer import Visualizer -from depthai_sdk.visualize.visualizer_helper import hex_to_bgr, colorize_disparity, draw_mappings, depth_to_disp_factor +from depthai_sdk.types import XoutNNOutputPacket from depthai_sdk.visualize.bbox import BoundingBox -from depthai_sdk.visualize.colors import generate_colors -try: - import cv2 -except ImportError: - cv2 = None +from depthai_sdk.visualize.colors import generate_colors, hex_to_bgr + class XoutNnData(XoutBase): def __init__(self, xout: StreamXout): self.nndata_out = xout super().__init__() - self.name = 'NNData' - - def visualize(self, packet: NNDataPacket): - print('Visualization of NNData is not supported') def xstreams(self) -> List[StreamXout]: return [self.nndata_out] - def new_msg(self, name: str, msg: dai.NNData) -> None: + def new_msg(self, name: str, msg: dai.NNData) -> NNDataPacket: if name not in self._streams: return - - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - packet = 
NNDataPacket(name=self.name, nn_data=msg) - self.queue.put(packet, block=False) + return NNDataPacket(name=self.get_packet_name(), nn_data=msg) class XoutNnResults(XoutSeqSync, XoutFrames): @@ -52,7 +46,8 @@ def xstreams(self) -> List[StreamXout]: def __init__(self, det_nn: 'NNComponent', frames: StreamXout, - nn_results: StreamXout): + nn_results: StreamXout, + bbox: BoundingBox): self.det_nn = det_nn self.nn_results = nn_results @@ -61,6 +56,7 @@ def __init__(self, self.name = 'NN results' self.labels = None + self.bbox = bbox # TODO: add support for colors, generate new colors for each label that doesn't have colors if det_nn._labels: @@ -84,176 +80,85 @@ def __init__(self, self._resize_mode: ResizeMode = det_nn._ar_resize_mode self._nn_size: Tuple[int, int] = det_nn._size - self.segmentation_colormap = None - - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None): - super().setup_visualize(visualizer, visualizer_enabled, name) - - def on_callback(self, packet: Union[DetectionPacket, TrackerPacket]): - # Convert Grayscale to BGR - if len(packet.frame.shape) == 2: - packet.frame = np.dstack((packet.frame, packet.frame, packet.frame)) - - frame_shape = self.det_nn._input.stream_size[::-1] - - if self._frame_shape is None: - # Lazy-load the frame shape - self._frame_shape = np.array([*frame_shape]) - if self._visualizer: - self._visualizer.frame_shape = self._frame_shape - - bbox = BoundingBox().resize_to_aspect_ratio(self._frame_shape, self._nn_size, self._resize_mode) - - # Add detections to packet - if isinstance(packet.img_detections, dai.ImgDetections) \ - or isinstance(packet.img_detections, dai.SpatialImgDetections) \ - or isinstance(packet.img_detections, Detections): - - for detection in packet.img_detections.detections: - d = _Detection() - d.img_detection = detection - d.label = self.labels[detection.label][0] if self.labels else str(detection.label) - d.color = self.labels[detection.label][1] if self.labels else (255, 255, 255) - - d.top_left, d.bottom_right = bbox.get_relative_bbox(BoundingBox(detection)).denormalize(self._frame_shape) - packet.detections.append(d) - - if self._visualizer: - # Add detections to visualizer - self._visualizer.add_detections( - packet.img_detections.detections, - bbox, - self.labels, - is_spatial=packet._is_spatial_detection() - ) - elif isinstance(packet.img_detections, ImgLandmarks): - if not self._visualizer: - return - - all_landmarks = packet.img_detections.landmarks - all_landmarks_indices = packet.img_detections.landmarks_indices - colors = packet.img_detections.colors - for landmarks, indices in zip(all_landmarks, all_landmarks_indices): - for i, landmark in enumerate(landmarks): - # Map normalized coordinates to frame coordinates - l = [(int(point[0] * self._frame_shape[1]), int(point[1] * self._frame_shape[0])) for point in landmark] - idx = indices[i] - - self._visualizer.add_line(pt1=tuple(l[0]), pt2=tuple(l[1]), color=colors[idx], thickness=4) - self._visualizer.add_circle(coords=tuple(l[0]), radius=8, color=colors[idx], thickness=-1) - self._visualizer.add_circle(coords=tuple(l[1]), radius=8, color=colors[idx], thickness=-1) - elif isinstance(packet.img_detections, SemanticSegmentation): - raise NotImplementedError('Semantic segmentation visualization is not implemented yet!') - if not self._visualizer: - return - - # Generate colormap if not already generated - if self.segmentation_colormap is None: - n_classes = len(self.labels) if self.labels else 8 - self.segmentation_colormap = 
generate_colors(n_classes) - - mask = np.array(packet.img_detections.mask).astype(np.uint8) - - if mask.ndim == 3: - mask = np.argmax(mask, axis=0) - - try: - colorized_mask = np.array(self.segmentation_colormap)[mask] - except IndexError: - unique_classes = np.unique(mask) - max_class = np.max(unique_classes) - new_colors = generate_colors(max_class - len(self.segmentation_colormap) + 1) - self.segmentation_colormap.extend(new_colors) - colorized_mask = np.array(self.segmentation_colormap)[mask] - - # bbox = None - # if self.normalizer.resize_mode == ResizeMode.LETTERBOX: - # bbox = self.normalizer.get_letterbox_bbox(packet.frame, normalize=True) - # input_h, input_w = self.normalizer.aspect_ratio - # resize_bbox = bbox[0] * input_w, bbox[1] * input_h, bbox[2] * input_w, bbox[3] * input_h - # resize_bbox = np.int0(resize_bbox) - # else: - # resize_bbox = self.normalizer.normalize(frame=np.zeros(self._frame_shape, dtype=bool), - # bbox=bbox or (0., 0., 1., 1.)) - - # x1, y1, x2, y2 = resize_bbox - # h, w = packet.frame.shape[:2] - # # Stretch mode - # if self.normalizer.resize_mode == ResizeMode.STRETCH: - # colorized_mask = cv2.resize(colorized_mask, (w, h)) - # elif self.normalizer.resize_mode == ResizeMode.LETTERBOX: - # colorized_mask = cv2.resize(colorized_mask[y1:y2, x1:x2], (w, h)) - # else: - # padded_mask = np.zeros((h, w, 3), dtype=np.uint8) - # resized_mask = cv2.resize(colorized_mask, (x2 - x1, y2 - y1)) - # padded_mask[y1:y2, x1:x2] = resized_mask - # colorized_mask = padded_mask - - # self._visualizer.add_mask(colorized_mask, alpha=0.5) - - def package(self, msgs: Dict): - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - - decode_fn = self.det_nn._decode_fn - packet = DetectionPacket( - self.get_packet_name(), - msgs[self.frames.name], - msgs[self.nn_results.name] if decode_fn is None else decode_fn(msgs[self.nn_results.name]), - self._visualizer - ) - - self.queue.put(packet, block=False) - -class XoutSpatialBbMappings(XoutSeqSync, XoutFrames): + def package(self, msgs: Dict) -> XoutNNOutputPacket: + nn_result = msgs[self.nn_results.name] + img = msgs[self.frames.name] + if type(nn_result) == dai.NNData: + decode_fn = self.det_nn._decode_fn + + if decode_fn is None: + return NnOutputPacket(self.get_packet_name(), img, nn_result, self.bbox) + + decoded_nn_result = decode_fn(nn_result) + if type(decoded_nn_result) == Detections: + packet = DetectionPacket(self.get_packet_name(), img, nn_result, self.bbox) + return self._add_detections_to_packet(packet, decoded_nn_result) + elif type(decoded_nn_result) == ImgLandmarks: + return ImgLandmarksPacket(self.get_packet_name(), img, nn_result, decoded_nn_result, self.bbox) + elif type(decoded_nn_result) == SemanticSegmentation: + return SemanticSegmentationPacket(self.get_packet_name(), img, nn_result, decoded_nn_result, self.bbox) + raise ValueError(f'NN result decoding failed! 
decode() returned type {type(nn_result)}') + + elif type(nn_result) in [dai.ImgDetections, dai.SpatialImgDetections]: + packet = DetectionPacket(self.get_packet_name(), img, nn_result, self.bbox) + return self._add_detections_to_packet(packet, nn_result) + else: + raise ValueError(f'Unknown NN result type: {type(nn_result)}') + + def _add_detections_to_packet(self, + packet: DetectionPacket, + dets: Union[dai.ImgDetections, dai.SpatialImgDetections, Detections] + ) -> DetectionPacket: + for detection in dets.detections: + packet.detections.append(Detection( + img_detection=detection if isinstance(detection, dai.ImgDetection) else None, + label_str=self.labels[detection.label][0] if self.labels else str(detection.label), + confidence=detection.confidence, + color=self.labels[detection.label][1] if self.labels else (255, 255, 255), + bbox=BoundingBox(detection), + angle=detection.angle if hasattr(detection, 'angle') else None, + ts=dets.getTimestamp() + )) + return packet + + +class XoutSpatialBbMappings(XoutDisparityDepth, SequenceNumSync): def __init__(self, device: dai.Device, stereo: dai.node.StereoDepth, - frames: StreamXout, - configs: StreamXout): + frames: StreamXout, # passthroughDepth + configs: StreamXout, # out + dispScaleFactor: float, + bbox: BoundingBox): self._stereo = stereo self.frames = frames self.configs = configs + self.bbox = bbox - XoutFrames.__init__(self, frames) - XoutSeqSync.__init__(self, [frames, configs]) - - self.device = device - self.multiplier = 255 / 95.0 - self.factor = None - self.name = 'Depth & Bounding Boxes' + XoutDisparityDepth.__init__(self, device, frames, dispScaleFactor, None) + SequenceNumSync.__init__(self, 2) - def xstreams(self) -> List[StreamXout]: - return [self.frames, self.configs] + def new_msg(self, name: str, msg): + # Ignore frames that we aren't listening for + if name not in self._streams: return - def visualize(self, packet: SpatialBbMappingPacket): - if not self.factor: - size = (packet.msg.getWidth(), packet.msg.getHeight()) - self.factor = depth_to_disp_factor(self.device, self._stereo) + synced = self.sync(msg.getSequenceNum(), name, msg) + if synced: + return self.package(synced) - depth = np.array(packet.msg.getFrame()) - with np.errstate(all='ignore'): - disp = (self.factor / depth).astype(np.uint8) + def on_callback(self, packet) -> None: + pass - print('disp max', np.max(disp), 'disp min', np.min(disp)) - packet.frame = colorize_disparity(disp, multiplier=1) - draw_mappings(packet) - - super().visualize(packet) + def xstreams(self) -> List[StreamXout]: + return [self.frames, self.configs] - def package(self, msgs: Dict): - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - packet = SpatialBbMappingPacket( + def package(self, msgs: Dict) -> SpatialBbMappingPacket: + return SpatialBbMappingPacket( self.get_packet_name(), msgs[self.frames.name], msgs[self.configs.name], - self._visualizer + disp_scale_factor=self.disp_scale_factor, ) - self.queue.put(packet, block=False) class XoutTwoStage(XoutNnResults): @@ -261,11 +166,10 @@ class XoutTwoStage(XoutNnResults): Two stage syncing based on sequence number. Each frame produces ImgDetections msg that contains X detections. Each detection (if not on blacklist) will crop the original frame and forward it to the second (stage) NN for inferencing. 
- """ - """ + msgs = { '1': TwoStageSyncPacket(), - '2': TwoStageSyncPacket(), + '2': TwoStageSyncPacket(), } """ @@ -276,10 +180,11 @@ def __init__(self, det_out: StreamXout, second_nn_out: StreamXout, device: dai.Device, - input_queue_name: str): + input_queue_name: str, + bbox: BoundingBox): self.second_nn_out = second_nn_out # Save StreamXout before initializing super()! - super().__init__(det_nn, frames, det_out) + super().__init__(det_nn, frames, det_out, bbox) self.msgs: Dict[str, Dict[str, Any]] = dict() self.det_nn = det_nn @@ -301,7 +206,7 @@ def xstreams(self) -> List[StreamXout]: # No need for `def visualize()` as `XoutNnResults.visualize()` does what we want - def new_msg(self, name: str, msg: dai.Buffer) -> None: + def new_msg(self, name: str, msg: dai.Buffer): if name not in self._streams: return # From Replay modules. TODO: better handling? @@ -373,33 +278,22 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: self.input_cfg_queue.send(cfg) - # print(f'Added detection seq {seq}') elif name in self.frames.name: self.msgs[seq][name] = msg - # print(f'Added frame seq {seq}') else: raise ValueError('Message from unknown stream name received by TwoStageSeqSync!') if self.synced(seq): - # print('Synced', seq) # Frames synced! - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - + dets = self.msgs[seq][self.nn_results.name] packet = TwoStagePacket( self.get_packet_name(), self.msgs[seq][self.frames.name], - self.msgs[seq][self.nn_results.name], + dets, self.msgs[seq][self.second_nn_out.name], self.whitelist_labels, - self._visualizer + self.bbox ) - self.queue.put(packet, block=False) - - # Throws RuntimeError: dictionary changed size during iteration - # for s in self.msgs: - # if int(s) <= int(seq): - # del self.msgs[s] with self.lock: new_msgs = {} @@ -408,6 +302,8 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: new_msgs[name] = msg self.msgs = new_msgs + return self._add_detections_to_packet(packet, dets) + def add_detections(self, seq: str, dets: dai.ImgDetections): # Used to match the scaled bounding boxes by the 2-stage NN script node self.msgs[seq][self.nn_results.name] = dets diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn_encoded.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn_encoded.py deleted file mode 100644 index 9db1202d0..000000000 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn_encoded.py +++ /dev/null @@ -1,58 +0,0 @@ -from typing import Tuple, List - -import depthai as dai - -from depthai_sdk.classes.packets import FramePacket -from depthai_sdk.oak_outputs.xout.xout_base import StreamXout -from depthai_sdk.oak_outputs.xout.xout_h26x import XoutH26x -from depthai_sdk.oak_outputs.xout.xout_mjpeg import XoutMjpeg -from depthai_sdk.oak_outputs.xout.xout_nn import XoutNnResults - - -class XoutNnH26x(XoutNnResults, XoutH26x): - name: str = "H26x NN Results" - # Streams - frames: StreamXout - nn_results: StreamXout - - def __init__(self, - det_nn: 'NNComponent', - frames: StreamXout, - nn_results: StreamXout, - color: bool, - profile: dai.VideoEncoderProperties.Profile, - fps: float, - frame_shape: Tuple[int, ...]): - self.nn_results = nn_results - - XoutH26x.__init__(self, frames, color, profile, fps, frame_shape) - XoutNnResults.__init__(self, det_nn, frames, nn_results) - - def xstreams(self) -> List[StreamXout]: - return [self.frames, self.nn_results] - - def visualize(self, packet: FramePacket): - decoded_frame = XoutH26x.decode_frame(self, packet) - if decoded_frame is None: - return - 
- packet.frame = decoded_frame - XoutNnResults.visualize(self, packet) - - -class XoutNnMjpeg(XoutNnResults, XoutMjpeg): - def __init__(self, - det_nn: 'NNComponent', - frames: StreamXout, - nn_results: StreamXout, - color: bool, - lossless: bool, - fps: float, - frame_shape: Tuple[int, ...]): - self.nn_results = nn_results - XoutMjpeg.__init__(self, frames, color, lossless, fps, frame_shape) - XoutNnResults.__init__(self, det_nn, frames, nn_results) - - def visualize(self, packet: FramePacket): - packet.frame = XoutMjpeg.decode_frame(self, packet) - XoutNnResults.visualize(self, packet) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py index 26bb2a438..6dd73ee0e 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_pointcloud.py @@ -1,14 +1,12 @@ -import logging -import warnings -from typing import List, Optional, Union +from typing import List, Optional import depthai as dai import numpy as np -from depthai_sdk.classes.packets import DepthPacket, PointcloudPacket +from depthai_sdk.classes.packets import PointcloudPacket +from depthai_sdk.components.pointcloud_helper import create_xyz from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.components.pointcloud_helper import create_xyz try: import cv2 @@ -20,27 +18,21 @@ class XoutPointcloud(XoutFrames): def __init__(self, device: dai.Device, depth_frames: StreamXout, - fps: int, color_frames: Optional[StreamXout] = None): - self.color_frames = color_frames - XoutFrames.__init__(self, frames=depth_frames, fps=fps) + XoutFrames.__init__(self, frames=depth_frames) self.name = 'Pointcloud' - self.fps = fps self.device = device self.xyz = None self.msgs = dict() - def visualize(self, packet: DepthPacket): - pass - def xstreams(self) -> List[StreamXout]: if self.color_frames is not None: return [self.frames, self.color_frames] return [self.frames] - def new_msg(self, name: str, msg: dai.Buffer) -> None: + def new_msg(self, name: str, msg: dai.Buffer): if name not in self._streams: return # From Replay modules. TODO: better handling? @@ -59,9 +51,6 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: if len(self.msgs[seq]) == len(self.xstreams()): # Frames synced! 
- if self.queue.full(): - self.queue.get() # Get one, so queue isn't full - depth_frame: dai.ImgFrame = self.msgs[seq][self.frames.name] color_frame = None @@ -71,21 +60,19 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: if self.xyz is None: self.xyz = create_xyz(self.device, depth_frame.getWidth(), depth_frame.getHeight()) - pcl = self.xyz * np.expand_dims(np.array(depth_frame.getFrame()), axis = -1) + pcl = self.xyz * np.expand_dims(np.array(depth_frame.getFrame()), axis=-1) # TODO: postprocessing - - packet = PointcloudPacket( - self.get_packet_name(), - pcl, - depth_map=depth_frame, - color_frame=color_frame, - visualizer=self._visualizer - ) - self.queue.put(packet, block=False) - + # Cleanup new_msgs = {} for name, msg in self.msgs.items(): if int(name) > int(seq): new_msgs[name] = msg self.msgs = new_msgs + + return PointcloudPacket( + self.get_packet_name(), + pcl, + depth_map=depth_frame, + colorize_frame=color_frame + ) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py index a9399aebf..31665af85 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_seq_sync.py @@ -1,5 +1,5 @@ from abc import abstractmethod -from typing import List +from typing import List, Union, Dict from depthai_sdk.oak_outputs.syncing import SequenceNumSync from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout @@ -10,20 +10,22 @@ def xstreams(self) -> List[StreamXout]: return self.streams def __init__(self, streams: List[StreamXout]): - self.streams = streams + # Filter out None streams + self.streams = [s for s in streams if s is not None] + # Save StreamXout before initializing super()! 
XoutBase.__init__(self) - SequenceNumSync.__init__(self, len(streams)) + SequenceNumSync.__init__(self, len(self.streams)) self.msgs = dict() @abstractmethod - def package(self, msgs: List): + def package(self, msgs: Union[List, Dict]): raise NotImplementedError('XoutSeqSync is an abstract class, you need to override package() method!') - def new_msg(self, name: str, msg) -> None: + def new_msg(self, name: str, msg): # Ignore frames that we aren't listening for if name not in self._streams: return synced = self.sync(msg.getSequenceNum(), name, msg) if synced: - self.package(synced) + return self.package(synced) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py index 08309cd5c..ecd989d5f 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_tracker.py @@ -1,345 +1,210 @@ import logging import math -from collections import defaultdict -from typing import Union, Dict, Optional +from datetime import timedelta +from typing import Dict, Optional, List, Union, Tuple import depthai as dai import numpy as np -from depthai_sdk.classes import DetectionPacket, TrackerPacket -from depthai_sdk.classes.packets import _TrackingDetection + +from depthai_sdk.classes import TrackerPacket +from depthai_sdk.classes.packets import TrackingDetection from depthai_sdk.oak_outputs.xout.xout_base import StreamXout from depthai_sdk.oak_outputs.xout.xout_nn import XoutNnResults from depthai_sdk.tracking import KalmanFilter from depthai_sdk.visualize.bbox import BoundingBox -from depthai_sdk.visualize.configs import TextPosition -from depthai_sdk.visualize.visualizer import Visualizer -class XoutTracker(XoutNnResults): - buffer_size: int = 10 +class TrackedObject: + def __init__(self, baseline: float, focal: float, apply_kalman: bool, calculate_speed: bool): + # Point + self.kalman_3d: Optional[KalmanFilter] = None + # BBox + self.kalman_2d: Optional[KalmanFilter] = None + + self.previous_detections: List[TrackingDetection] = [] + self.blacklist = False + self.lost_counter = 0 + + self.baseline = baseline + self.focal = focal + self.apply_kalman = apply_kalman + self.calculate_speed = calculate_speed + + def new_tracklet(self, tracklet: dai.Tracklet, ts: timedelta, color: Tuple, label: str): + is_3d = self._is_3d(tracklet) + tracking_det = TrackingDetection( + img_detection=tracklet.srcImgDetection, + label_str=label, + confidence=tracklet.srcImgDetection.confidence, + color=color, + bbox=BoundingBox(tracklet.srcImgDetection), + angle=None, + tracklet=tracklet, + ts=ts, + filtered_2d=self._calc_kalman_2d(tracklet, ts) if self.apply_kalman else None, + filtered_3d=self._calc_kalman_3d(tracklet, ts) if self.apply_kalman and is_3d else None, + speed=None, + ) + self.previous_detections.append(tracking_det) + # Calc speed should be called after adding new TrackingDetection to self.previous_detections + tracking_det.speed = self.calc_speed(ts) if (self.calculate_speed and is_3d) else None + + def calc_speed(self, ts: timedelta) -> Union[float, np.ndarray]: + """ + Should be called after adding new TrackingDetection to self.previous_detections + """ + + def get_coords(det) -> dai.Point3f: + return det.filtered_3d or det.tracklet.spatialCoordinates + + def get_dist(p1: dai.Point3f, p2: dai.Point3f) -> float: + return np.sqrt((p2.x - p1.x) ** 2 + (p2.y - p1.y) ** 2 + (p2.z - p1.z) ** 2) / 1000 + + speeds = [] + for i in range(len(self.previous_detections) - 1): + d1 
= self.previous_detections[i] + # if d1 timestamp is older than 1 second, skip + if (ts - d1.ts).total_seconds() > 1: + continue + d2 = self.previous_detections[i + 1] + distance = get_dist(get_coords(d1), get_coords(d2)) + time = (d2.ts - d1.ts).total_seconds() + speeds.append(distance / time) + + if len(speeds) == 0: + return 0.0 + + window_size = 3 + window = np.hanning(window_size) + window /= window.sum() + + smoothed = np.convolve(speeds, window, mode='same') + return np.mean(smoothed) + + def _is_3d(self, tracklet: dai.Tracklet) -> bool: + return (tracklet.spatialCoordinates.x != 0.0 or + tracklet.spatialCoordinates.y != 0.0 or + tracklet.spatialCoordinates.z != 0.0) + + def _calc_kalman_3d(self, tracklet: dai.Tracklet, ts: timedelta) -> Union[None, dai.Point3f]: + x_space = tracklet.spatialCoordinates.x + y_space = tracklet.spatialCoordinates.y + z_space = tracklet.spatialCoordinates.z + meas_vec_space = np.array([[x_space], [y_space], [z_space]]) + meas_std_space = z_space ** 2 / (self.baseline * self.focal) + + if self.kalman_3d is None: + self.kalman_3d = KalmanFilter(10, 0.1, meas_vec_space, ts) + return None + + dt = (ts - self.kalman_3d.time).total_seconds() + self.kalman_3d.predict(dt) + self.kalman_3d.update(meas_vec_space) + self.kalman_3d.time = ts + self.kalman_3d.meas_std = meas_std_space + vec_space = self.kalman_3d.x + return dai.Point3f(vec_space[0], vec_space[1], vec_space[2]) + + def _calc_kalman_2d(self, tracklet: dai.Tracklet, ts: timedelta) -> Union[None, BoundingBox]: + bb = BoundingBox(tracklet.srcImgDetection) + x_mid, y_mid = bb.get_centroid().to_tuple() + + meas_vec_bbox = np.array([[x_mid], [y_mid], [bb.width], [bb.height]]) + + if self.kalman_2d is None: + self.kalman_2d = KalmanFilter(10, 0.1, meas_vec_bbox, ts) + return None + + dt = (ts - self.kalman_2d.time).total_seconds() + + self.kalman_2d.predict(dt) + self.kalman_2d.update(meas_vec_bbox) + self.kalman_2d.time = ts + vec_bbox = self.kalman_2d.x + + return BoundingBox([ + vec_bbox[0][0] - vec_bbox[2][0] / 2, + vec_bbox[1][0] - vec_bbox[3][0] / 2, + vec_bbox[0][0] + vec_bbox[2][0] / 2, + vec_bbox[1][0] + vec_bbox[3][0] / 2, + ]) + +class XoutTracker(XoutNnResults): def __init__(self, det_nn: 'NNComponent', frames: StreamXout, device: dai.Device, tracklets: StreamXout, + bbox: BoundingBox, apply_kalman: bool = False, forget_after_n_frames: Optional[int] = None, - calculate_speed: bool = False): - super().__init__(det_nn, frames, tracklets) + calculate_speed: bool = False, + ): + """ + apply_kalman: Whether to apply kalman filter to tracklets + forget_after_n_frames: If tracklet is lost for n frames, remove it from tracked_objects + + """ + super().__init__(det_nn, frames, tracklets, bbox) self.name = 'Object Tracker' - self.device = device - - self.__read_device_calibration() - - self.buffer = [] - self.spatial_buffer = [] + self.__read_device_calibration(device) - self.lost_counter = {} - self.blacklist = set() + self.tracked_objects: Dict[int, TrackedObject] = {} self.apply_kalman = apply_kalman self.forget_after_n_frames = forget_after_n_frames - self.kalman_filters: Dict[int, Dict[str, KalmanFilter]] = {} self.calculate_speed = calculate_speed - def setup_visualize(self, - visualizer: Visualizer, - visualizer_enabled: bool, - name: str = None): - super().setup_visualize(visualizer, visualizer_enabled, name) - - def on_callback(self, packet: Union[DetectionPacket, TrackerPacket]): - if len(packet.frame.shape) == 2: - packet.frame = np.dstack((packet.frame, packet.frame, packet.frame)) - - 
frame_shape = self.det_nn._input.stream_size[::-1] - - if self._frame_shape is None: - # Lazy-load the frame shape - self._frame_shape = np.array([*frame_shape]) - if self._visualizer: - self._visualizer.frame_shape = self._frame_shape - - spatial_points = self._get_spatial_points(packet) - threshold = self.forget_after_n_frames - - if threshold: - self._update_lost_counter(packet, threshold) - - self._update_buffers(packet, spatial_points) - - # Optional kalman filter - if self.apply_kalman: - self._kalman_filter(packet, spatial_points) + def package(self, msgs: Dict) -> TrackerPacket: + tracklets: dai.Tracklets = msgs[self.nn_results.name] - # Estimate speed - tracklet2speed = self._calculate_speed(spatial_points) + for tracklet in tracklets.tracklets: + # If there is no id in self.tracked_objects, create new TrackedObject. This could happen if + # TrackingStatus.NEW, or we removed it (too many lost frames) + if tracklet.id not in self.tracked_objects: + self.tracked_objects[tracklet.id] = TrackedObject(self.baseline, self.focal, self.apply_kalman, + self.calculate_speed) - if self._visualizer: - self._add_tracklet_visualization(packet, spatial_points, tracklet2speed) - - self._add_detections(packet, tracklet2speed) + if tracklet.status == dai.Tracklet.TrackingStatus.NEW: + pass + elif tracklet.status == dai.Tracklet.TrackingStatus.TRACKED: + self.tracked_objects[tracklet.id].lost_counter = 0 + elif tracklet.status == dai.Tracklet.TrackingStatus.LOST: + self.tracked_objects[tracklet.id].lost_counter += 1 - def visualize(self, packet): - super().visualize(packet) + img_d = tracklet.srcImgDetection + # When adding new tracklet, TrackletObject class will also perform filtering + # and speed estimation + self.tracked_objects[tracklet.id] \ + .new_tracklet(tracklet, + tracklets.getTimestamp(), + self.labels[img_d.label][1] if self.labels else (255, 255, 255), + self.labels[img_d.label][0] if self.labels else str(img_d.label) + ) + if tracklet.status == dai.Tracklet.TrackingStatus.REMOVED or \ + (self.forget_after_n_frames is not None and \ + self.forget_after_n_frames <= self.tracked_objects[tracklet.id].lost_counter): + # Remove TrackedObject + self.tracked_objects.pop(tracklet.id) - def package(self, msgs: Dict): - if self.queue.full(): - self.queue.get() # Get one, so queue isn't full packet = TrackerPacket( self.get_packet_name(), msgs[self.frames.name], - msgs[self.nn_results.name], - self._visualizer - ) - self.queue.put(packet, block=False) - - def _add_tracklet_visualization(self, packet, spatial_points, tracklet2speed): - h, w = self._frame_shape[:2] - filtered_tracklets = [tracklet for tracklet in packet.daiTracklets.tracklets if - tracklet.id not in self.blacklist] - - norm_bbox = BoundingBox().resize_to_aspect_ratio(packet.frame.shape, self._nn_size, self._resize_mode) - - self._visualizer.add_detections(detections=filtered_tracklets, - normalizer=norm_bbox, - label_map=self.labels, - spatial_points=spatial_points) - - # Add tracking ids - for tracklet in filtered_tracklets: - det = tracklet.srcImgDetection - bbox = (w * det.xmin, h * det.ymin, w * det.xmax, h * det.ymax) - bbox = tuple(map(int, bbox)) - self._visualizer.add_text( - f'ID: {tracklet.id}', - bbox=bbox, - position=TextPosition.MID - ) - - if self._visualizer.config.tracking.show_speed and tracklet.id in tracklet2speed: - speed = tracklet2speed[tracklet.id] - speed = f'{speed:.1f} m/s\n{speed * 3.6:.1f} km/h' - bbox = tracklet.srcImgDetection - bbox = (int(w * bbox.xmin), int(h * bbox.ymin), int(w * bbox.xmax), int(h 
* bbox.ymax)) - - self._visualizer.add_text( - speed, - bbox=bbox, - position=TextPosition.TOP_RIGHT, - outline=True - ) - - # Add tracking lines - self._visualizer.add_trail( - tracklets=[t for p in self.buffer for t in p.daiTracklets.tracklets if t.id not in self.blacklist], - label_map=self.labels, - bbox=norm_bbox, + tracklets, + bbox=self.bbox, ) - def _update_lost_counter(self, packet, lost_threshold: int): - for i, tracklet in enumerate(packet.daiTracklets.tracklets): - if tracklet.status == dai.Tracklet.TrackingStatus.NEW: - self.__remove_from_blacklist(tracklet) - self.lost_counter[tracklet.id] = 0 - elif tracklet.status == dai.Tracklet.TrackingStatus.TRACKED: - self.__remove_from_blacklist(tracklet) - self.lost_counter[tracklet.id] = 0 - elif tracklet.status == dai.Tracklet.TrackingStatus.LOST and tracklet.id in self.lost_counter: - self.lost_counter[tracklet.id] += 1 - - if tracklet.id in self.lost_counter and self.lost_counter[tracklet.id] >= lost_threshold: - self.__add_to_blacklist(tracklet) - self.lost_counter.pop(tracklet.id) - - def _update_buffers(self, packet, spatial_points=None): - # Update buffer - self.buffer.append(packet) - if self.buffer_size < len(self.buffer): - self.buffer.pop(0) - - # Update spatial buffer - if spatial_points is not None: - self.spatial_buffer.append(spatial_points) - if self.buffer_size < 5: - self.spatial_buffer.pop(0) - - def _kalman_filter(self, packet, spatial_points=None): - current_time = packet.daiTracklets.getTimestamp() - is_3d = spatial_points is not None - - tracklets = [] - - for i, tracklet in enumerate(packet.daiTracklets.tracklets): - if tracklet.id in self.blacklist: # Skip blacklisted tracklets - continue - - meas_vec_space = 0 - meas_std_space = 0 - - roi = tracklet.roi - x1 = roi.topLeft().x - y1 = roi.topLeft().y - x2 = roi.bottomRight().x - y2 = roi.bottomRight().y + for obj_id, tracked_obj in self.tracked_objects.items(): + if obj_id not in packet.tracklets: + packet.tracklets[obj_id] = [] + for tracking_det in tracked_obj.previous_detections: + packet.tracklets[obj_id].append(tracking_det) - if is_3d: - x_space = tracklet.spatialCoordinates.x - y_space = tracklet.spatialCoordinates.y - z_space = tracklet.spatialCoordinates.z - meas_vec_space = np.array([[x_space], [y_space], [z_space]]) - meas_std_space = z_space ** 2 / (self.baseline * self.focal) + return packet - meas_vec_bbox = np.array([[(x1 + x2) / 2], [(y1 + y2) / 2], [x2 - x1], [y2 - y1]]) - - if tracklet.status == dai.Tracklet.TrackingStatus.NEW: - self.kalman_filters[tracklet.id] = {'bbox': KalmanFilter(10, 0.1, meas_vec_bbox, current_time)} - if is_3d: - self.kalman_filters[tracklet.id]['space'] = KalmanFilter(10, 0.1, meas_vec_space, current_time) - - elif tracklet.status == dai.Tracklet.TrackingStatus.TRACKED or tracklet.status == dai.Tracklet.TrackingStatus.LOST: - if tracklet.id not in self.kalman_filters: - continue - - dt = current_time - self.kalman_filters[tracklet.id]['bbox'].time - dt = dt.total_seconds() - - self.kalman_filters[tracklet.id]['bbox'].predict(dt) - self.kalman_filters[tracklet.id]['bbox'].update(meas_vec_bbox) - self.kalman_filters[tracklet.id]['bbox'].time = current_time - vec_bbox = self.kalman_filters[tracklet.id]['bbox'].x - - if is_3d: - self.kalman_filters[tracklet.id]['space'].predict(dt) - self.kalman_filters[tracklet.id]['space'].update(meas_vec_space) - self.kalman_filters[tracklet.id]['space'].time = current_time - self.kalman_filters[tracklet.id]['space'].meas_std = meas_std_space - vec_space = 
self.kalman_filters[tracklet.id]['space'].x - - x1_filter = vec_bbox[0] - vec_bbox[2] / 2 - x2_filter = vec_bbox[0] + vec_bbox[2] / 2 - y1_filter = vec_bbox[1] - vec_bbox[3] / 2 - y2_filter = vec_bbox[1] + vec_bbox[3] / 2 - - rect = dai.Rect(x1_filter, y1_filter, x2_filter - x1_filter, y2_filter - y1_filter) - new_tracklet = self.__create_tracklet(tracklet, rect, vec_space if is_3d else None) - tracklets.append(new_tracklet) - - elif tracklet.status == dai.Tracklet.TrackingStatus.REMOVED: - self.kalman_filters.pop(tracklet.id, None) - - if tracklets: - packet.daiTracklets.tracklets = tracklets - - def _add_detections(self, packet, tracklet2speed): - for tracklet in packet.daiTracklets.tracklets: - if tracklet.id in self.blacklist: # Skip blacklisted tracklets - continue - - d = _TrackingDetection() - img_d = tracklet.srcImgDetection - d.tracklet = tracklet - d.label = self.labels[img_d.label][0] if self.labels else str(img_d.label) - d.color = self.labels[img_d.label][1] if self.labels else (255, 255, 255) - roi = tracklet.roi.denormalize(self._frame_shape[1], self._frame_shape[0]) - d.top_left = (int(roi.x), int(roi.y)) - d.bottom_right = (int(roi.x + roi.width), int(roi.y + roi.height)) - - if tracklet.id in tracklet2speed: - d.speed = tracklet2speed[tracklet.id] - d.speed_kmph = d.speed * 3.6 - d.speed_mph = d.speed * 2.23694 - - packet.detections.append(d) - - def _calculate_speed(self, spatial_points) -> dict: - if spatial_points is None or self.calculate_speed is False: - return {} - - tracklet2speed = {} - if spatial_points is not None: - spatial_coords = defaultdict(list) - t = defaultdict(list) - tracklets = defaultdict(list) - for buffered_packet in self.buffer: - for tracklet in buffered_packet.daiTracklets.tracklets: - spatial_coords[tracklet.id].append(tracklet.spatialCoordinates) - t[tracklet.id].append(buffered_packet.daiTracklets.getTimestamp()) - tracklets[tracklet.id].append(tracklet) - - indices = spatial_coords.keys() - for idx in indices: - # Skip if there is only one point - if len(spatial_coords[idx]) < 2: - continue - - n = len(spatial_coords[idx]) - speeds = [] - - for i in range(n - 1): - x1, y1, z1 = spatial_coords[idx][i].x, spatial_coords[idx][i].y, spatial_coords[idx][i].z - x2, y2, z2 = spatial_coords[idx][i + 1].x, spatial_coords[idx][i + 1].y, spatial_coords[idx][ - i + 1].z - distance = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2 + (z2 - z1) ** 2) / 1000 - time = (t[idx][i + 1] - t[idx][i]).total_seconds() - speeds.append(distance / time) - - window_size = 3 - window = np.hanning(window_size) - window /= window.sum() - - smoothed = np.convolve(speeds, window, mode='same') - speed = np.mean(smoothed) - - tracklet2speed[idx] = speed - - return tracklet2speed - - @staticmethod - def _get_spatial_points(packet) -> list: - try: - if packet._is_spatial_detection(): - spatial_points = [packet._get_spatials(det.srcImgDetection) - for det in - packet.daiTracklets.tracklets] - else: - spatial_points = None - except IndexError: - spatial_points = None - - return spatial_points - - def __get_img_detection(self, tracklet, confidence: float = 1.0): - """Converts tracklet to ImgDetection.""" - img_d = dai.ImgDetection() - img_d.label = tracklet.label - img_d.confidence = confidence - img_d.xmin = tracklet.roi.x - img_d.ymin = tracklet.roi.y - img_d.xmax = tracklet.roi.x + tracklet.roi.width - img_d.ymax = tracklet.roi.y + tracklet.roi.height - return img_d - - def __create_tracklet(self, tracklet, roi=None, spatial_points=None): - """Creates a Tracklet object.""" - 
tracklet_obj = dai.Tracklet() - tracklet_obj.id = tracklet.id - tracklet_obj.age = tracklet.age - tracklet_obj.label = tracklet.label - tracklet_obj.status = tracklet.status - tracklet_obj.roi = roi - if spatial_points is not None: - tracklet_obj.spatialCoordinates = dai.Point3f(spatial_points[0], spatial_points[1], spatial_points[2]) - else: - tracklet_obj.spatialCoordinates = tracklet.spatialCoordinates - - img_d = self.__get_img_detection(tracklet, confidence=tracklet.srcImgDetection.confidence) - tracklet_obj.srcImgDetection = img_d - return tracklet_obj - - def __read_device_calibration(self): - calib = self.device.readCalibration() + def __read_device_calibration(self, device: dai.Device): + calib = device.readCalibration() eeprom = calib.getEepromData() left_cam = calib.getStereoLeftCameraId() if left_cam != dai.CameraBoardSocket.AUTO and left_cam in eeprom.cameraData.keys(): @@ -351,11 +216,3 @@ def __read_device_calibration(self): logging.warning("Calibration data missing, using OAK-D defaults") self.baseline = 75 self.focal = 440 - - def __add_to_blacklist(self, tracklet): - if tracklet.id not in self.blacklist: - self.blacklist.add(tracklet.id) - - def __remove_from_blacklist(self, tracklet): - if tracklet.id in self.blacklist: - self.blacklist.remove(tracklet.id) diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout_base.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout_base.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/depthai_sdk/src/depthai_sdk/previews.py b/depthai_sdk/src/depthai_sdk/previews.py index b694d8807..24e3a4798 100644 --- a/depthai_sdk/src/depthai_sdk/previews.py +++ b/depthai_sdk/src/depthai_sdk/previews.py @@ -11,6 +11,7 @@ try: from turbojpeg import TurboJPEG, TJFLAG_FASTUPSAMPLE, TJFLAG_FASTDCT, TJPF_GRAY + turbo = TurboJPEG() except: turbo = None diff --git a/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py b/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py index ee583c456..65798a159 100644 --- a/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/abstract_reader.py @@ -2,8 +2,10 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import List, Tuple, Dict + import numpy as np + class AbstractReader(ABC): @abstractmethod def read(self) -> Dict[str, np.ndarray]: diff --git a/depthai_sdk/src/depthai_sdk/readers/db3_reader.py b/depthai_sdk/src/depthai_sdk/readers/db3_reader.py index 32c7d0b06..8a8037803 100644 --- a/depthai_sdk/src/depthai_sdk/readers/db3_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/db3_reader.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Generator, List, Dict, Tuple +from typing import Generator, List, Dict, Tuple, Optional import cv2 import numpy as np @@ -34,8 +34,7 @@ def __init__(self, folder: Path) -> None: if stream.lower() in con.topic.lower(): self.generators[stream.lower()] = self.reader.messages([con]) - - def read(self) -> Dict[str, np.ndarray]: + def read(self) -> Optional[Dict[str, np.ndarray]]: ros_msgs: Dict[str, np.ndarray] = dict() try: @@ -76,7 +75,7 @@ def getStreams(self) -> List[str]: def getShape(self, name: str) -> Tuple[int, int]: frame = self.frames[name] - return (frame.shape[1], frame.shape[0]) + return frame.shape[1], frame.shape[0] def get_message_size(self, name: str) -> int: size = 1 diff --git a/depthai_sdk/src/depthai_sdk/readers/image_reader.py b/depthai_sdk/src/depthai_sdk/readers/image_reader.py index 345008965..5a278548f 100644 --- 
a/depthai_sdk/src/depthai_sdk/readers/image_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/image_reader.py @@ -59,9 +59,9 @@ def __init__(self, path: Path) -> None: self.cntr[name] = 0 self.last_cycle_time = time.time() - self.cycle_sec = 3.0 # Images get cycled every 3 seconds by default + self.cycle_sec = 3.0 # Images get cycled every 3 seconds by default - def set_cycle_fps(self, fps): # Called from replay.py on set_fps() + def set_cycle_fps(self, fps): # Called from replay.py on set_fps() self.cycle_sec = 1.0 / fps def read(self) -> Dict[str, np.ndarray]: diff --git a/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py b/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py index eb9cfd619..96ed81348 100644 --- a/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/rosbag_reader.py @@ -33,12 +33,12 @@ def getStreams(self) -> List[str]: def getShape(self, name: str) -> Tuple[int, int]: connection, _, rawdata = next(self.reader.messages('/device_0/sensor_0/Depth_0/image/data')) msg = deserialize_cdr(ros1_to_cdr(rawdata, connection.msgtype), connection.msgtype) - return (msg.width, msg.height) + return msg.width, msg.height def get_message_size(self, name: str) -> int: connection, _, rawdata = next(self.reader.messages('/device_0/sensor_0/Depth_0/image/data')) msg = deserialize_cdr(ros1_to_cdr(rawdata, connection.msgtype), connection.msgtype) - return len(msg.data) # TODO: test + return len(msg.data) # TODO: test def close(self): self.reader.close() diff --git a/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py b/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py index 725729ce5..bbe4984f4 100644 --- a/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py @@ -1,6 +1,7 @@ import os from pathlib import Path -from typing import List, Tuple, Dict, Any +from typing import List, Tuple, Dict, Any, Optional + import depthai as dai try: @@ -21,6 +22,7 @@ class VideoCapReader(AbstractReader): def __init__(self, path: Path, loop: bool = False) -> None: self.videos: Dict[str, Any] = {} + self._closed = False # self.initialFrames: Dict[str, Any] = dict() # self.shapes: Dict[str, Tuple[int, int]] = dict() @@ -40,14 +42,16 @@ def __init__(self, path: Path, loop: bool = False) -> None: continue # Check if name of the file starts with left.. 
right.., or CameraBoardSocket - if f_name.startswith('CameraBoardSocket.'): - f_name = f_name.split('CameraBoardSocket.')[1] + if f_name.startswith('CAM_'): + # Remove everything after CAM_x + f_name = f_name[:5] + socket = None try: socket = parse_camera_socket(f_name) except ValueError: # Invalid file name - continue + pass # TODO: avoid changing stream names, just use socket # stream = str(socket) @@ -72,6 +76,8 @@ def __init__(self, path: Path, loop: bool = False) -> None: video['initialFrame'] = f def read(self): + if self._closed: + return False frames = dict() for name, video in self.videos.items(): if video['initialFrame'] is not None: @@ -101,11 +107,13 @@ def getStreams(self) -> List[str]: def getShape(self, name: str) -> Tuple[int, int]: shape = self.videos[name.lower()]['shape'] return shape - def get_socket(self, name: str): + + def get_socket(self, name: str) -> Optional[dai.CameraBoardSocket]: return self.videos[name.lower()]['socket'] def close(self): [r['reader'].release() for _, r in self.videos.items()] + self._closed = True def disableStream(self, name: str): if name.lower() in self.videos: diff --git a/depthai_sdk/src/depthai_sdk/record.py b/depthai_sdk/src/depthai_sdk/record.py index c9dd3c917..97869447c 100644 --- a/depthai_sdk/src/depthai_sdk/record.py +++ b/depthai_sdk/src/depthai_sdk/record.py @@ -4,13 +4,12 @@ from pathlib import Path from queue import Queue from threading import Thread -from typing import Dict, List +from typing import List import depthai as dai -from depthai_sdk.classes.packets import FramePacket +from depthai_sdk.classes.packets import FramePacket, IMUPacket from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames -from depthai_sdk.oak_outputs.xout.xout_seq_sync import XoutSeqSync from depthai_sdk.recorders.abstract_recorder import Recorder @@ -36,12 +35,13 @@ def _run(recorder: Recorder, frame_queue: Queue): class RecordType(IntEnum): VIDEO = 1 # Save to video file - ROSBAG = 2 # To ROS .bag - MCAP = 3 # To .mcap - DB3 = 4 # To .db3 (ros2) + VIDEO_LOSSLESS = 2 # Save to lossless video file (.avi) + ROSBAG = 3 # To ROS .bag + MCAP = 4 # To .mcap + DB3 = 5 # To .db3 (ros2) -class Record(XoutSeqSync): +class Record: """ This class records depthai streams from OAK cameras into different formats. It will also save calibration .json, so depth reconstruction will be possible. 
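# --- Illustrative usage sketch, not part of the patch above ------------------
# The record.py hunks add RecordType.VIDEO_LOSSLESS, which routes recording
# through VideoRecorder(lossless=True) and saves lossless .avi files instead of
# containerized .mp4. A minimal way this could be driven from user code,
# assuming OakCamera.record() keeps its (outputs, path, record_type) signature:
from depthai_sdk import OakCamera, RecordType

with OakCamera() as oak:
    color = oak.create_camera('color', resolution='1080p', fps=30)
    # Raw (non-encoded) frames recorded losslessly; expect large .avi files.
    oak.record([color.out.main], './recordings', record_type=RecordType.VIDEO_LOSSLESS)
    oak.start(blocking=True)
# ------------------------------------------------------------------------------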
@@ -53,7 +53,6 @@ def __init__(self, path: Path, record_type: RecordType): path (Path): Path to the recording folder record_type (RecordType): Recording type """ - super().__init__([]) # We don't yet have streams, we will set it up later self.folder = path self.record_type = record_type self.frame_q = None @@ -70,6 +69,9 @@ def __init__(self, path: Path, record_type: RecordType): elif self.record_type == RecordType.VIDEO: from .recorders.video_recorder import VideoRecorder self.recorder = VideoRecorder() + elif self.record_type == RecordType.VIDEO_LOSSLESS: + from .recorders.video_recorder import VideoRecorder + self.recorder = VideoRecorder(lossless=True) elif self.record_type == RecordType.ROSBAG: from .recorders.rosbag_recorder import Rosbag1Recorder self.recorder = Rosbag1Recorder() @@ -79,39 +81,23 @@ def __init__(self, path: Path, record_type: RecordType): else: raise ValueError(f"Recording type '{self.record_type}' isn't supported!") - def package(self, msgs: Dict): - # Here we get sequence-num synced messages:) - mapped = dict() - for name, msg in msgs.items(): - if name in self.name_mapping: # Map to friendly name - mapped[self.name_mapping[name]] = msg - else: - mapped[name] = msg - - self.frame_q.put(mapped) - - def visualize(self, packet: FramePacket) -> None: - pass # No need. + def write(self, packets): + if not isinstance(packets, dict): + packets = {packets.name: packets} - def no_sync(self, name: str, msg): - # name = self.name_mapping[name] if name in self.name_mapping else name - obj = {name: msg} - self.frame_q.put(obj) + msgs = dict() + for name, packet in packets.items(): + if isinstance(packet, FramePacket): + msgs[name] = packet.msg + elif isinstance(packet, IMUPacket): + msgs[name] = packet.packet + self.frame_q.put(msgs) def start(self, device: dai.Device, xouts: List[XoutFrames]): """ Start recording process. This will create and start the pipeline, start recording threads, and initialize all queues. 
""" - if self.record_type == RecordType.VIDEO: - self._streams = [out.frames.name for out in xouts] # required by XoutSeqSync - self.stream_num = len(xouts) - self.name_mapping = dict() - for xout in xouts: - self.name_mapping[xout.frames.name] = xout.name - else: # For MCAP/Rosbags we don't need msg syncing - self.new_msg = self.no_sync - self.mxid = device.getMxId() self.path = self._create_folder(self.folder, self.mxid) calib_data = device.readCalibration() @@ -130,9 +116,6 @@ def config_mcap(self, pointcloud: bool): return self.recorder.set_pointcloud(pointcloud) - # def config_video(self, ): - # Nothing to configure for video recorder - # TODO: implement config of BAG to either record depth as frame or pointcloud # def config_bag(self, pointcloud: bool): # if self.type != RecordType.BAG: diff --git a/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py index c0adad881..6b593300e 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py +++ b/depthai_sdk/src/depthai_sdk/recorders/abstract_recorder.py @@ -1,9 +1,11 @@ from abc import ABC, abstractmethod +from enum import IntEnum from pathlib import Path from typing import List + import depthai as dai + import depthai_sdk.oak_outputs.xout as outputs -from enum import IntEnum class Recorder(ABC): @@ -30,16 +32,7 @@ class StreamType(IntEnum): IMU = 5 def __init__(self, xout: outputs.xout_base.XoutBase): - if isinstance(xout, outputs.xout_mjpeg.XoutMjpeg): - self.type = self.StreamType.MJPEG - self.xlink_name = xout.frames.name - elif isinstance(xout, outputs.xout_h26x.XoutH26x): - self.xlink_name = xout.frames.name - if xout.profile == dai.VideoEncoderProperties.Profile.H265_MAIN: - self.type = self.StreamType.H265 - else: - self.type = self.StreamType.H264 - elif isinstance(xout, outputs.xout_depth.XoutDepth): + if isinstance(xout, outputs.xout_depth.XoutDisparityDepth): self.xlink_name = xout.frames.name self.type = self.StreamType.DEPTH # TODO is depth raw or should it be DEPTH? 
elif isinstance(xout, outputs.xout_disparity.XoutDisparity): @@ -47,8 +40,16 @@ def __init__(self, xout: outputs.xout_base.XoutBase): self.type = self.StreamType.RAW elif isinstance(xout, outputs.xout_frames.XoutFrames): self.xlink_name = xout.frames.name - self.type = self.StreamType.RAW - elif isinstance(xout, outputs.XoutIMU): + if xout._fourcc is None: + self.type = self.StreamType.RAW + elif xout._fourcc == 'hevc': + self.type = self.StreamType.H265 + elif xout._fourcc == 'h264': + self.type = self.StreamType.H264 + elif xout._fourcc == 'mjpeg': + self.type = self.StreamType.MJPEG + + elif isinstance(xout, outputs.xout_imu.XoutIMU): self.xlink_name = xout.imu_out.name self.type = self.StreamType.IMU else: diff --git a/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py index a5574ab0e..72d868380 100755 --- a/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py +++ b/depthai_sdk/src/depthai_sdk/recorders/rosbag_recorder.py @@ -236,6 +236,7 @@ def __init__(self): self._closed = False self.imu_interpolation = ImuInterpolation() + def _update(self, device: dai.Device, xouts: List['XoutFrames']): """ Args: @@ -357,20 +358,19 @@ def write(self, name: str, dai_msg: dai.Buffer): elif stream.ros_type == PointCloud2: raise Exception('PointCloud2 not yet implemented') elif stream.ros_type == Imu: - dai_msg: dai.IMUData - for packet in dai_msg.packets: - report = packet.acceleroMeter or packet.gyroscope or packet.magneticField or packet.rotationVector - msg = Imu( - header=self.get_header(report.getTimestampDevice(), report.sequence), - orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0), - orientation_covariance=np.array([-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), - angular_velocity=Vector3(0.0, 0.0, 0.0), - angular_velocity_covariance=np.array([]), - linear_acceleration=Vector3(0.0, 0.0, 0.0), - linear_acceleration_covariance=np.array([]) - ) - self.imu_interpolation.Imu(msg, packet) - self.write_to_rosbag(name, stream.ros_type.__msgtype__, msg) + packet: dai.IMUPacket = dai_msg + report = packet.acceleroMeter or packet.gyroscope or packet.magneticField or packet.rotationVector + msg = Imu( + header=self.get_header(report.getTimestampDevice(), report.sequence), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0), + orientation_covariance=np.array([-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), + angular_velocity=Vector3(0.0, 0.0, 0.0), + angular_velocity_covariance=np.array([]), + linear_acceleration=Vector3(0.0, 0.0, 0.0), + linear_acceleration_covariance=np.array([]) + ) + self.imu_interpolation.Imu(msg, packet) + self.write_to_rosbag(name, stream.ros_type.__msgtype__, msg) elif stream.ros_type == Image: # msg = self.bridge.Image(dai_msg) dai_msg: dai.ImgFrame diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py index 0582736bd..1e183cbd7 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py @@ -8,15 +8,15 @@ class VideoRecorder(Recorder): """ - Writes encoded streams raw (.mjpeg/.h264/.hevc) or directly to mp4 container. - Writes unencoded streams to mp4 using cv2.VideoWriter + Writes video streams (.mjpeg/.h264/.hevc) or directly to mp4/avi container. 
""" - def __init__(self): + def __init__(self, lossless: bool = False): self.path = None self._stream_type = dict() self._writers = dict() self._closed = False + self._lossless = lossless def __getitem__(self, item): return self._writers[item] @@ -41,28 +41,32 @@ def update(self, path: Path, device: dai.Device, xouts: List['XoutFrames']): # for example, 'color_bitstream' (encoded) or 'color_video' (unencoded), # if component was created with name='color' xout_name = xout.name # for example, 'color' --> file is color.mp4 (encoded) or color.avi (unencoded) - + file_name = xout_name + if file_name.startswith('CameraBoardSocket.'): + file_name = file_name[len('CameraBoardSocket.'):] stream = OakStream(xout) fourcc = stream.fourcc() # TODO add default fourcc? stream.fourcc() can be None. - if stream.is_raw(): + + print(fourcc, xout_name, stream.type) + if stream.is_raw() or stream.is_depth(): from .video_writers.video_writer import VideoWriter - self._writers[xout_name] = VideoWriter(self.path, xout_name, fourcc, xout.fps) + self._writers[xout_name] = VideoWriter(self.path, file_name, self._lossless) else: try: from .video_writers.av_writer import AvWriter - self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps, xout._frame_shape) + self._writers[xout_name] = AvWriter(self.path, file_name, fourcc) except Exception as e: # TODO here can be other errors, not only import error logging.warning(f'Exception while creating AvWriter: {e}.' '\nFalling back to FileWriter, saving uncontainerized encoded streams.') from .video_writers.file_writer import FileWriter - self._writers[xout_name] = FileWriter(self.path, xout_name, fourcc) + self._writers[xout_name] = FileWriter(self.path, file_name, fourcc) def create_files_for_buffer(self, subfolder: str, buf_name: str): for _, writer in self._writers.items(): writer.create_file_for_buffer(subfolder, buf_name) - def create_file_for_buffer(self, wr_name: str, subfolder: str, buf_name: str): # get frames' properties for the file from buf_name + def create_file_for_buffer(self, wr_name: str, subfolder: str, buf_name: str): self._writers[wr_name].create_file_for_buffer(subfolder, buf_name) def create_file(self, wr_name: str, subfolder: str, frame: Union[np.ndarray, dai.ImgFrame]): diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py index 62ed798aa..c7796e38e 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py @@ -1,7 +1,7 @@ import os from fractions import Fraction from pathlib import Path -from typing import Tuple, Union +from typing import Tuple, Union, Optional, List import depthai as dai import numpy as np @@ -45,7 +45,7 @@ def is_keyframe(encoded_frame: np.array) -> bool: class AvWriter(BaseWriter): - def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: Tuple[int, int]): + def __init__(self, path: Path, name: str, fourcc: str): """ Args: path: Path to the folder where the file will be created. 
@@ -57,13 +57,15 @@ def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: super().__init__(path, name) self.start_ts = None - self.frame_shape = frame_shape - - self._fps = fps self._fourcc = fourcc + self._stream = None + self._file = None + self.closed = False + self._codec = None # Used to determine dimensions of encoded frames + self._frame_buffer: List[dai.ImgFrame] = [] - def _create_stream(self, fourcc: str, fps: float) -> None: + def _create_stream(self, shape: Tuple) -> None: """ Create stream in file with given fourcc and fps, works in-place. @@ -71,16 +73,24 @@ def _create_stream(self, fourcc: str, fps: float) -> None: fourcc: Stream codec. fps: Frames per second of the stream. """ - self._stream = self._file.add_stream(fourcc, rate=int(fps)) + self._stream = self._file.add_stream(self._fourcc) self._stream.time_base = Fraction(1, 1000 * 1000) # Microseconds # We need to set pixel format for MJEPG, for H264/H265 it's yuv420p by default - if fourcc == 'mjpeg': + if self._fourcc == 'mjpeg': self._stream.pix_fmt = 'yuvj420p' - if self.frame_shape is not None: - self._stream.width = self.frame_shape[0] - self._stream.height = self.frame_shape[1] + self._stream.width = shape[0] + self._stream.height = shape[1] + + def get_dimension(self, img: dai.ImgFrame) -> Optional[Tuple[int, int]]: + enc_packets = self._codec.parse(img.getData()) + if len(enc_packets) == 0: + return None + frames = self._codec.decode(enc_packets[-1]) + if not frames: + return None + return frames[0].width, frames[0].height def create_file_for_buffer(self, subfolder: str, buf_name: str) -> None: # independent of type of frames self.create_file(subfolder) @@ -104,23 +114,20 @@ def _create_file(self, path_to_file: str) -> None: """ global av import av - self._file = av.open(str(Path(path_to_file).with_suffix(f'.{self._fourcc}')), 'w') - self._create_stream(self._fourcc, self._fps) - - def write(self, frame: dai.ImgFrame) -> None: - """ - Write packet bytes to h264 file. + # We will remux .h264 later + suffix = '.h264' if self._fourcc.lower() == 'h264' else '.mp4' + self._file = av.open(str(Path(path_to_file).with_suffix(suffix)), 'w') - Args: - frame: ImgFrame from depthai pipeline. - """ - if self._file is None: - self.create_file(subfolder='') + # Needed to get dimensions from the frame. Only decode first frame. + self._codec = av.CodecContext.create(self._fourcc, "r") + def __mux_imgframe(self, frame: dai.ImgFrame) -> None: frame_data = frame.getData() - if self.start_ts is None and not is_keyframe(frame_data): - return + if self.start_ts is None: + # For H26x, wait for a keyframe + if self._fourcc != 'mjpeg' and not is_keyframe(frame_data): + return packet = av.Packet(frame_data) # Create new packet with byte array @@ -129,23 +136,51 @@ def write(self, frame: dai.ImgFrame) -> None: self.start_ts = frame.getTimestampDevice() ts = int((frame.getTimestampDevice() - self.start_ts).total_seconds() * 1e6) # To microsec - packet.dts = ts - packet.pts = ts + packet.dts = ts + 1 # +1 to avoid zero dts + packet.pts = ts + 1 + packet.stream = self._stream self._file.mux_one(packet) # Mux the Packet into container + def write(self, frame: dai.ImgFrame) -> None: + """ + Write packet bytes to h264 file. + + Args: + frame: ImgFrame from depthai pipeline. 
+        """
+        if self.closed:
+            return
+        if self._file is None:
+            self.create_file(subfolder='')
+
+        if self._stream is None:
+            shape = self.get_dimension(frame)
+            if shape is None:
+                # Save the frame, so we can mux it later once dimensions are known
+                self._frame_buffer.append(frame)
+                return
+
+            self._create_stream(shape)
+            for buffered_frame in self._frame_buffer:
+                self.__mux_imgframe(buffered_frame)
+
+        self.__mux_imgframe(frame)
+
     def close(self) -> None:
         """
-        Close the file and remux it to mp4.
+        Close the file and potentially remux it to mp4.
         """
+        self.closed = True
         if self._file is not None:
             p = self._stream.encode(None)
             self._file.mux(p)
             self._file.close()
 
-        # Remux the stream to finalize the output file
-        self.remux_video(str(self._file.name))
+        # Remux the h264 stream to finalize the output file
+        if self._fourcc == 'h264':
+            self.remux_h264_video(str(self._file.name))
 
-    def remux_video(self, input_file: Union[Path, str]) -> None:
+    def remux_h264_video(self, input_file: Union[Path, str]) -> None:
         """
         Remuxes h264 file to mp4.
 
@@ -161,13 +196,13 @@ def remux_video(self, input_file: Union[Path, str]) -> None:
         with av.open(mp4_file, "w", format="mp4") as output_container, \
                 av.open(input_file, "r", format=self._fourcc) as input_container:
             input_stream = input_container.streams[0]
-            output_stream = output_container.add_stream(template=input_stream, rate=self._fps)
+            fps = input_stream.average_rate
+            output_stream = output_container.add_stream(template=input_stream, rate=fps)
 
-            if self.frame_shape:
-                output_stream.width = self.frame_shape[0]
-                output_stream.height = self.frame_shape[1]
+            output_stream.width = input_stream.width
+            output_stream.height = input_stream.height
 
-            frame_time = (1 / self._fps) * input_stream.time_base.denominator
+            frame_time = (1 / fps) * input_stream.time_base.denominator
             for i, packet in enumerate(input_container.demux(input_stream)):
                 packet.dts = i * frame_time
                 packet.pts = i * frame_time
diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py
index d35bb4344..1bcf51d5b 100644
--- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py
+++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/base_writer.py
@@ -1,5 +1,6 @@
+import time
 from abc import ABC
-from collections import deque
+from collections import deque, defaultdict
 from pathlib import Path
 from typing import Dict
 
@@ -10,27 +11,45 @@ def __init__(self, path: Path, name: str):
         self.name = name
 
         self._buffers: Dict[str, deque] = {}
+        self._buffers_max_seconds: Dict[str, int] = {}  # in seconds
+        self._buffers_timestamps = defaultdict(list)
+        self._buffers_approx_fps: Dict[str, float] = {}
         self._file = None
-        self._fps = None
 
     def create_file_for_buffer(self, subfolder: str, bufname: str):
         raise NotImplementedError()
 
     def init_buffer(self, name: str, max_seconds: int):
         if max_seconds > 0:
-            self._buffers[name] = deque(maxlen=int(max_seconds * self._fps))
+            self._buffers[name] = deque()
+            self._buffers_max_seconds[name] = max_seconds
 
     def add_to_buffer(self, name: str, frame):
         if self._buffers[name] is None:
             return
 
-        if len(self._buffers[name]) == self._buffers[name].maxlen:
+        timestamp = time.time()
+        self._buffers_timestamps[name].append(timestamp)
+
+        # Calculate time window based on max_seconds
+        time_window = self._buffers_max_seconds[name]
+
+        # Remove frames that fall outside the time window
+        while self._buffers_timestamps[name] and (timestamp - self._buffers_timestamps[name][0] > time_window):
self._buffers[name].popleft() + self._buffers_timestamps[name].pop(0) self._buffers[name].append(frame) def is_buffer_full(self, name: str) -> bool: - return len(self._buffers[name]) == self._buffers[name].maxlen + if self._buffers[name].maxlen: + return len(self._buffers[name]) == self._buffers[name].maxlen + + if not self._buffers_timestamps[name]: + return False + + diff = self._buffers_timestamps[name][0] + self._buffers_max_seconds[name] - self._buffers_timestamps[name][-1] + return diff < 0.1 def is_buffer_empty(self, name: str) -> bool: return len(self._buffers[name]) == 0 diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py index 684704f3d..b0b0f731d 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/video_writer.py @@ -1,14 +1,9 @@ -from collections import deque +from datetime import timedelta +from fractions import Fraction from pathlib import Path -from typing import Union - -try: - import cv2 -except ImportError: - cv2 = None +import av import depthai as dai -import numpy as np from depthai_sdk.recorders.video_writers import BaseWriter from depthai_sdk.recorders.video_writers.utils import create_writer_dir @@ -16,40 +11,28 @@ class VideoWriter(BaseWriter): """ - Writes raw streams to mp4 using cv2.VideoWriter. + Writes raw streams to file """ - _fps: float - _path: str - def __init__(self, path: Path, name: str, fourcc: str, fps: float): + def __init__(self, path: Path, name: str, lossless: bool = False): """ Args: path: Path to save the output. Either a folder or a file. name: Name of the stream. - fourcc: FourCC code of the codec used to compress the frames. - fps: Frames per second. + lossless: If True, save the stream without compression. """ super().__init__(path, name) - self._fourcc = None - self._w, self._h = None, None - self._fps = fps + self._lossless = lossless - self._buffer = None - self._is_buffer_enabled = False + self._fourcc: str = None + self._format: str = None + self._start_ts: timedelta = None def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def init_buffer(self, name: str, max_seconds: int): - if max_seconds > 0: - self._buffers[name] = deque(maxlen=int(max_seconds * self._fps)) - self._is_buffer_enabled = True - - def set_fourcc(self, fourcc: str): - self._fourcc = fourcc - def create_file_for_buffer(self, subfolder: str, buf_name: str): if self._buffers[buf_name] is None: raise RuntimeError(f"Buffer {buf_name} is not enabled") @@ -60,42 +43,70 @@ def create_file_for_buffer(self, subfolder: str, buf_name: str): frame = self._buffers[buf_name][0] self.create_file(subfolder, frame) - def create_file(self, subfolder: str, frame: Union[dai.ImgFrame, np.ndarray]): - path_to_file = create_writer_dir(self.path / subfolder, self.name, 'mp4') + def create_file(self, subfolder: str, frame: dai.ImgFrame): + if self._lossless or frame.getType() == dai.ImgFrame.Type.RAW16: + extension = 'avi' + else: + extension = 'mp4' + + path_to_file = create_writer_dir(self.path / subfolder, self.name, extension) - if not path_to_file.endswith('.mp4'): - path_to_file = path_to_file[:-4] + '.mp4' + if not path_to_file.endswith('.' + extension): + path_to_file = path_to_file[:-4] + '.' 
+ extension self._create_file(path_to_file, frame) - def _create_file(self, path_to_file: str, frame: Union[dai.ImgFrame, np.ndarray]): - if isinstance(frame, np.ndarray): - self._h, self._w = frame.shape[:2] + def _create_file(self, path_to_file: str, frame: dai.ImgFrame): + options = {} + if self._lossless: + self._fourcc = 'rawvideo' + elif frame.getType() == dai.ImgFrame.Type.RAW16: + self._fourcc = 'ffv1' + self._format = 'gray16le' + else: # Mono/Color, encode + self._fourcc = 'h264' + options['crf'] = '15' + + self._file = av.open(path_to_file, 'w') + self._stream = self._file.add_stream(self._fourcc) + self._stream.options = options + self._stream.time_base = Fraction(1, 1000) + self._stream.codec_context.width = frame.getWidth() + self._stream.codec_context.height = frame.getHeight() + + def write(self, img_frame: dai.ImgFrame): + if self._file is None: + self.create_file(subfolder='', frame=img_frame) + if self._start_ts is None: + self._start_ts = img_frame.getTimestampDevice() + + if img_frame.getType() == dai.ImgFrame.Type.YUV420p: + video_format = 'yuv420p' + elif img_frame.getType() == dai.ImgFrame.Type.NV12: + video_format = 'nv12' + elif img_frame.getType() in [dai.ImgFrame.Type.RAW8, dai.ImgFrame.Type.GRAY8]: + video_format = 'gray' + elif img_frame.getType() == dai.ImgFrame.Type.RAW16: + video_format = 'gray16le' else: - self._h, self._w = frame.getHeight(), frame.getWidth() - - if not isinstance(frame, np.ndarray): - frame = frame.getCvFrame() + raise ValueError(f'Unsupported frame type: {img_frame.getType()}') - c = 1 if frame.ndim == 2 else frame.shape[2] + video_frame = av.VideoFrame.from_ndarray(img_frame.getFrame(), format=video_format) - self._fourcc = 'mp4v' - self._file = cv2.VideoWriter(path_to_file, - cv2.VideoWriter_fourcc(*self._fourcc), - self._fps, - (self._w, self._h), - isColor=c != 1) + ts = int((img_frame.getTimestampDevice() - self._start_ts).total_seconds() * 1e3) # To milliseconds + video_frame.pts = ts + 1 - def write(self, frame: Union[dai.ImgFrame, np.ndarray]): - if self._file is None: - self.create_file(subfolder='', frame=frame) - - self._file.write(frame if isinstance(frame, np.ndarray) else frame.getCvFrame()) + for packet in self._stream.encode(video_frame): + self._file.mux(packet) def close(self) -> None: """ Close the file if it is open. 
""" if self._file: - self._file.release() - self._file = None + # Flush stream + for packet in self._stream.encode(): + self._file.mux(packet) + + # Close output file + self._file.close() diff --git a/depthai_sdk/src/depthai_sdk/replay.py b/depthai_sdk/src/depthai_sdk/replay.py index 23e0bd9e1..8bfc48c66 100644 --- a/depthai_sdk/src/depthai_sdk/replay.py +++ b/depthai_sdk/src/depthai_sdk/replay.py @@ -1,8 +1,9 @@ -import logging +import os import os import time from threading import Thread from time import monotonic +from typing import Callable import depthai as dai @@ -16,42 +17,49 @@ '.pnm', '.pfm', '.sr', '.ras', '.tiff', '.tif', '.exr', '.hdr', '.pic'] -class ReplayStream: - stream_name: str # XLink stream name - queue: dai.DataInputQueue # Input queue - frame: np.ndarray # Last read frame from Reader (ndarray) - imgFrame: dai.ImgFrame # Last read ImgFrame from Reader (dai.ImgFrame) - _shape: Tuple[int, int] # width, height - disabled: bool - size_bytes: int # bytes +def _run(delay: float, sendFrames: Callable): + while True: + if not sendFrames(): + break + time.sleep(delay) + logging.info('Replay `run` thread stopped') + +class ReplayStream: @property def shape(self) -> Tuple[int, int]: return self.resize if self.resize else self._shape def __init__(self): self.node: dai.node.XLinkIn = None + self.queue: dai.DataInputQueue = None self.disabled = False self.stream_name = '' - self.camera_socket: dai.CameraBoardSocket = None + self.camera_socket: dai.CameraBoardSocket = None # Forced socket self.resize: Tuple[int, int] = None self.resize_mode: ResizeMode = None + self._shape: Tuple[int, int] = None + self.callbacks: List[Callable] = [] + + self.frame: np.ndarray # Last read frame from Reader (ndarray) + self.imgFrame: dai.ImgFrame # Last read ImgFrame from Reader (dai.ImgFrame) + self.size_bytes: int # bytes def get_socket(self) -> dai.CameraBoardSocket: - if self.camera_socket: + if self.camera_socket is not None: return self.camera_socket if 'left' in self.stream_name.lower(): return dai.CameraBoardSocket.LEFT elif 'right' in self.stream_name.lower(): return dai.CameraBoardSocket.RIGHT else: - return dai.CameraBoardSocket.RGB + return dai.CameraBoardSocket.CAM_A # raise Exception("Please specify replay stream CameraBoardSocket via replay.specify_socket()") class Replay: - def __init__(self, path: str): + def __init__(self, path: Union[Path, str]): """ Helper file to replay recorded depthai stream. It reads from recorded files (mjpeg/avi/mp4/h265/h264/bag) and sends frames back to OAK camera to replay the scene, including depth reconstruction from 2 synced mono @@ -142,6 +150,8 @@ def _get_path(self, path: str) -> Path: @param path: depthai-recording path. @return: Replay module """ + if isinstance(path, Path): + return path.resolve() if isUrl(path): if isYoutubeLink(path): # Overwrite source - so Replay class can use it @@ -200,6 +210,9 @@ def set_loop(self, flag: bool): def get_fps(self) -> float: return self.fps + def _add_callback(self, stream_name: str, callback: Callable): + self.streams[stream_name.lower()].callbacks.append(callback) + def resize(self, stream_name: str, size: Tuple[int, int], mode: ResizeMode = ResizeMode.STRETCH): """ Resize color frames prior to sending them to the device. 
@@ -288,27 +301,14 @@ def initStereoDepth(self, left.node.out.link(stereo.left) right.node.out.link(stereo.right) - def start(self, cb): + def start(self): """ Start sending frames to the OAK device on a new thread """ - self.thread = Thread(target=self.run, args=(cb,)) + self.thread = Thread(target=_run, args=(1.0 / self.fps, self.sendFrames,)) self.thread.start() - def run(self, cb): - delay = 1.0 / self.fps - while True: - if not self.sendFrames(cb): - break - - time.sleep(delay) - if self._stop: - break - - logging.info('Replay `run` thread stopped') - self._stop = True - - def sendFrames(self, cb=None) -> bool: + def sendFrames(self) -> bool: """ Reads and sends recorded frames from all enabled streams to the OAK camera. @@ -317,13 +317,14 @@ def sendFrames(self, cb=None) -> bool: """ if not self._pause: # If replaying is paused, don't read new frames if not self._readFrames(): + self._stop = True return False # End of the recording self._now = monotonic() for stream_name, stream in self.streams.items(): stream.imgFrame = self._createImgFrame(stream) # Save the imgFrame - if cb: # callback + for cb in stream.callbacks: # callback cb(stream_name.lower(), stream.imgFrame) # Don't send these frames to the OAK camera @@ -339,7 +340,7 @@ def sendFrames(self, cb=None) -> bool: def createQueues(self, device: dai.Device): """ Creates input queue for each enabled stream - + Args: device (dai.Device): Device to which we will stream frames """ @@ -402,7 +403,7 @@ def _createImgFrame(self, stream: ReplayStream) -> dai.ImgFrame: def _readFrames(self) -> bool: """ Reads frames from all Readers. - + Returns: bool: True if successful, otherwise False. """ @@ -430,6 +431,6 @@ def close(self): Closes all video readers. """ self._stop = True + self.reader.close() if self.thread: self.thread.join() - self.reader.close() diff --git a/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py b/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py index dfcd29c27..a9db8afc8 100644 --- a/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py +++ b/depthai_sdk/src/depthai_sdk/trigger_action/trigger_action.py @@ -27,10 +27,7 @@ def __init__(self, trigger: Trigger, action: Union[Action, Callable]): if isinstance(action, Action) and action.inputs: SequenceNumSync.__init__(self, len(action.inputs)) - def new_packet_trigger(self, - packet: FramePacket, - _=None # visualizer seems redundant here - ) -> None: + def new_packet_trigger(self, packet: FramePacket) -> None: """ This method is called when a new packet is received from the trigger input stream. @@ -44,10 +41,7 @@ def new_packet_trigger(self, self.last_trigger_time = trigger_time self.action.activate() - def new_packet_action(self, - packet: FramePacket, - _=None # visualizer seems redundant here - ) -> None: + def new_packet_action(self, packet: FramePacket) -> None: """ This method is called when a new packet is received from the action input streams. Primary purpose of this method is to provide a way to keep a track of the packets. 
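
The two callbacks above are normally wired up through OakCamera.trigger_action(). A short sketch, with class and argument names following the SDK's trigger_action examples (treat the exact values and paths as placeholders):

from depthai_sdk import OakCamera
from depthai_sdk.trigger_action.triggers.detection_trigger import DetectionTrigger
from depthai_sdk.trigger_action.actions.record_action import RecordAction

with OakCamera() as oak:
    color = oak.create_camera('color', encode='jpeg', fps=30)
    nn = oak.create_nn('mobilenet-ssd', color)

    # Record a short clip around each person detection, at most once per minute
    oak.trigger_action(
        trigger=DetectionTrigger(input=nn, min_detections={'person': 1}, cooldown=60),
        action=RecordAction(inputs=[color], dir_path='./recordings/',
                            duration_before_trigger=5, duration_after_trigger=10)
    )
    oak.start(blocking=True)
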
diff --git a/depthai_sdk/src/depthai_sdk/types.py b/depthai_sdk/src/depthai_sdk/types.py index 53c4114e0..4c257febf 100644 --- a/depthai_sdk/src/depthai_sdk/types.py +++ b/depthai_sdk/src/depthai_sdk/types.py @@ -2,6 +2,8 @@ import depthai as dai +from depthai_sdk.classes.packets import SemanticSegmentationPacket, ImgLandmarksPacket, NnOutputPacket, DetectionPacket + GenericNeuralNetwork = Union[ dai.node.NeuralNetwork, dai.node.MobileNetDetectionNetwork, @@ -9,3 +11,24 @@ dai.node.YoloDetectionNetwork, dai.node.YoloSpatialDetectionNetwork ] + +XoutNNOutputPacket = Union[ + NnOutputPacket, + DetectionPacket, + ImgLandmarksPacket, + SemanticSegmentationPacket +] + +Resolution = Union[ + str, + dai.ColorCameraProperties.SensorResolution, + dai.MonoCameraProperties.SensorResolution +] + +NNNode = Union[ + dai.node.NeuralNetwork, + dai.node.MobileNetDetectionNetwork, + dai.node.MobileNetSpatialDetectionNetwork, + dai.node.YoloDetectionNetwork, + dai.node.YoloSpatialDetectionNetwork +] diff --git a/depthai_sdk/src/depthai_sdk/utils.py b/depthai_sdk/src/depthai_sdk/utils.py index 516642637..58ae3bc43 100644 --- a/depthai_sdk/src/depthai_sdk/utils.py +++ b/depthai_sdk/src/depthai_sdk/utils.py @@ -2,6 +2,8 @@ import json import logging import sys +import tempfile +import traceback import urllib.request from pathlib import Path from typing import Dict, List, Tuple, Optional, Union, Any @@ -38,23 +40,6 @@ def getLocalRecording(recording: str) -> Optional[Path]: return None -def configPipeline(pipeline: dai.Pipeline, - xlinkChunk: Optional[int] = None, - calib: Optional[dai.CalibrationHandler] = None, - tuningBlob: Optional[str] = None, - openvinoVersion: Union[None, str, dai.OpenVINO.Version] = None - ) -> None: - if xlinkChunk: - pipeline.setXLinkChunkSize(xlinkChunk) - if calib: - pipeline.setCalibrationData(calib) - if tuningBlob: - pipeline.setCameraTuningBlobPath(tuningBlob) - if openvinoVersion: - # pipeline.setOpenVINOVersion(parseOpenVinoVersion(openvinoVersion)) - pass - - def getAvailableRecordings() -> Dict[str, Tuple[List[str], int]]: """ Get available (online) depthai-recordings. Returns list of available recordings and it's size @@ -415,16 +400,16 @@ def _create_cache_folder() -> bool: return True -def _create_config() -> None: +def _create_config() -> Optional[dict]: """ - Create config file in user's home directory. + Create config file in user's home directory. If config file already exists, check if sentry_dsn is correct. Returns: - None. + dict: Config file content. 
""" if not _create_cache_folder(): logging.debug('Failed to create config file.') - return + return None config_file = Path.home().joinpath('.depthai_sdk', 'config.json') default_config = { @@ -433,6 +418,13 @@ def _create_config() -> None: } if not config_file.exists(): config_file.write_text(json.dumps(default_config)) + else: + content = json.loads(config_file.read_text()) + if content['sentry_dsn'] != default_config['sentry_dsn']: + content['sentry_dsn'] = default_config['sentry_dsn'] + config_file.write_text(json.dumps(content)) + + return json.loads(config_file.read_text()) def set_sentry_status(status: bool = True) -> None: @@ -447,8 +439,7 @@ def set_sentry_status(status: bool = True) -> None: """ # check if config exists config_file = Path.home().joinpath('.depthai_sdk', 'config.json') - if not config_file.exists(): - _create_config() + _create_config() # read config config = json.loads(config_file.read_text()) @@ -464,13 +455,8 @@ def get_config_field(key: str) -> Any: bool: True if sentry is enabled, False otherwise. """ # check if config exists - config_file = Path.home().joinpath('.depthai_sdk', 'config.json') - if not config_file.exists(): - raise FileNotFoundError('Config file not found.') - - # read config - config = json.loads(config_file.read_text()) - return config[key] + config_file = _create_config() + return config_file.get(key, None) def report_crash_dump(device: dai.Device) -> None: @@ -487,11 +473,32 @@ def report_crash_dump(device: dai.Device) -> None: device_id = crash_dump.deviceId crash_dump_json = crash_dump.serializeToJson() - path = f'/tmp/crash_{commit_hash}_{device_id}.json' - with open(path, 'w') as f: - json.dump(crash_dump_json, f) - from sentry_sdk import capture_exception, configure_scope - with configure_scope() as scope: - scope.add_attachment(content_type='application/json', path=path) - capture_exception(CrashDumpException()) + # Save crash dump to a temporary file + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / f'crash_{commit_hash}_{device_id}.json' + with open(path, 'w') as f: + json.dump(crash_dump_json, f) + + with configure_scope() as scope: + logging.info('Reporting crash dump to sentry.') + scope.add_attachment(content_type='application/json', path=str(path)) + capture_exception(CrashDumpException()) + + +def _sentry_before_send(event, hint): + if 'exc_info' in hint: + exc_type, exc_value, tb = hint['exc_info'] + tb_info = traceback.extract_tb(tb) + + if isinstance(exc_value, (KeyboardInterrupt, SystemExit)): + return None + + # Loop through the traceback to check for any frame that originated in your module + for tbi in tb_info: + # Assuming your module files have the pattern "my_module_*", you can do: + if 'depthai_sdk' in tbi.filename: + return event # if the error originated in your module, send it + + # If none of the frames came from your module, or there's no exception info, don't send the event + return None diff --git a/depthai_sdk/src/depthai_sdk/visualize/bbox.py b/depthai_sdk/src/depthai_sdk/visualize/bbox.py index 67cf99a76..db2152ef2 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/bbox.py +++ b/depthai_sdk/src/depthai_sdk/visualize/bbox.py @@ -1,13 +1,17 @@ -from typing import Optional, Tuple, Union, Sequence +from typing import Optional, Tuple, Union, Sequence, List + +import depthai as dai import numpy as np + from depthai_sdk.classes.enum import ResizeMode -import depthai as dai + class Point: """ Used within the BoundingBox class when dealing with points. 
""" - def __init__(self, x: float, y:float): + + def __init__(self, x: float, y: float): self.x = x self.y = y @@ -15,7 +19,7 @@ def __str__(self): return f"({self.x}, {self.y})" def to_tuple(self) -> Tuple[float, float]: - return (self.x, self.y) + return self.x, self.y def denormalize(self, frame_shape: Sequence) -> Tuple[int, int]: """ @@ -29,7 +33,8 @@ class BoundingBox: This class helps with bounding box calculations. It can be used to calculate relative bounding boxes, map points from relative to absolute coordinates and vice versa, crop frames, etc. """ - def __init__(self, bbox: Union[None, np.ndarray, Tuple[float, float, float, float], dai.ImgDetection] = None): + + def __init__(self, bbox: Union[None, List, np.ndarray, Tuple[float, float, float, float], dai.ImgDetection] = None): if isinstance(bbox, (Sequence, np.ndarray)): self.xmin, self.ymin, self.xmax, self.ymax = bbox elif isinstance(bbox, dai.ImgDetection): @@ -41,6 +46,29 @@ def __init__(self, bbox: Union[None, np.ndarray, Tuple[float, float, float, floa def __str__(self): return f"({self.xmin}, {self.ymin}), ({self.xmax}, {self.ymax})" + def clip(self, min_value=0.0, max_value=1.0) -> 'BoundingBox': + """ + Clips the bounding box to the given range. + """ + return BoundingBox([ + np.clip(self.xmin, min_value, max_value), + np.clip(self.ymin, min_value, max_value), + np.clip(self.xmax, min_value, max_value), + np.clip(self.ymax, min_value, max_value), + ]) + + def top_left(self) -> Tuple[float, float]: + """ + Get top-left corner of the bounding box. + """ + return self.xmin, self.ymin + + def bottom_right(self) -> Tuple[float, float]: + """ + Get bottom-right corner of the bounding box. + """ + return self.xmax, self.ymax + def to_tuple(self, frame_shape: Union[Sequence, None] = None) -> Tuple: """ Get bounding box coordinates as a tuple (xmin, ymin, xmax, ymax). @@ -136,9 +164,9 @@ def crop_frame(self, frame: np.ndarray) -> np.ndarray: return frame[left:right, top:bottom] def resize_to_aspect_ratio(self, - old_aspect_ratio: Union[float, Sequence], - new_aspect_ratio: Union[float, Sequence], - resize_mode: Union[ResizeMode, str] = ResizeMode.LETTERBOX) -> 'BoundingBox': + old_aspect_ratio: Union[float, Sequence], + new_aspect_ratio: Union[float, Sequence], + resize_mode: Union[ResizeMode, str] = ResizeMode.LETTERBOX) -> 'BoundingBox': """ Calculates a new BoundingBox, based on the current BoundingBox, but with a different aspect ratio. Example: If the original frame is 1920x1080, and we have a model with input size of 300x300, diff --git a/depthai_sdk/src/depthai_sdk/visualize/colors.py b/depthai_sdk/src/depthai_sdk/visualize/colors.py index 8c9cfe70a..64aec06fa 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/colors.py +++ b/depthai_sdk/src/depthai_sdk/visualize/colors.py @@ -1,8 +1,8 @@ -import numpy as np -import math import colorsys +import math from typing import Tuple + def generate_colors(number_of_colors: int, pastel=0.5): colors = [] @@ -25,12 +25,13 @@ def generate_colors(number_of_colors: int, pastel=0.5): # Return only the first `number_of_colors` colors return colors[:number_of_colors] -def get_text_color(background_color: Tuple[int,int,int], threshold=0.6): + +def get_text_color(background_color: Tuple[int, int, int], threshold=0.6): """ Determines whether black or white text will be more legible against a given background color. Args: - background_color_bgr: The BGR color that the text will be displayed on. + background_color: The BGR color that the text will be displayed on. 
threshold: Float between 0 and 1. A threshold closer to 1 results in the function choosing white text more often. Returns: @@ -41,3 +42,11 @@ def get_text_color(background_color: Tuple[int,int,int], threshold=0.6): return (0, 0, 0) # BGR for black else: return (255, 255, 255) # BGR for white + + +def hex_to_bgr(hex: str) -> Tuple[int, ...]: + """ + "#ff1f00" (red) => (0, 31, 255) + """ + value = hex.lstrip('#') + return tuple(int(value[i:i + 2], 16) for i in (4, 2, 0)) diff --git a/depthai_sdk/src/depthai_sdk/visualize/configs.py b/depthai_sdk/src/depthai_sdk/visualize/configs.py index 8ee2b43d6..972663cc0 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/configs.py +++ b/depthai_sdk/src/depthai_sdk/visualize/configs.py @@ -1,12 +1,8 @@ from dataclasses import dataclass, field from enum import IntEnum from typing import Tuple, Optional -import numpy as np -try: - import cv2 -except ImportError: - cv2 = None +import numpy as np class TextPosition(IntEnum): @@ -51,11 +47,53 @@ class OutputConfig: class StereoConfig: colorize: StereoColor = StereoColor.RGB # cv2.COLORMAP_JET. This was hardcoded, as we want to have an array, because we later invert it / invalidate values 0 - colormap: np.ndarray = field(default_factory=lambda: np.array([[[128,0,0]],[[132,0,0]],[[136,0,0]],[[140,0,0]],[[144,0,0]],[[148,0,0]],[[152,0,0]],[[156,0,0]],[[160,0,0]],[[164,0,0]],[[168,0,0]],[[172,0,0]],[[176,0,0]],[[180,0,0]],[[184,0,0]],[[188,0,0]],[[192,0,0]],[[196,0,0]],[[200,0,0]],[[204,0,0]],[[208,0,0]],[[212,0,0]],[[216,0,0]],[[220,0,0]],[[224,0,0]],[[228,0,0]],[[232,0,0]],[[236,0,0]],[[240,0,0]],[[244,0,0]],[[248,0,0]],[[252,0,0]],[[255,0,0]],[[255,4,0]],[[255,8,0]],[[255,12,0]],[[255,16,0]],[[255,20,0]],[[255,24,0]],[[255,28,0]],[[255,32,0]],[[255,36,0]],[[255,40,0]],[[255,44,0]],[[255,48,0]],[[255,52,0]],[[255,56,0]],[[255,60,0]],[[255,64,0]],[[255,68,0]],[[255,72,0]],[[255,76,0]],[[255,80,0]],[[255,84,0]],[[255,88,0]],[[255,92,0]],[[255,96,0]],[[255,100,0]],[[255,104,0]],[[255,108,0]],[[255,112,0]],[[255,116,0]],[[255,120,0]],[[255,124,0]],[[255,128,0]],[[255,132,0]],[[255,136,0]],[[255,140,0]],[[255,144,0]],[[255,148,0]],[[255,152,0]],[[255,156,0]],[[255,160,0]],[[255,164,0]],[[255,168,0]],[[255,172,0]],[[255,176,0]],[[255,180,0]],[[255,184,0]],[[255,188,0]],[[255,192,0]],[[255,196,0]],[[255,200,0]],[[255,204,0]],[[255,208,0]],[[255,212,0]],[[255,216,0]],[[255,220,0]],[[255,224,0]],[[255,228,0]],[[255,232,0]],[[255,236,0]],[[255,240,0]],[[255,244,0]],[[255,248,0]],[[255,252,0]],[[254,255,2]],[[250,255,6]],[[246,255,10]],[[242,255,14]],[[238,255,18]],[[234,255,22]],[[230,255,26]],[[226,255,30]],[[222,255,34]],[[218,255,38]],[[214,255,42]],[[210,255,46]],[[206,255,50]],[[202,255,54]],[[198,255,58]],[[194,255,62]],[[190,255,66]],[[186,255,70]],[[182,255,74]],[[178,255,78]],[[174,255,82]],[[170,255,86]],[[166,255,90]],[[162,255,94]],[[158,255,98]],[[154,255,102]],[[150,255,106]],[[146,255,110]],[[142,255,114]],[[138,255,118]],[[134,255,122]],[[130,255,126]],[[126,255,130]],[[122,255,134]],[[118,255,138]],[[114,255,142]],[[110,255,146]],[[106,255,150]],[[102,255,154]],[[98,255,158]],[[94,255,162]],[[90,255,166]],[[86,255,170]],[[82,255,174]],[[78,255,178]],[[74,255,182]],[[70,255,186]],[[66,255,190]],[[62,255,194]],[[58,255,198]],[[54,255,202]],[[50,255,206]],[[46,255,210]],[[42,255,214]],[[38,255,218]],[[34,255,222]],[[30,255,226]],[[26,255,230]],[[22,255,234]],[[18,255,238]],[[14,255,242]],[[10,255,246]],[[6,255,250]],[[1,255,254]],[[0,252,255]],[[0,248,255]],[[0,244,255]],[[0,240,255]],[[0,236,255]],[[0,232,2
55]],[[0,228,255]],[[0,224,255]],[[0,220,255]],[[0,216,255]],[[0,212,255]],[[0,208,255]],[[0,204,255]],[[0,200,255]],[[0,196,255]],[[0,192,255]],[[0,188,255]],[[0,184,255]],[[0,180,255]],[[0,176,255]],[[0,172,255]],[[0,168,255]],[[0,164,255]],[[0,160,255]],[[0,156,255]],[[0,152,255]],[[0,148,255]],[[0,144,255]],[[0,140,255]],[[0,136,255]],[[0,132,255]],[[0,128,255]],[[0,124,255]],[[0,120,255]],[[0,116,255]],[[0,112,255]],[[0,108,255]],[[0,104,255]],[[0,100,255]],[[0,96,255]],[[0,92,255]],[[0,88,255]],[[0,84,255]],[[0,80,255]],[[0,76,255]],[[0,72,255]],[[0,68,255]],[[0,64,255]],[[0,60,255]],[[0,56,255]],[[0,52,255]],[[0,48,255]],[[0,44,255]],[[0,40,255]],[[0,36,255]],[[0,32,255]],[[0,28,255]],[[0,24,255]],[[0,20,255]],[[0,16,255]],[[0,12,255]],[[0,8,255]],[[0,4,255]],[[0,0,255]],[[0,0,252]],[[0,0,248]],[[0,0,244]],[[0,0,240]],[[0,0,236]],[[0,0,232]],[[0,0,228]],[[0,0,224]],[[0,0,220]],[[0,0,216]],[[0,0,212]],[[0,0,208]],[[0,0,204]],[[0,0,200]],[[0,0,196]],[[0,0,192]],[[0,0,188]],[[0,0,184]],[[0,0,180]],[[0,0,176]],[[0,0,172]],[[0,0,168]],[[0,0,164]],[[0,0,160]],[[0,0,156]],[[0,0,152]],[[0,0,148]],[[0,0,144]],[[0,0,140]],[[0,0,136]],[[0,0,132]],[[0,0,128]]], dtype=np.uint8)) + colormap: np.ndarray = field(default_factory=lambda: np.array( + [[[128, 0, 0]], [[132, 0, 0]], [[136, 0, 0]], [[140, 0, 0]], [[144, 0, 0]], [[148, 0, 0]], [[152, 0, 0]], + [[156, 0, 0]], [[160, 0, 0]], [[164, 0, 0]], [[168, 0, 0]], [[172, 0, 0]], [[176, 0, 0]], [[180, 0, 0]], + [[184, 0, 0]], [[188, 0, 0]], [[192, 0, 0]], [[196, 0, 0]], [[200, 0, 0]], [[204, 0, 0]], [[208, 0, 0]], + [[212, 0, 0]], [[216, 0, 0]], [[220, 0, 0]], [[224, 0, 0]], [[228, 0, 0]], [[232, 0, 0]], [[236, 0, 0]], + [[240, 0, 0]], [[244, 0, 0]], [[248, 0, 0]], [[252, 0, 0]], [[255, 0, 0]], [[255, 4, 0]], [[255, 8, 0]], + [[255, 12, 0]], [[255, 16, 0]], [[255, 20, 0]], [[255, 24, 0]], [[255, 28, 0]], [[255, 32, 0]], [[255, 36, 0]], + [[255, 40, 0]], [[255, 44, 0]], [[255, 48, 0]], [[255, 52, 0]], [[255, 56, 0]], [[255, 60, 0]], [[255, 64, 0]], + [[255, 68, 0]], [[255, 72, 0]], [[255, 76, 0]], [[255, 80, 0]], [[255, 84, 0]], [[255, 88, 0]], [[255, 92, 0]], + [[255, 96, 0]], [[255, 100, 0]], [[255, 104, 0]], [[255, 108, 0]], [[255, 112, 0]], [[255, 116, 0]], + [[255, 120, 0]], [[255, 124, 0]], [[255, 128, 0]], [[255, 132, 0]], [[255, 136, 0]], [[255, 140, 0]], + [[255, 144, 0]], [[255, 148, 0]], [[255, 152, 0]], [[255, 156, 0]], [[255, 160, 0]], [[255, 164, 0]], + [[255, 168, 0]], [[255, 172, 0]], [[255, 176, 0]], [[255, 180, 0]], [[255, 184, 0]], [[255, 188, 0]], + [[255, 192, 0]], [[255, 196, 0]], [[255, 200, 0]], [[255, 204, 0]], [[255, 208, 0]], [[255, 212, 0]], + [[255, 216, 0]], [[255, 220, 0]], [[255, 224, 0]], [[255, 228, 0]], [[255, 232, 0]], [[255, 236, 0]], + [[255, 240, 0]], [[255, 244, 0]], [[255, 248, 0]], [[255, 252, 0]], [[254, 255, 2]], [[250, 255, 6]], + [[246, 255, 10]], [[242, 255, 14]], [[238, 255, 18]], [[234, 255, 22]], [[230, 255, 26]], [[226, 255, 30]], + [[222, 255, 34]], [[218, 255, 38]], [[214, 255, 42]], [[210, 255, 46]], [[206, 255, 50]], [[202, 255, 54]], + [[198, 255, 58]], [[194, 255, 62]], [[190, 255, 66]], [[186, 255, 70]], [[182, 255, 74]], [[178, 255, 78]], + [[174, 255, 82]], [[170, 255, 86]], [[166, 255, 90]], [[162, 255, 94]], [[158, 255, 98]], [[154, 255, 102]], + [[150, 255, 106]], [[146, 255, 110]], [[142, 255, 114]], [[138, 255, 118]], [[134, 255, 122]], + [[130, 255, 126]], [[126, 255, 130]], [[122, 255, 134]], [[118, 255, 138]], [[114, 255, 142]], + [[110, 255, 146]], [[106, 255, 150]], [[102, 255, 
154]], [[98, 255, 158]], [[94, 255, 162]], [[90, 255, 166]], + [[86, 255, 170]], [[82, 255, 174]], [[78, 255, 178]], [[74, 255, 182]], [[70, 255, 186]], [[66, 255, 190]], + [[62, 255, 194]], [[58, 255, 198]], [[54, 255, 202]], [[50, 255, 206]], [[46, 255, 210]], [[42, 255, 214]], + [[38, 255, 218]], [[34, 255, 222]], [[30, 255, 226]], [[26, 255, 230]], [[22, 255, 234]], [[18, 255, 238]], + [[14, 255, 242]], [[10, 255, 246]], [[6, 255, 250]], [[1, 255, 254]], [[0, 252, 255]], [[0, 248, 255]], + [[0, 244, 255]], [[0, 240, 255]], [[0, 236, 255]], [[0, 232, 255]], [[0, 228, 255]], [[0, 224, 255]], + [[0, 220, 255]], [[0, 216, 255]], [[0, 212, 255]], [[0, 208, 255]], [[0, 204, 255]], [[0, 200, 255]], + [[0, 196, 255]], [[0, 192, 255]], [[0, 188, 255]], [[0, 184, 255]], [[0, 180, 255]], [[0, 176, 255]], + [[0, 172, 255]], [[0, 168, 255]], [[0, 164, 255]], [[0, 160, 255]], [[0, 156, 255]], [[0, 152, 255]], + [[0, 148, 255]], [[0, 144, 255]], [[0, 140, 255]], [[0, 136, 255]], [[0, 132, 255]], [[0, 128, 255]], + [[0, 124, 255]], [[0, 120, 255]], [[0, 116, 255]], [[0, 112, 255]], [[0, 108, 255]], [[0, 104, 255]], + [[0, 100, 255]], [[0, 96, 255]], [[0, 92, 255]], [[0, 88, 255]], [[0, 84, 255]], [[0, 80, 255]], + [[0, 76, 255]], [[0, 72, 255]], [[0, 68, 255]], [[0, 64, 255]], [[0, 60, 255]], [[0, 56, 255]], [[0, 52, 255]], + [[0, 48, 255]], [[0, 44, 255]], [[0, 40, 255]], [[0, 36, 255]], [[0, 32, 255]], [[0, 28, 255]], [[0, 24, 255]], + [[0, 20, 255]], [[0, 16, 255]], [[0, 12, 255]], [[0, 8, 255]], [[0, 4, 255]], [[0, 0, 255]], [[0, 0, 252]], + [[0, 0, 248]], [[0, 0, 244]], [[0, 0, 240]], [[0, 0, 236]], [[0, 0, 232]], [[0, 0, 228]], [[0, 0, 224]], + [[0, 0, 220]], [[0, 0, 216]], [[0, 0, 212]], [[0, 0, 208]], [[0, 0, 204]], [[0, 0, 200]], [[0, 0, 196]], + [[0, 0, 192]], [[0, 0, 188]], [[0, 0, 184]], [[0, 0, 180]], [[0, 0, 176]], [[0, 0, 172]], [[0, 0, 168]], + [[0, 0, 164]], [[0, 0, 160]], [[0, 0, 156]], [[0, 0, 152]], [[0, 0, 148]], [[0, 0, 144]], [[0, 0, 140]], + [[0, 0, 136]], [[0, 0, 132]], [[0, 0, 128]]], dtype=np.uint8)) wls_filter: bool = False wls_lambda: float = 8000 wls_sigma: float = 1.5 + @dataclass class DetectionConfig: """Configuration for drawing bounding boxes.""" @@ -94,7 +132,7 @@ class TextConfig: @dataclass class TrackingConfig: """Configuration for drawing tracking bounding boxes.""" - max_length: int = -1 + max_length: int = 500 deletion_lost_threshold: int = 5 line_thickness: int = 1 fading_tails: bool = False diff --git a/depthai_sdk/src/depthai_sdk/visualize/objects.py b/depthai_sdk/src/depthai_sdk/visualize/objects.py index 8fe4991ee..e7d19704b 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/objects.py +++ b/depthai_sdk/src/depthai_sdk/visualize/objects.py @@ -1,12 +1,9 @@ import logging +import math from abc import ABC, abstractmethod from collections import defaultdict -from typing import Tuple, List, Union, Optional, Sequence - -try: - import cv2 -except ImportError: - cv2 = None +from types import SimpleNamespace +from typing import Tuple, List, Union import depthai as dai import numpy as np @@ -14,7 +11,14 @@ from depthai_sdk.visualize.bbox import BoundingBox from depthai_sdk.visualize.configs import VisConfig, BboxStyle, TextPosition -from depthai_sdk.visualize.visualizer_helper import spatials_text + + +def spatials_text(spatials: dai.Point3f): + return SimpleNamespace( + x="X: " + ("{:.1f}m".format(spatials.x / 1000) if not math.isnan(spatials.x) else "--"), + y="Y: " + ("{:.1f}m".format(spatials.y / 1000) if not math.isnan(spatials.y) else "--"), + z="Z: " + 
("{:.1f}m".format(spatials.z / 1000) if not math.isnan(spatials.z) else "--"), + ) class GenericObject(ABC): @@ -53,21 +57,6 @@ def set_frame_shape(self, frame_shape: Tuple[int, ...]) -> 'GenericObject': self.frame_shape = frame_shape return self - @abstractmethod - def draw(self, frame: np.ndarray) -> None: - """ - Draw the object on the frame. - - Args: - frame: frame to draw on. - """ - raise NotImplementedError - - def draw_children(self, frame: np.ndarray) -> None: - for child in self.children: - child.draw(frame) - - @abstractmethod def prepare(self) -> 'GenericObject': """ Prepare necessary data for drawing. @@ -75,7 +64,7 @@ def prepare(self) -> 'GenericObject': Returns: self """ - raise NotImplementedError + return self @abstractmethod def serialize(self) -> dict: @@ -107,132 +96,6 @@ def children(self) -> List['GenericObject']: """ return self._children - def draw_bbox(self, - img: np.ndarray, - pt1: Tuple[int, int], - pt2: Tuple[int, int], - color: Tuple[int, int, int], - thickness: int, - r: int, - line_width: int, - line_height: int - ) -> None: - """ - Draw a rounded rectangle on the image (in-place). - - Args: - img: Image to draw on. - pt1: Top-left corner of the rectangle. - pt2: Bottom-right corner of the rectangle. - color: Rectangle color. - thickness: Rectangle line thickness. - r: Radius of the rounded corners. - line_width: Width of the rectangle line. - line_height: Height of the rectangle line. - """ - x1, y1 = pt1 - x2, y2 = pt2 - - if line_width == 0: - line_width = np.abs(x2 - x1) - line_width -= 2 * r if r > 0 else 0 # Adjust for rounded corners - - if line_height == 0: - line_height = np.abs(y2 - y1) - line_height -= 2 * r if r > 0 else 0 # Adjust for rounded corners - - # Top left - cv2.line(img, (x1 + r, y1), (x1 + r + line_width, y1), color, thickness) - cv2.line(img, (x1, y1 + r), (x1, y1 + r + line_height), color, thickness) - cv2.ellipse(img, (x1 + r, y1 + r), (r, r), 180, 0, 90, color, thickness) - - # Top right - cv2.line(img, (x2 - r, y1), (x2 - r - line_width, y1), color, thickness) - cv2.line(img, (x2, y1 + r), (x2, y1 + r + line_height), color, thickness) - cv2.ellipse(img, (x2 - r, y1 + r), (r, r), 270, 0, 90, color, thickness) - - # Bottom left - cv2.line(img, (x1 + r, y2), (x1 + r + line_width, y2), color, thickness) - cv2.line(img, (x1, y2 - r), (x1, y2 - r - line_height), color, thickness) - cv2.ellipse(img, (x1 + r, y2 - r), (r, r), 90, 0, 90, color, thickness) - - # Bottom right - cv2.line(img, (x2 - r, y2), (x2 - r - line_width, y2), color, thickness) - cv2.line(img, (x2, y2 - r), (x2, y2 - r - line_height), color, thickness) - cv2.ellipse(img, (x2 - r, y2 - r), (r, r), 0, 0, 90, color, thickness) - - # Fill the area - alpha = self.config.detection.fill_transparency - if alpha > 0: - overlay = img.copy() - - thickness = -1 - bbox = (pt1[0], pt1[1], pt2[0], pt2[1]) - - top_left = (bbox[0], bbox[1]) - bottom_right = (bbox[2], bbox[3]) - top_right = (bottom_right[0], top_left[1]) - bottom_left = (top_left[0], bottom_right[1]) - - top_left_main_rect = (int(top_left[0] + r), int(top_left[1])) - bottom_right_main_rect = (int(bottom_right[0] - r), int(bottom_right[1])) - - top_left_rect_left = (top_left[0], top_left[1] + r) - bottom_right_rect_left = (bottom_left[0] + r, bottom_left[1] - r) - - top_left_rect_right = (top_right[0] - r, top_right[1] + r) - bottom_right_rect_right = (bottom_right[0], bottom_right[1] - r) - - all_rects = [ - [top_left_main_rect, bottom_right_main_rect], - [top_left_rect_left, bottom_right_rect_left], - 
[top_left_rect_right, bottom_right_rect_right] - ] - - [cv2.rectangle(overlay, pt1=rect[0], pt2=rect[1], color=color, thickness=thickness) for rect in all_rects] - - cv2.ellipse(overlay, (top_left[0] + r, top_left[1] + r), (r, r), 180.0, 0, 90, color, thickness) - cv2.ellipse(overlay, (top_right[0] - r, top_right[1] + r), (r, r), 270.0, 0, 90, color, thickness) - cv2.ellipse(overlay, (bottom_right[0] - r, bottom_right[1] - r), (r, r), 0.0, 0, 90, color, thickness) - cv2.ellipse(overlay, (bottom_left[0] + r, bottom_left[1] - r), (r, r), 90.0, 0, 90, color, thickness) - - cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img) - - def draw_stylized_bbox(self, - img: np.ndarray, - pt1: Tuple[int, int], - pt2: Tuple[int, int], - color: Tuple[int, int, int], - thickness: int, - bbox_style: BboxStyle = None - ) -> None: - """ - Draw a stylized bounding box. The style is either passed as an argument or defined in the config. - - Args: - img: Image to draw on. - pt1: Top left corner. - pt2: Bottom right corner. - color: Boundary color. - thickness: Border thickness. - bbox_style: Bounding box style. - """ - box_w = pt2[0] - pt1[0] - box_h = pt2[1] - pt1[1] - line_width = int(box_w * self.config.detection.line_width) // 2 - line_height = int(box_h * self.config.detection.line_height) // 2 - roundness = int(self.config.detection.box_roundness) - bbox_style = bbox_style or self.config.detection.bbox_style - - if bbox_style == BboxStyle.RECTANGLE: - self.draw_bbox(img, pt1, pt2, color, thickness, 0, line_width=0, line_height=0) - elif bbox_style == BboxStyle.CORNERS: - self.draw_bbox(img, pt1, pt2, color, thickness, 0, line_width=line_width, line_height=line_height) - elif bbox_style == BboxStyle.ROUNDED_RECTANGLE: - self.draw_bbox(img, pt1, pt2, color, thickness, roundness, line_width=0, line_height=0) - elif bbox_style == BboxStyle.ROUNDED_CORNERS: - self.draw_bbox(img, pt1, pt2, color, thickness, roundness, line_width=line_width, line_height=line_height) - class VisImage(GenericObject): def __init__(self, image: np.ndarray, frame_shape: Tuple[int, ...]): @@ -255,7 +118,7 @@ class VisBoundingBox(GenericObject): """ def __init__(self, - bbox: Union[np.ndarray, Tuple[int, ...]], + bbox: BoundingBox, label: str, color: Tuple[int, int, int], thickness: int, @@ -267,9 +130,6 @@ def __init__(self, self.thickness = thickness self.bbox_style = bbox_style - def draw(self, frame: np.ndarray) -> None: - self.draw_stylized_bbox(frame, self.bbox[0:2], self.bbox[2:4], self.color, self.thickness, self.bbox_style) - def prepare(self) -> 'GenericObject': return self @@ -326,7 +186,7 @@ def __init__(self, self.labels = [] self.colors = [] - try: # Check if the detections are of type _TrackingDetection + try: # Check if the detections are of type TrackingDetection self.detections = [t.srcImgDetection for t in self.detections] except AttributeError: pass @@ -373,8 +233,7 @@ def prepare(self) -> 'VisDetections': # Get normalized bounding box normalized_bbox = self.normalizer.get_relative_bbox(BoundingBox(detection)) if len(self.frame_shape) < 2: - logging.debug('Visualizer: skipping detection because frame shape is invalid: {}' - .format(self.frame_shape)) + logging.debug(f'Visualizer: skipping detection because frame shape is invalid: {self.frame_shape}') return self # TODO can normalize accept frame shape? 
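
For reference, a small sketch of the BoundingBox helpers the code above relies on (clip(), top_left()/bottom_right() and denormalize() from bbox.py); the coordinates are illustrative only:

from depthai_sdk.visualize.bbox import BoundingBox

bb = BoundingBox((0.1, 0.2, 0.5, 0.9)).clip()  # normalized (xmin, ymin, xmax, ymax), clamped to [0, 1]
print(bb.top_left(), bb.bottom_right())        # (0.1, 0.2) (0.5, 0.9), still normalized
tl, br = bb.denormalize((720, 1280, 3))        # pixel top-left / bottom-right on a 1280x720 BGR frame
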
@@ -421,24 +280,6 @@ def get_detections(self) -> List[Tuple[np.ndarray, str, Tuple[int, int, int]]]: """ return list(zip(self.bboxes, self.labels, self.colors)) - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - for bbox, _, color in self.get_detections(): - tl, br = bbox.denormalize(frame.shape) - # Draw bounding box - self.draw_stylized_bbox( - img=frame, - pt1=tl, - pt2=br, - color=color, - thickness=self.config.detection.thickness - ) - - for child in self.children: - child.draw(frame) - class VisText(GenericObject): """ @@ -503,145 +344,6 @@ def serialize(self): 'background_transparency': self.background_transparency } - def prepare(self) -> 'VisText': - # TODO: in the future, we can stop support for passing pixel-space bbox to the - # visualizer. - if isinstance(self.bbox, (Sequence, np.ndarray)): - # Convert to BoundingBox. Divide by self.frame_shape and load into the BoundingBox - self.bbox = list(self.bbox) - self.bbox[0] /= self.frame_shape[1] - self.bbox[1] /= self.frame_shape[0] - self.bbox[2] /= self.frame_shape[1] - self.bbox[3] /= self.frame_shape[0] - self.bbox = BoundingBox(self.bbox) - self.coords = self.coords or self.get_relative_position(bbox=self.bbox, - position=self.position, - padding=self.padding) - return self - - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - text_config = self.config.text - - # Extract shape of the bbox if exists - if self.bbox is not None: - tl, br = self.bbox.denormalize(frame.shape) - shape = br[0] - tl[0], br[1] - tl[1] - else: - shape = frame.shape[:2] - - font_scale = self.size or text_config.font_scale - if self.size is None and text_config.auto_scale: - font_scale = self.get_text_scale(shape, self.bbox) - - # Calculate font thickness - font_thickness = max(1, int(font_scale * 2)) \ - if text_config.auto_scale else self.thickness or text_config.font_thickness - - dx, dy = cv2.getTextSize(self.text, text_config.font_face, font_scale, font_thickness)[0] - dy += 10 - - for line in self.text.splitlines(): - y = self.coords[1] - - background_color = self.background_color or text_config.background_color - background_transparency = self.background_transparency or text_config.background_transparency - if background_color is not None: - img_with_background = cv2.rectangle(img=frame.copy(), - pt1=(self.coords[0], y - dy), - pt2=(self.coords[0] + dx, y + 10), - color=background_color, - thickness=-1) - # take transparency into account - cv2.addWeighted(src1=img_with_background, - alpha=background_transparency, - src2=frame, - beta=1 - background_transparency, - gamma=0, - dst=frame) - - if self.outline: - # Background - cv2.putText(img=frame, - text=line, - org=self.coords, - fontFace=text_config.font_face, - fontScale=font_scale, - color=text_config.outline_color, - thickness=font_thickness + 1, - lineType=text_config.line_type) - - # Front text - cv2.putText(img=frame, - text=line, - org=self.coords, - fontFace=text_config.font_face, - fontScale=font_scale, - color=self.color or text_config.font_color, - thickness=font_thickness, - lineType=text_config.line_type) - - self.coords = (self.coords[0], y + dy) - - def get_relative_position(self, - bbox: BoundingBox, - position: TextPosition, - padding: int - ) -> Tuple[int, int]: - """ - Get relative position of the text w.r.t. the bounding box. - If bbox is None,the position is relative to the frame. 
- """ - if bbox is None: - bbox = BoundingBox() - text_config = self.config.text - - tl, br = bbox.denormalize(self.frame_shape) - shape = br[0] - tl[0], br[1] - tl[1] - - bbox_arr = bbox.to_tuple(self.frame_shape) - - font_scale = self.size or text_config.font_scale - if self.size is None and text_config.auto_scale: - font_scale = self.get_text_scale(shape, bbox_arr) - - text_width, text_height = 0, 0 - for text in self.text.splitlines(): - text_size = cv2.getTextSize(text=text, - fontFace=text_config.font_face, - fontScale=font_scale, - thickness=text_config.font_thickness)[0] - text_width = max(text_width, text_size[0]) - text_height += text_size[1] - - x, y = bbox_arr[0], bbox_arr[1] - - y_pos = position.value % 10 - if y_pos == 0: # Y top - y = bbox_arr[1] + text_height + padding - elif y_pos == 1: # Y mid - y = (bbox_arr[1] + bbox_arr[3]) // 2 + text_height // 2 - elif y_pos == 2: # Y bottom - y = bbox_arr[3] - text_height - padding - - x_pos = position.value // 10 - if x_pos == 0: # X Left - x = bbox_arr[0] + padding - elif x_pos == 1: # X mid - x = (bbox_arr[0] + bbox_arr[2]) // 2 - text_width // 2 - elif x_pos == 2: # X right - x = bbox_arr[2] - text_width - padding - - return x, y - - def get_text_scale(self, - frame_shape: Union[np.ndarray, Tuple[int, ...]], - bbox: Optional[BoundingBox] = None - ) -> float: - return min(1.0, min(frame_shape) / (1000 if bbox is None else 200)) - class VisTrail(GenericObject): """ @@ -730,12 +432,6 @@ def get_rect_centroid(rect: dai.Rect, w, h) -> Tuple[int, int]: """ return int(w * (rect.x + rect.width) // 2), int(h * (rect.y + rect.height) // 2) - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - self.draw_children(frame) - class VisLine(GenericObject): """ @@ -777,17 +473,6 @@ def serialize(self): def prepare(self) -> 'VisLine': return self - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - tracking_config = self.config.tracking - cv2.line(frame, - self.pt1, self.pt2, - self.color or tracking_config.line_color, - self.thickness or tracking_config.line_thickness, - tracking_config.line_type) - class VisCircle(GenericObject): def __init__(self, @@ -824,18 +509,6 @@ def serialize(self): return parent - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - circle_config = self.config.circle - cv2.circle(frame, - self.coords, - self.radius, - self.color or circle_config.color, - self.thickness or circle_config.thickness, - circle_config.line_type) - class VisMask(GenericObject): def __init__(self, mask: np.ndarray, alpha: float = None): @@ -857,12 +530,6 @@ def serialize(self): return parent - def draw(self, frame: np.ndarray) -> None: - if self.frame_shape is None: - self.frame_shape = frame.shape - - cv2.addWeighted(frame, 1 - self.alpha, self.mask, self.alpha, 0, frame) - class VisPolygon(GenericObject): def __init__(self, polygon): diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizer.py b/depthai_sdk/src/depthai_sdk/visualize/visualizer.py index 67d214f69..18c5e1948 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/visualizer.py +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizer.py @@ -1,44 +1,39 @@ import json -import os from dataclasses import replace -from enum import Enum from typing import List, Tuple, Optional, Union, Any, Dict -try: - import cv2 -except ImportError: - cv2 = None - import depthai as dai import numpy as np from depthai import 
ImgDetection
 
+from depthai_sdk.fps import FPSHandler
 from depthai_sdk.visualize.bbox import BoundingBox
 from depthai_sdk.visualize.configs import VisConfig, TextPosition, BboxStyle, StereoColor
 from depthai_sdk.visualize.encoder import JSONEncoder
 from depthai_sdk.visualize.objects import VisDetections, GenericObject, VisText, VisTrail, VisCircle, VisLine, VisMask, \
     VisBoundingBox
-from depthai_sdk.visualize.visualizer_helper import VisualizerHelper
 
 
-class Platform(Enum):
-    """
-    Platform on which the visualizer is running.
-    """
-    ROBOTHUB = 'robothub'
-    PC = 'pc'
+class VisualizerFps:
+    def __init__(self):
+        self.fps_list: Dict[str, FPSHandler] = {}
 
+    def get_fps(self, name: str) -> float:
+        if name not in self.fps_list:
+            self.fps_list[name] = FPSHandler()
 
-class Visualizer(VisualizerHelper):
-    # Constants
-    IS_INTERACTIVE = 'DISPLAY' in os.environ or os.name == 'nt'
+        self.fps_list[name].nextIter()
+        return self.fps_list[name].fps()
+
 
+class Visualizer:
     def __init__(self, scale: float = None, fps: bool = False):
-        self.platform: Platform = self._detect_platform()
         self.objects: List[GenericObject] = []
         self._frame_shape: Optional[Tuple[int, ...]] = None
 
         self.config = VisConfig()
+        self.fps = VisualizerFps()
 
         if fps:
             self.output(show_fps=fps)
@@ -59,7 +54,7 @@ def add_object(self, obj: GenericObject) -> 'Visualizer':
         return self
 
     def add_bbox(self,
-                 bbox: Union[np.ndarray, Tuple[int, ...]],
+                 bbox: BoundingBox,
                  color: Tuple[int, int, int] = None,
                  thickness: int = None,
                  bbox_style: BboxStyle = None,
@@ -135,7 +130,7 @@ def add_text(self,
                  outline: bool = True,
                  background_color: Tuple[int, int, int] = None,
                  background_transparency: float = 0.5,
-                 bbox: Union[np.ndarray, Tuple[int, ...], BoundingBox] = None,
+                 bbox: Union[np.ndarray, Tuple, BoundingBox] = None,
                  position: TextPosition = TextPosition.TOP_LEFT,
                  padding: int = 10) -> 'Visualizer':
         """
@@ -157,6 +152,9 @@ def add_text(self,
         Returns:
             self
         """
+        if isinstance(bbox, Tuple) and type(bbox[0]) == float:
+            bbox = BoundingBox(bbox)
+
         text_overlay = VisText(text=text,
                                coords=coords,
                                size=size,
@@ -258,7 +256,7 @@ def add_mask(self, mask: np.ndarray, alpha: float):
         self.add_object(mask_overlay)
         return self
 
-    def draw(self, frame: np.ndarray) -> Optional[np.ndarray]:
+    def draw(self, frame: np.ndarray) -> Optional[np.ndarray]:
         """
         Draw all objects on the frame if the platform is PC.
         Otherwise, serialize the objects and communicate with the RobotHub application.
@@ -269,20 +267,13 @@ def draw(self, frame: np.ndarray) -> Optional[np.ndarray]:
         Returns:
             np.ndarray if the platform is PC, None otherwise.
         """
-        # Draw overlays
-        for obj in self.objects:
-            obj.draw(frame)
-
-        # Resize frame if needed
-        img_scale = self.config.output.img_scale
-        if img_scale:
-            if isinstance(img_scale, Tuple):
-                frame = cv2.resize(frame, img_scale)
-            elif isinstance(img_scale, float) and img_scale != 1.0:
-                frame = cv2.resize(frame, dsize=None, fx=img_scale, fy=img_scale)
+        raise NotImplementedError('Visualizers that inherit from Visualizer must implement draw() method!')
 
-        self.reset()
-        return frame
+    def show(self, packet):
+        """
+        Show the packet on the screen.
+        """
+        pass
 
     def serialize(self, force_reset: bool = True) -> str:
         """
@@ -295,7 +286,6 @@ def serialize(self, force_reset: bool = True) -> str:
             Stringified JSON.
""" parent = { - 'platform': self.platform.value, 'frame_shape': self.frame_shape, 'config': self.config, 'objects': [obj.serialize() for obj in self.objects] @@ -458,15 +448,6 @@ def segmentation(self, return self - def _detect_platform(self) -> Platform: - """ - Detect the platform on which the visualizer is running. - - Returns: - Platform - """ - return Platform.PC if self.IS_INTERACTIVE else Platform.ROBOTHUB - @property def frame_shape(self) -> Tuple[int, ...]: return self._frame_shape @@ -481,3 +462,6 @@ def _process_kwargs(kwargs: Dict[str, Any]) -> Dict[str, Any]: kwargs.pop('self') kwargs = {k: v for k, v in kwargs.items() if v is not None} return kwargs + + def close(self): + pass diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py b/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py index 398e3c9f7..f7602fce7 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizer_helper.py @@ -1,8 +1,10 @@ -import math from enum import IntEnum -from types import SimpleNamespace from typing import Tuple, Union, List, Any, Dict +from depthai_sdk.classes.nn_results import TrackingDetection, TwoStageDetection +from depthai_sdk.visualize.configs import BboxStyle +from depthai_sdk.visualize.objects import VisBoundingBox + try: import cv2 except ImportError: @@ -13,13 +15,12 @@ from depthai_sdk.classes.packets import ( DetectionPacket, - _TwoStageDetection, SpatialBbMappingPacket, TrackerPacket, - _TrackingDetection ) from depthai_sdk.visualize.bbox import BoundingBox + class FramePosition(IntEnum): """ Where on frame do we want to print text. @@ -238,6 +239,7 @@ def rectangle(src, return src + def draw_mappings(packet: SpatialBbMappingPacket): dets = packet.spatials.detections for det in dets: @@ -254,15 +256,7 @@ def draw_mappings(packet: SpatialBbMappingPacket): cv2.rectangle(packet.frame, (x_min, y_min), (x_max, y_max), VisualizerHelper.front_color, 1) -def spatials_text(spatials: dai.Point3f): - return SimpleNamespace( - x="X: " + ("{:.1f}m".format(spatials.x / 1000) if not math.isnan(spatials.x) else "--"), - y="Y: " + ("{:.1f}m".format(spatials.y / 1000) if not math.isnan(spatials.y) else "--"), - z="Z: " + ("{:.1f}m".format(spatials.z / 1000) if not math.isnan(spatials.z) else "--"), - ) - - -def draw_detections(packet: Union[DetectionPacket, _TwoStageDetection, TrackerPacket], +def draw_detections(packet: Union[DetectionPacket, TwoStageDetection, TrackerPacket], norm: BoundingBox, label_map: List[Tuple[str, Tuple]] = None): """ @@ -312,7 +306,7 @@ def draw_tracklet_id(packet: TrackerPacket): def draw_breadcrumb_trail(packets: List[TrackerPacket]): packet = packets[-1] # Current packet - dict_: Dict[str, List[_TrackingDetection]] = {} + dict_: Dict[str, List[TrackingDetection]] = {} valid_ids = [t.id for t in packet.daiTracklets.tracklets] for idx in valid_ids: dict_[str(idx)] = [] @@ -369,27 +363,149 @@ def depth_to_disp_factor(device: dai.Device, stereo: dai.node.StereoDepth) -> fl @param device: OAK device """ calib = device.readCalibration() - cam1=calib.getStereoLeftCameraId() - cam2=calib.getStereoRightCameraId() + cam1 = calib.getStereoLeftCameraId() + cam2 = calib.getStereoRightCameraId() baseline = calib.getBaselineDistance(cam1=cam1, cam2=cam2, useSpecTranslation=True) * 10 # cm to mm - rawConf = stereo.initialConfig.get() + raw_conf = stereo.initialConfig.get() align: dai.CameraBoardSocket = stereo.properties.depthAlignCamera if align == dai.CameraBoardSocket.AUTO: align = cam2 
intrinsics = calib.getCameraIntrinsics(align) - focalLength = intrinsics[0][0] + focal_length = intrinsics[0][0] - factor = baseline * focalLength - if rawConf.algorithmControl.enableExtended: + factor = baseline * focal_length + if raw_conf.algorithmControl.enableExtended: factor /= 2 return factor -def hex_to_bgr(hex: str) -> Tuple[int, ...]: + +def draw_bbox(img: np.ndarray, + pt1: Tuple[int, int], + pt2: Tuple[int, int], + color: Tuple[int, int, int], + thickness: int, + r: int, + line_width: int, + line_height: int, + alpha: float + ) -> None: + """ + Draw a rounded rectangle on the image (in-place). + + Args: + img: Image to draw on. + pt1: Top-left corner of the rectangle. + pt2: Bottom-right corner of the rectangle. + color: Rectangle color. + thickness: Rectangle line thickness. + r: Radius of the rounded corners. + line_width: Width of the rectangle line. + line_height: Height of the rectangle line. + alpha: Opacity of the rectangle. + """ + x1, y1 = pt1 + x2, y2 = pt2 + + if line_width == 0: + line_width = np.abs(x2 - x1) + line_width -= 2 * r if r > 0 else 0 # Adjust for rounded corners + + if line_height == 0: + line_height = np.abs(y2 - y1) + line_height -= 2 * r if r > 0 else 0 # Adjust for rounded corners + + # Top left + cv2.line(img, (x1 + r, y1), (x1 + r + line_width, y1), color, thickness) + cv2.line(img, (x1, y1 + r), (x1, y1 + r + line_height), color, thickness) + cv2.ellipse(img, (x1 + r, y1 + r), (r, r), 180, 0, 90, color, thickness) + + # Top right + cv2.line(img, (x2 - r, y1), (x2 - r - line_width, y1), color, thickness) + cv2.line(img, (x2, y1 + r), (x2, y1 + r + line_height), color, thickness) + cv2.ellipse(img, (x2 - r, y1 + r), (r, r), 270, 0, 90, color, thickness) + + # Bottom left + cv2.line(img, (x1 + r, y2), (x1 + r + line_width, y2), color, thickness) + cv2.line(img, (x1, y2 - r), (x1, y2 - r - line_height), color, thickness) + cv2.ellipse(img, (x1 + r, y2 - r), (r, r), 90, 0, 90, color, thickness) + + # Bottom right + cv2.line(img, (x2 - r, y2), (x2 - r - line_width, y2), color, thickness) + cv2.line(img, (x2, y2 - r), (x2, y2 - r - line_height), color, thickness) + cv2.ellipse(img, (x2 - r, y2 - r), (r, r), 0, 0, 90, color, thickness) + + # Fill the area + if 0 < alpha: + overlay = img.copy() + + thickness = -1 + bbox = (pt1[0], pt1[1], pt2[0], pt2[1]) + + top_left = (bbox[0], bbox[1]) + bottom_right = (bbox[2], bbox[3]) + top_right = (bottom_right[0], top_left[1]) + bottom_left = (top_left[0], bottom_right[1]) + + top_left_main_rect = (int(top_left[0] + r), int(top_left[1])) + bottom_right_main_rect = (int(bottom_right[0] - r), int(bottom_right[1])) + + top_left_rect_left = (top_left[0], top_left[1] + r) + bottom_right_rect_left = (bottom_left[0] + r, bottom_left[1] - r) + + top_left_rect_right = (top_right[0] - r, top_right[1] + r) + bottom_right_rect_right = (bottom_right[0], bottom_right[1] - r) + + all_rects = [ + [top_left_main_rect, bottom_right_main_rect], + [top_left_rect_left, bottom_right_rect_left], + [top_left_rect_right, bottom_right_rect_right] + ] + + [cv2.rectangle(overlay, pt1=rect[0], pt2=rect[1], color=color, thickness=thickness) for rect in all_rects] + + cv2.ellipse(overlay, (top_left[0] + r, top_left[1] + r), (r, r), 180.0, 0, 90, color, thickness) + cv2.ellipse(overlay, (top_right[0] - r, top_right[1] + r), (r, r), 270.0, 0, 90, color, thickness) + cv2.ellipse(overlay, (bottom_right[0] - r, bottom_right[1] - r), (r, r), 0.0, 0, 90, color, thickness) + cv2.ellipse(overlay, (bottom_left[0] + r, bottom_left[1] - r), (r, r), 90.0, 0, 
90, color, thickness) + + cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img) + + +def draw_stylized_bbox(img: np.ndarray, obj: VisBoundingBox) -> None: """ - "#ff1f00" (red) => (0, 31, 255) + Draw a stylized bounding box. The style is either passed as an argument or defined in the config. + + Args: + img: Image to draw on. + obj: Bounding box to draw. """ - value = hex.lstrip('#') - return tuple(int(value[i:i + 2], 16) for i in (4, 2, 0)) + pt1, pt2 = obj.bbox.denormalize(img.shape) + + box_w = pt2[0] - pt1[0] + box_h = pt2[1] - pt1[1] + + line_width = int(box_w * obj.config.detection.line_width) // 2 + line_height = int(box_h * obj.config.detection.line_height) // 2 + roundness = int(obj.config.detection.box_roundness) + bbox_style = obj.bbox_style or obj.config.detection.bbox_style + alpha = obj.config.detection.fill_transparency + + if bbox_style == BboxStyle.RECTANGLE: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, 0, + line_width=0, line_height=0, alpha=alpha) + elif bbox_style == BboxStyle.CORNERS: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, 0, + line_width=line_width, line_height=line_height, alpha=alpha) + elif bbox_style == BboxStyle.ROUNDED_RECTANGLE: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, roundness, + line_width=0, line_height=0, alpha=alpha) + elif bbox_style == BboxStyle.ROUNDED_CORNERS: + draw_bbox(img, pt1, pt2, + obj.color, obj.thickness, roundness, + line_width=line_width, line_height=line_height, alpha=alpha) diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_text.py b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_text.py new file mode 100644 index 000000000..32e76da86 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_text.py @@ -0,0 +1,132 @@ +from typing import Optional, Sequence, Tuple, Union + +import cv2 +import numpy as np + +from depthai_sdk.visualize.bbox import BoundingBox +from depthai_sdk.visualize.configs import VisConfig +from depthai_sdk.visualize.objects import VisText + + +class OpenCvTextVis: + def __init__(self, text: VisText, config: VisConfig): + self.text = text + self.config = config + + def draw_text(self, frame: np.ndarray): + obj = self.text + + self.prepare(frame.shape) + + text_config = self.config.text + + # Extract shape of the bbox if exists + if obj.bbox is not None: + # shape = self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1] + tl, br = obj.bbox.denormalize(frame.shape) + shape = br[0] - tl[0], br[1] - tl[1] + else: + shape = frame.shape[:2] + + font_scale = obj.size or text_config.font_scale + if obj.size is None and text_config.auto_scale: + font_scale = self.get_text_scale(shape, obj.bbox) + + # Calculate font thickness + font_thickness = max(1, int(font_scale * 2)) \ + if text_config.auto_scale else obj.thickness or text_config.font_thickness + + dy = cv2.getTextSize(obj.text, text_config.font_face, font_scale, font_thickness)[0][1] + 10 + + for line in obj.text.splitlines(): + y = obj.coords[1] + + if obj.outline: + # Background + cv2.putText(img=frame, + text=line, + org=obj.coords, + fontFace=text_config.font_face, + fontScale=font_scale, + color=text_config.background_color, + thickness=font_thickness + 1, + lineType=text_config.line_type) + + # Front text + cv2.putText(img=frame, + text=line, + org=obj.coords, + fontFace=text_config.font_face, + fontScale=font_scale, + color=obj.color or text_config.font_color, + thickness=font_thickness, + lineType=text_config.line_type) + + obj.coords = (obj.coords[0], y + dy) + + def 
get_relative_position(self, obj: VisText, frame_shape) -> Tuple[int, int]: + """ + Get relative position of the text w.r.t. the bounding box. + If bbox is None, the position is relative to the frame. + """ + if obj.bbox is None: + obj.bbox = BoundingBox() + text_config = self.config.text + + tl, br = obj.bbox.denormalize(frame_shape) + shape = br[0] - tl[0], br[1] - tl[1] + + bbox_arr = obj.bbox.to_tuple(frame_shape) + + font_scale = obj.size or text_config.font_scale + if obj.size is None and text_config.auto_scale: + self.get_text_scale(shape, bbox_arr) + + text_width, text_height = 0, 0 + for text in obj.text.splitlines(): + text_size = cv2.getTextSize(text=text, + fontFace=text_config.font_face, + fontScale=font_scale, + thickness=text_config.font_thickness)[0] + text_width = max(text_width, text_size[0]) + text_height += text_size[1] + + x, y = bbox_arr[0], bbox_arr[1] + + y_pos = obj.position.value % 10 + if y_pos == 0: # Y top + y = bbox_arr[1] + text_height + obj.padding + elif y_pos == 1: # Y mid + y = (bbox_arr[1] + bbox_arr[3]) // 2 + text_height // 2 + elif y_pos == 2: # Y bottom + y = bbox_arr[3] - text_height - obj.padding + + x_pos = obj.position.value // 10 + if x_pos == 0: # X Left + x = bbox_arr[0] + obj.padding + elif x_pos == 1: # X mid + x = (bbox_arr[0] + bbox_arr[2]) // 2 - text_width // 2 + elif x_pos == 2: # X right + x = bbox_arr[2] - text_width - obj.padding + + return x, y + + def prepare(self, frame_shape): + # TODO: in the future, we can stop support for passing pixel-space bbox to the + # visualizer. + if isinstance(self.text.bbox, (Sequence, np.ndarray)) and type(self.text.bbox[0]) == int: + # Convert to BoundingBox. Divide by self.frame_shape and load into the BoundingBox + bbox = list(self.text.bbox) + bbox[0] /= frame_shape[1] + bbox[1] /= frame_shape[0] + bbox[2] /= frame_shape[1] + bbox[3] /= frame_shape[0] + self.text.bbox = BoundingBox(bbox) + + self.text.coords = self.text.coords or self.get_relative_position(self.text, frame_shape) + + def get_text_scale(self, + frame_shape: Union[np.ndarray, Tuple[int, ...]], + bbox: Optional[BoundingBox] = None + ) -> float: + return min(1.0, min(frame_shape) / (1000 if bbox is None else 200)) diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_visualizer.py b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_visualizer.py new file mode 100644 index 000000000..ca204a39b --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizers/opencv_visualizer.py @@ -0,0 +1,102 @@ +import logging +from typing import Optional + +import cv2 +import numpy as np + +from depthai_sdk.classes.packets import DisparityPacket, FramePacket +from depthai_sdk.visualize.configs import TextPosition +from depthai_sdk.visualize.objects import ( + VisBoundingBox, + VisCircle, + VisDetections, + VisLine, + VisMask, + VisText, + VisTrail, +) +from depthai_sdk.visualize.visualizer import Visualizer +from depthai_sdk.visualize.visualizer_helper import draw_stylized_bbox, draw_bbox +from depthai_sdk.visualize.visualizers.opencv_text import OpenCvTextVis + + +class OpenCvVisualizer(Visualizer): + def draw(self, frame: np.ndarray) -> Optional[np.ndarray]: + """ + Draw all queued overlay objects onto the frame with OpenCV, then reset the visualizer state. + + Args: + frame: The frame to draw on. + + Returns: + The frame with the overlays drawn onto it.
+ """ + # Draw overlays + for obj in self.objects: + if type(obj) == VisBoundingBox: + draw_stylized_bbox(frame, obj=obj) + elif type(obj) == VisDetections: + for bbox, _, color in obj.get_detections(): + tl, br = bbox.denormalize(frame.shape) + draw_bbox( + img=frame, + pt1=tl, + pt2=br, + color=color, + thickness=self.config.detection.thickness, + r=self.config.detection.radius, + line_width=self.config.detection.line_width, + line_height=self.config.detection.line_height, + alpha=self.config.detection.alpha, + ) + elif type(obj) == VisText: + OpenCvTextVis(obj, self.config).draw_text(frame) + elif type(obj) == VisTrail: + obj = obj.prepare() + # Children: VisLine + self.objects.extend(obj.children) + elif type(obj) == VisLine: + cv2.line(frame, + obj.pt1, obj.pt2, + obj.color or self.config.tracking.line_color, + obj.thickness or self.config.tracking.line_thickness, + self.config.tracking.line_type) + elif type(obj) == VisCircle: + circle_config = self.config.circle + cv2.circle(frame, + obj.coords, + obj.radius, + obj.color or circle_config.color, + obj.thickness or circle_config.thickness, + circle_config.line_type) + elif type(obj) == VisMask: + cv2.addWeighted(frame, 1 - obj.alpha, obj.mask, obj.alpha, 0, frame) + + self.reset() + return frame + + def show(self, packet) -> None: + if self.config.output.show_fps: + fps = self.fps.get_fps(packet.name) + self.add_text(text=f'FPS: {fps:.1f}', position=TextPosition.TOP_LEFT) + + if isinstance(packet, DisparityPacket): + frame = packet.get_colorized_frame(self) + elif isinstance(packet, FramePacket): + frame = packet.decode() + else: + logging.warning(f'Unknown packet type: {type(packet)}') + return + + if frame is not None: + drawn_frame = self.draw(frame) + if self.config.output.img_scale: + drawn_frame = cv2.resize(drawn_frame, + None, + fx=self.config.output.img_scale, + fy=self.config.output.img_scale) + cv2.imshow(packet.name, drawn_frame) + + def close(self): + cv2.destroyAllWindows() diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizers/viewer_visualizer.py b/depthai_sdk/src/depthai_sdk/visualize/visualizers/viewer_visualizer.py new file mode 100644 index 000000000..90f788005 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizers/viewer_visualizer.py @@ -0,0 +1,111 @@ +import logging +import subprocess +import sys + +import depthai_viewer as viewer +import numpy as np +from depthai_viewer.components.rect2d import RectFormat + +from depthai_sdk.classes.packets import FramePacket, IMUPacket, PointcloudPacket +from depthai_sdk.visualize.objects import ( + VisBoundingBox, + VisCircle, + VisDetections, + VisLine, + VisMask, + VisText, + VisTrail, +) +from depthai_sdk.visualize.visualizer import Visualizer + + +class DepthaiViewerVisualizer(Visualizer): + """ + Visualizer for Depthai Viewer (https://github.com/luxonis/depthai-viewer) + """ + + def __init__(self, scale, fps): + super().__init__(scale, fps) + + try: + # timeout is optional, but it might be good to prevent the script from hanging if the module is large. + process = subprocess.Popen([sys.executable, "-m", "depthai_viewer"], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate(timeout=3) + + if process.returncode != 0: + err_msg = stderr.decode("utf-8") + if 'Failed to bind TCP address' in err_msg: + # Already running + pass + elif 'No module named depthai_viewer' in err_msg: + raise Exception(f"DepthAI Viewer is not installed. 
" + f"Please run '{sys.executable} -m pip install depthai_viewer' to install it.") + else: + logging.exception(f"Error occurred while trying to run depthai_viewer: {err_msg}") + else: + print("depthai_viewer ran successfully.") + except subprocess.TimeoutExpired: + # Installed and running depthai_viewer successfully + pass + except subprocess.CalledProcessError as e: + print(f"An error occurred while trying to run 'depthai_viewer': {str(e)}") + + viewer.init("Depthai Viewer") + viewer.connect() + + def show(self, packet) -> None: + if isinstance(packet, FramePacket): + bgr_frame = packet.decode() + rgb_frame = bgr_frame[..., ::-1] + frame = np.dstack((rgb_frame, np.full(bgr_frame.shape[:2], 255, dtype=np.uint8))) + viewer.log_image(packet.name, frame) + + if type(packet) == IMUPacket: + viewer.log_imu(*packet.get_imu_vals()) + elif type(packet) == PointcloudPacket: + if packet.colorize_frame is not None: + bgr_frame = packet.colorize_frame + rgb_frame = bgr_frame[..., ::-1] + frame = np.dstack((rgb_frame, np.full(bgr_frame.shape[:2], 255, dtype=np.uint8))) + viewer.log_image(f'color', frame) + viewer.log_points(packet.name, packet.points.reshape(-1, 3) / 1000, colors=rgb_frame.reshape(-1, 3)) + else: + viewer.log_points(packet.name, packet.points.reshape(-1, 3) / 1000) + + vis_bbs = [] + for i, obj in enumerate(self.objects): + if type(obj) == VisBoundingBox: + vis_bbs.append(obj) + elif type(obj) == VisDetections: + pass + elif type(obj) == VisText: + pass + elif type(obj) == VisTrail: + pass + elif type(obj) == VisLine: + pass + elif type(obj) == VisCircle: + pass + elif type(obj) == VisMask: + pass + + if 0 < len(vis_bbs): + rects = [vis_bb.bbox.clip().denormalize(frame.shape) for vis_bb in vis_bbs] + # Convert from (pt1,pt2) to [x1,y1,x2,y2] + rects = [np.array([*rect[0], *rect[1]]) for rect in rects] + # BGR to RGB + colors = [np.array(vis_bb.color)[..., ::-1] for vis_bb in vis_bbs] + labels = [vis_bb.label for vis_bb in vis_bbs] + print(rects) + viewer.log_rects( + f"{packet.name}/Detections", + rects=rects, + rect_format=RectFormat.XYXY, + colors=colors, + labels=labels + ) + self.reset() + + def close(self): + pass diff --git a/resources/nn/yolo-v3-tf/yolo-v3-tf.json b/resources/nn/yolo-v3-tf/yolo-v3-tf.json index fc3950010..7d7eb8996 100644 --- a/resources/nn/yolo-v3-tf/yolo-v3-tf.json +++ b/resources/nn/yolo-v3-tf/yolo-v3-tf.json @@ -5,11 +5,11 @@ "NN_family" : "YOLO", "input_size": "416x416", "NN_specific_metadata" : - { + { "classes" : 80, "coordinates" : 4, "anchors" : [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0,116.0, 90.0, 156.0,198.0,373.0, 326.0], - "anchor_masks" : + "anchor_masks" : { "side52" : [0,1,2], "side26" : [3,4,5], @@ -102,7 +102,7 @@ "scissors", "teddy bear", "hair drier", - "toothbrush" + "toothbrush" ] } }