39 commits
59a2f70
added poc of data_collector and adjusted snaps example
klemen1999 Sep 28, 2025
ac43660
fixed standalone mode
klemen1999 Sep 30, 2025
cd76d98
build prepare
klemen1999 Sep 30, 2025
f1505e7
Added snapping conditions app
Oct 16, 2025
7921e07
Added conditioning snapping
Oct 20, 2025
d518809
refactored leagcy code and snap utils
Oct 20, 2025
2fd8b82
Added detections sending with the snaps
Oct 21, 2025
239547b
Moved to up to date version of snaps V2 and added detections to snaps
Oct 21, 2025
08af653
Cleanup
Oct 21, 2025
5aa202e
Fixed labels issue in FE view
Oct 21, 2025
21c57f8
Enhanced FE and changed seconds to minutes
Oct 26, 2025
44b7337
clean up
Oct 26, 2025
c5e90b7
FE update + BE refactoring
Oct 27, 2025
6f96669
refactored FE
Oct 27, 2025
503a57a
Pre commit
Oct 27, 2025
7408fbc
fixed init
Oct 27, 2025
6db11ea
Added FE fixes to keep data on reload + background of notifications
Oct 29, 2025
f2d9d23
moved to dir + added README
Oct 29, 2025
45b3f46
removed old data collection app from custom FE
Oct 29, 2025
7fd2ec3
Added failing tests constants
Oct 30, 2025
01a4b10
Refactored the code base
Oct 30, 2025
efca2d7
Finished Application Refactoring
Nov 3, 2025
5b44311
Fixed several typing issues
Nov 4, 2025
09ae161
Fixed non typed argument in claasses
Nov 4, 2025
54ea100
fixed several naming issues
Nov 4, 2025
91c6d5d
changed export manager tratment in main
Nov 4, 2025
b203ab2
cleaned up tracklet analyzer
Nov 4, 2025
6b9fa34
refactored configurator class
Nov 4, 2025
2f20946
application refactoring
Nov 6, 2025
209b2a8
Continued refactor + added payload validation
Nov 6, 2025
e78bb9d
Changed model state to be a dataclass
Nov 7, 2025
7692b65
simplified export service
Nov 7, 2025
aecf2ed
tiny refactoring + renaming
Nov 8, 2025
86276c6
cahnged snap sending logic according to new snaps uploader node
Nov 10, 2025
b7451f6
cahnged snap sending logic according to new snaps uploader node
Nov 10, 2025
c465456
changed snapping logic + refactored services
Nov 11, 2025
a4e4d67
Changed README
Nov 12, 2025
98e18cf
changed README
Nov 13, 2025
0543bbd
removed unneded comment
Nov 13, 2025
3 changes: 3 additions & 0 deletions apps/data-collection/.gitignore
@@ -0,0 +1,3 @@
clip_textual_hf.onnx
clip_visual_with_projector.onnx
tokenizer.json
38 changes: 38 additions & 0 deletions apps/data-collection/.oakappignore
@@ -0,0 +1,38 @@
# Python virtual environments
venv/
.venv/
env

# Node.js
# ignore node_modules, it will be reinstalled in the container
node_modules/

# Multimedia files
media/

# Local models
*.onnx

# Documentation
README.md

# VCS
.gitignore
.git/
.github/
.gitlab/

# The following files are ignored by default
# uncomment a line if you explicitly need it

# !*.oakapp

# Python
# !**/.mypy_cache/
# !**/.ruff_cache/

# IDE files
# !**/.idea
# !**/.vscode
# !**/.zed

68 changes: 68 additions & 0 deletions apps/data-collection/README.md
@@ -0,0 +1,68 @@
# Data Collection

This application combines on-device open-vocabulary detection with an interactive frontend to **auto-collect “snaps” (images + metadata) under configurable conditions**.\
It runs **YOLOE** on the DepthAI backend, and exposes controls in the UI for:

- Selecting labels (by **text** or **image prompt**)
- Adjusting **confidence threshold**
- Enabling **snap conditions** (timed, no detections, low confidence, lost-in-middle)

> **Note:** RVC4 standalone mode only.

## Features

- **Class control**
- Update classes by text or upload an image to create a visual prompt
- **Confidence filter**
- Drop detections below a chosen threshold
- **Snapping (auto-capture)**
- **Timed** (periodic)
- **No detections** (when a frame has zero detections)
- **Low confidence** (if any detection falls below threshold)
- **Lost-in-middle** (object disappears inside central area; edge buffer configurable)
- Cooldowns **reset** when snapping is (re)started

______________________________________________________________________

## Usage

A **Luxonis device** (RVC4) is required.

### Arguments

```text
-fps FPS_LIMIT, --fps_limit FPS_LIMIT
                        FPS limit for the model runtime. (default: None)
-api API_KEY, --api_key API_KEY
                        HubAI API key to access private models. (default: '')
-media MEDIA_PATH, --media_path MEDIA_PATH
                        Path to media file; if not set, runs on the live camera. (default: None)
-ip IP, --ip IP         IP address to serve the frontend on. (default: None)
-p PORT, --port PORT    Port to serve the frontend on. (default: None)
```

## Prerequisites (Frontend)

Build the FE once before running:

```bash
cd frontend/
npm i
npm run build
cd ..
```

______________________________________________________________________

## Standalone Mode (RVC4)

Install `oakctl` (see [docs](https://docs.luxonis.com/software-v3/oak-apps/oakctl)), then:

```bash
oakctl connect <DEVICE_IP>
oakctl app run .
```

### Remote access

1. Upload the oakapp to Luxonis Hub via `oakctl`.
2. Open the App UI remotely from the App detail page.
3 changes: 3 additions & 0 deletions apps/data-collection/backend-run.sh
@@ -0,0 +1,3 @@
#!/bin/sh
echo "Starting Backend"
exec python3.12 /app/backend/src/main.py
Empty file.
Empty file.
52 changes: 52 additions & 0 deletions apps/data-collection/backend/src/config/arguments.py
@@ -0,0 +1,52 @@
from argparse import Namespace, ArgumentParser, ArgumentDefaultsHelpFormatter


def initialize_argparser() -> tuple[ArgumentParser, Namespace]:
"""Initialize the argument parser for the script."""
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

parser.add_argument(
"-fps",
"--fps_limit",
help="FPS limit for the model runtime.",
required=False,
default=None,
type=int,
)

parser.add_argument(
"-api",
"--api_key",
help="HubAI API key to access private model. Can also use 'DEPTHAI_HUB_API_KEY' environment variable instead.",
required=False,
default="",
type=str,
)

parser.add_argument(
"-media",
"--media_path",
help="Path to the media file you aim to run the model on. If not set, the model will run on the camera input.",
required=False,
default=None,
type=str,
)

parser.add_argument(
"-ip",
"--ip",
help="IP address to serve the frontend on.",
required=False,
type=str,
)
parser.add_argument(
"-p",
"--port",
help="Port to serve the frontend on.",
required=False,
type=int,
)

args = parser.parse_args()

return parser, args
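The pattern above hands back both the parser and the parsed namespace, so callers can reuse the parser later (e.g. for `print_help`). A minimal stdlib-only sketch of that pattern, trimmed to two flags, with an explicit `argv` parameter added here purely for testability (an addition not in the original):

```python
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, Namespace


def initialize_argparser(argv=None) -> tuple[ArgumentParser, Namespace]:
    """Build the CLI parser and parse arguments (sketch of the pattern above)."""
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument("-fps", "--fps_limit", type=int, default=None,
                        help="FPS limit for the model runtime.")
    parser.add_argument("-p", "--port", type=int, required=False,
                        help="Port to serve the frontend on.")
    # Passing argv explicitly avoids reading sys.argv during tests.
    args = parser.parse_args(argv)
    return parser, args


parser, args = initialize_argparser(["-fps", "10", "-p", "8080"])
print(args.fps_limit, args.port)  # 10 8080
```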
67 changes: 67 additions & 0 deletions apps/data-collection/backend/src/config/cli_env_loader.py
@@ -0,0 +1,67 @@
import argparse


class CLIEnvLoader:
"""
Handles environment (.env) loading and command-line argument parsing.
"""

@staticmethod
def parse_arguments() -> argparse.Namespace:
"""
Private helper to initialize and parse command-line arguments.
"""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

parser.add_argument(
"-fps",
"--fps_limit",
help="FPS limit for the model runtime.",
required=False,
default=None,
type=int,
)

parser.add_argument(
"-api",
"--api_key",
help=(
"HubAI API key to access private model. "
"Alternatively, set 'DEPTHAI_HUB_API_KEY' environment variable."
),
required=False,
default="",
type=str,
)

parser.add_argument(
"-media",
"--media_path",
help=(
"Path to the media file to run the model on. "
"If not set, the model runs on the live camera input."
),
required=False,
default=None,
type=str,
)

parser.add_argument(
"-ip",
"--ip",
help="IP address to serve the frontend on.",
required=False,
type=str,
)

parser.add_argument(
"-p",
"--port",
help="Port to serve the frontend on.",
required=False,
type=int,
)

return parser.parse_args()
35 changes: 35 additions & 0 deletions apps/data-collection/backend/src/config/config_data_classes.py
@@ -0,0 +1,35 @@
from pathlib import Path
from dataclasses import dataclass
import depthai as dai
from box import Box


@dataclass
class ModelInfo:
"""Stores paths and dimensions of the detection model."""

yaml_path: Path
width: int
height: int
description: dai.NNModelDescription
archive: dai.NNArchive
precision: str


@dataclass
class VideoConfig:
"""Stores video configuration (resolution, FPS)."""

resolution: list[int]
fps: int
media_path: str | None
width: int
height: int


@dataclass
class NeuralNetworkConfig:
"""Stores neural network configuration (confidence thresholds, etc.)."""

nn_yaml: Box
model: ModelInfo
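Dataclasses like these act as typed configuration containers. A stdlib-only sketch of how `VideoConfig` would be constructed and read (the depthai- and Box-typed fields of the other classes are omitted here, since they need third-party packages):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class VideoConfig:
    """Video configuration (resolution, FPS, optional media file)."""

    resolution: list[int]
    fps: int
    media_path: Optional[str]  # None means: run on the live camera
    width: int
    height: int


cfg = VideoConfig(resolution=[1920, 1080], fps=30, media_path=None,
                  width=640, height=640)
print(cfg.fps, cfg.media_path)  # 30 None
```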
35 changes: 35 additions & 0 deletions apps/data-collection/backend/src/config/model_loader.py
@@ -0,0 +1,35 @@
from argparse import Namespace

import depthai as dai
from pathlib import Path
from config.config_data_classes import ModelInfo


class ModelLoader:
"""Resolves DepthAI model archive and metadata."""

def __init__(self, platform: str, args: Namespace):
self.platform: str = platform
self.args: Namespace = args

def load_model_info(self) -> ModelInfo:
models_dir = Path(__file__).parent.parent / "depthai_models"
yaml_file = f"yoloe_v8_l_fp16.{self.platform}.yaml"
yaml_path = models_dir / yaml_file

if not yaml_path.exists():
raise SystemExit(f"Model YAML not found for yoloe: {yaml_path}")

desc = dai.NNModelDescription.fromYamlFile(str(yaml_path))
desc.platform = self.platform
archive = dai.NNArchive(dai.getModelFromZoo(desc))
width, height = archive.getInputSize()

return ModelInfo(
yaml_path=yaml_path,
width=width,
height=height,
description=desc,
archive=archive,
precision="",
)
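The lookup above keys the model archive on a per-platform YAML filename. A depthai-free sketch of just that resolution step (the `resolve_model_yaml` helper name and the temporary directory are illustrative only, not part of the original code):

```python
import tempfile
from pathlib import Path


def resolve_model_yaml(models_dir: Path, platform: str) -> Path:
    # Same naming scheme as load_model_info: one YAML per platform.
    yaml_path = models_dir / f"yoloe_v8_l_fp16.{platform}.yaml"
    if not yaml_path.exists():
        raise SystemExit(f"Model YAML not found for yoloe: {yaml_path}")
    return yaml_path


with tempfile.TemporaryDirectory() as d:
    models_dir = Path(d)
    (models_dir / "yoloe_v8_l_fp16.rvc4.yaml").touch()
    resolved = resolve_model_yaml(models_dir, "rvc4")
    print(resolved.name)  # yoloe_v8_l_fp16.rvc4.yaml
```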
61 changes: 61 additions & 0 deletions apps/data-collection/backend/src/config/system_configuration.py
@@ -0,0 +1,61 @@
from __future__ import annotations

from argparse import Namespace
from pathlib import Path
from typing import Optional
from box import Box

from config.cli_env_loader import CLIEnvLoader
from config.yaml_config_manager import YamlConfigManager
from config.model_loader import ModelLoader
from config.config_data_classes import ModelInfo, VideoConfig, NeuralNetworkConfig


class SystemConfiguration:
"""
Class that manages configuration initialization.
"""

def __init__(self, platform: str):
self._platform: str = platform
self._args: Optional[Namespace] = None
self._yaml: Optional[YamlConfigManager] = None
self._model_info: Optional[ModelInfo] = None

def build(self):
"""Initialize all configuration subsystems."""
self._args = CLIEnvLoader.parse_arguments()

base = Path(__file__).parent / "yaml_configs"

self._yaml = YamlConfigManager(base)
self._yaml.load_all()

model_loader = ModelLoader(self._platform, self._args)
self._model_info = model_loader.load_model_info()
self._model_info.precision = self._yaml.prompts.precision

def get_video_config(self) -> VideoConfig:
if self._args.fps_limit is None:
self._args.fps_limit = self._yaml.video.default_fps
print(f"\nFPS limit set to {self._args.fps_limit} for {self._platform}\n")

return VideoConfig(
resolution=self._yaml.video.video_resolution,
fps=self._args.fps_limit,
media_path=self._args.media_path,
width=self._model_info.width,
height=self._model_info.height,
)

def get_neural_network_config(self) -> NeuralNetworkConfig:
return NeuralNetworkConfig(
nn_yaml=self._yaml.nn,
model=self._model_info,
)

def get_snaps_config(self) -> Box:
return self._yaml.conditions

def get_prompts_config(self) -> Box:
return self._yaml.prompts
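`get_video_config` lets a CLI-supplied FPS limit win, falling back to the YAML default otherwise. That precedence rule in isolation, as a hypothetical `effective_fps` helper (for illustration only):

```python
from typing import Optional


def effective_fps(cli_fps: Optional[int], default_fps: int) -> int:
    # CLI value wins; otherwise fall back to the YAML-configured default.
    return cli_fps if cli_fps is not None else default_fps


print(effective_fps(None, 30))  # 30
print(effective_fps(15, 30))    # 15
```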