custom-frontend/open-vocabulary-object-detection/README.md (19 changes: 11 additions & 8 deletions)

@@ -16,14 +16,17 @@ Running this example requires a **Luxonis device** connected to your computer.
 Here is a list of all available parameters:
 
 ```
-  -fps FPS_LIMIT, --fps_limit FPS_LIMIT
-                        FPS limit. (default: None)
-  -ip IP, --ip IP       IP address to serve the frontend on. (default: None)
-  -p PORT, --port PORT  Port to serve the frontend on. (default: None)
-  -m MODEL, --model MODEL
-                        Name of the model to use: yolo-world or yoloe (default: yoloe)
-  --precision PRECISION
-                        Model precision for YOLOE models: int8 (faster) or fp16 (more accurate) (default: fp16)
+  -fps FPS_LIMIT, --fps_limit FPS_LIMIT
+                        FPS limit for the model runtime. (default: None)
+  -media MEDIA_PATH, --media_path MEDIA_PATH
+                        Path to the media file you aim to run the model on. If not set, the model will run on the camera input. (default: None)
+  -ip IP, --ip IP       IP address to serve the frontend on. (default: None)
+  -p PORT, --port PORT  Port to serve the frontend on. (default: None)
+  -m {yolo-world,yoloe}, --model {yolo-world,yoloe}
+                        Name of the model to use: yolo-world or yoloe (default: yoloe)
+  --precision {int8,fp16}
+                        Model precision for YOLOE models: int8 (faster) or fp16 (more accurate). (default: fp16)
+  --semantic_seg        Display output as semantic segmentation; otherwise use instance segmentation (only applicable to YOLOE). (default: False)
 ```
 
 ### Model Options
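For reference, a typical invocation combining the new options might look like the following (assuming the example's entry point is `main.py`; the script name is not shown in this diff):

```
python3 main.py -m yoloe --precision int8 --semantic_seg -fps 10
```

This would run YOLOE in int8 precision, render the overlay as semantic segmentation, and cap the model runtime at 10 FPS.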
@@ -178,6 +178,7 @@ def update_labels(label_names: list[str], offset: int = 0):
     visualizer.addTopic("Video", video_enc.out, "images")
 elif args.model == "yoloe":
     apply_colormap_node = pipeline.create(ApplyColormap).build(nn_with_parser.out)
+    apply_colormap_node.setInstanceToSemanticMask(args.semantic_seg)
     overlay_frames_node = pipeline.create(ImgFrameOverlay).build(
         video_src_out,
         apply_colormap_node.out,
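The new `setInstanceToSemanticMask` call switches the `ApplyColormap` node between coloring per-instance IDs and per-class IDs, matching the `--semantic_seg` flag's help text. As a rough illustration (this is not the node's internals; the arrays and mapping below are invented for the example), collapsing an instance mask into a semantic mask amounts to remapping each pixel's instance ID to its class ID:

```python
import numpy as np

# Hypothetical per-pixel instance IDs and an instance -> class lookup.
instance_mask = np.array([[0, 1],
                          [2, 2]])
instance_to_class = {0: 3, 1: 3, 2: 7}  # instances 0 and 1 share class 3

# Remap each pixel's instance ID to its class ID.
semantic_mask = np.vectorize(instance_to_class.get)(instance_mask)
print(semantic_mask)  # [[3 3]
                      #  [7 7]] -- objects of the same class merge together
```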
@@ -200,6 +201,16 @@ def update_labels(label_names: list[str], offset: int = 0):
 
 visualizer.addTopic("Detections", annotation_node.out)
 
+def get_current_params_service(req) -> dict[str, any]:
+    """Returns the current parameters in use"""
+    out_data = {
+        "confidence_threshold": CONFIDENCE_THRESHOLD,
+        "class_names": CLASS_NAMES,
+        "image_prompt_labels": IMAGE_PROMPT_LABELS,
+    }
+    print("Current params:", out_data)
+    return out_data
+
 def class_update_service(new_classes: list[str]):
     """Changes classes to detect based on the user input"""
     if len(new_classes) == 0:
@@ -210,8 +221,8 @@ def class_update_service(new_classes: list[str]):
         print(
             f"Number of new classes ({len(new_classes)}) exceeds maximum number of classes ({MAX_NUM_CLASSES}), skipping."
         )
         return
-    global CLASS_NAMES, LAST_TEXT_CLASSES
-    CLASS_NAMES = new_classes
+    global LAST_TEXT_CLASSES
+    LAST_TEXT_CLASSES = new_classes.copy()
     text_features = extract_text_embeddings(
         class_names=CLASS_NAMES,
@@ -256,7 +267,8 @@ def class_update_service(new_classes: list[str]):
 
 def conf_threshold_update_service(new_conf_threshold: float):
     """Changes confidence threshold based on the user input"""
-    CONFIDENCE_THRESHOLD = max(0, min(1, new_conf_threshold))
+    global CONFIDENCE_THRESHOLD
+    CONFIDENCE_THRESHOLD = max(0.01, min(0.99, new_conf_threshold))
     nn_with_parser.getParser(0).setConfidenceThreshold(CONFIDENCE_THRESHOLD)
     print(f"Confidence threshold set to: {CONFIDENCE_THRESHOLD}")
 
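The added `global CONFIDENCE_THRESHOLD` fixes a classic Python scoping bug: assigning to a name inside a function creates a function-local variable, so the old code updated the on-device threshold via `setConfidenceThreshold` but left the module-level `CONFIDENCE_THRESHOLD` stale for anything that read it later (such as the new params service). The clamp also tightens from [0, 1] to [0.01, 0.99], presumably to rule out the degenerate all-pass and all-reject thresholds. A minimal sketch of the pitfall, with illustrative names:

```python
THRESHOLD = 0.5

def set_threshold_buggy(value: float) -> None:
    # The assignment makes THRESHOLD local to this function;
    # the module-level THRESHOLD is never touched.
    THRESHOLD = max(0.01, min(0.99, value))

def set_threshold_fixed(value: float) -> None:
    global THRESHOLD  # rebind the module-level name instead
    THRESHOLD = max(0.01, min(0.99, value))

set_threshold_buggy(0.9)
print(THRESHOLD)  # 0.5 -- unchanged
set_threshold_fixed(0.9)
print(THRESHOLD)  # 0.9
```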
@@ -418,6 +430,12 @@ def image_upload_service(image_data):
     vec = image_features[0, :, 0].copy()
     label = image_data.get("label") or image_data["filename"].split(".")[0]
 
+    global \
+        IMAGE_PROMPT_VECTORS, \
+        IMAGE_PROMPT_LABELS, \
+        MAX_IMAGE_PROMPTS, \
+        MAX_NUM_CLASSES
+
     IMAGE_PROMPT_VECTORS.append(vec)
     IMAGE_PROMPT_LABELS.append(label)
     if len(IMAGE_PROMPT_VECTORS) > MAX_IMAGE_PROMPTS:
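Strictly speaking, `global` is only required where a name is rebound; in-place mutation such as `IMAGE_PROMPT_VECTORS.append(vec)` works without it, and for names that appear to be read-only limits (`MAX_IMAGE_PROMPTS`, `MAX_NUM_CLASSES`) the declaration is redundant but harmless. Declaring everything up front still documents the shared state and covers any later rebinding (for example, trimming the prompt lists). A short sketch of the distinction, with illustrative names:

```python
PROMPTS: list[str] = []

def add_prompt(label: str) -> None:
    PROMPTS.append(label)  # in-place mutation: no `global` needed

def trim_prompts(max_len: int) -> None:
    global PROMPTS  # rebinding the name does require `global`
    PROMPTS = PROMPTS[-max_len:]
```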
@@ -586,6 +604,12 @@ def bbox_prompt_service(payload):
         print(f"Unsupported model for bbox prompt: {args.model}")
         return {"ok": False, "reason": "unsupported_model"}
 
+    global \
+        IMAGE_PROMPT_VECTORS, \
+        IMAGE_PROMPT_LABELS, \
+        MAX_IMAGE_PROMPTS, \
+        MAX_NUM_CLASSES
+
     if args.model == "yolo-world":
         vec = image_features[0, :, 0].copy()
         label = payload.get("label", "object")
@@ -673,6 +697,7 @@ def bbox_prompt_service(payload):
     )
     return {"ok": True, "bbox": {"x0": x0, "y0": y0, "x1": x1, "y1": y1}}
 
+visualizer.registerService("Get Current Params Service", get_current_params_service)
 visualizer.registerService("Class Update Service", class_update_service)
 visualizer.registerService(
     "Threshold Update Service", conf_threshold_update_service
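With the new registration in place, the frontend can query the backend's current state instead of tracking it on its own. Called directly in Python (the payload values below are made up; in practice the frontend invokes these through the visualizer's service mechanism), the round trip looks like:

```python
conf_threshold_update_service(0.4)        # rebinds CONFIDENCE_THRESHOLD to 0.4
class_update_service(["person", "dog"])   # re-extracts text embeddings for the new classes
print(get_current_params_service(None))   # now reports the updated values
```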
@@ -56,6 +56,11 @@ def initialize_argparser():
         type=str,
         choices=["int8", "fp16"],
     )
+    parser.add_argument(
+        "--semantic_seg",
+        help="Display output as semantic segmentation; otherwise use instance segmentation (only applicable to YOLOE).",
+        action="store_true",
+    )
 
     args = parser.parse_args()
 
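Because the flag uses `action="store_true"`, `args.semantic_seg` is `False` unless the flag is passed, which matches the `(default: False)` shown in the README. A self-contained check:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--semantic_seg", action="store_true")

print(parser.parse_args([]).semantic_seg)                  # False
print(parser.parse_args(["--semantic_seg"]).semantic_seg)  # True
```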