custom-frontend/open-vocabulary-object-detection/README.md (19 changes: 11 additions & 8 deletions)

@@ -16,14 +16,17 @@ Running this example requires a **Luxonis device** connected to your computer.
 Here is a list of all available parameters:
 
 ```
-  -fps FPS_LIMIT, --fps_limit FPS_LIMIT
-                        FPS limit. (default: None)
-  -ip IP, --ip IP       IP address to serve the frontend on. (default: None)
-  -p PORT, --port PORT  Port to serve the frontend on. (default: None)
-  -m MODEL, --model MODEL
-                        Name of the model to use: yolo-world or yoloe (default: yoloe)
-  --precision PRECISION
-                        Model precision for YOLOE models: int8 (faster) or fp16 (more accurate) (default: fp16)
+  -fps FPS_LIMIT, --fps_limit FPS_LIMIT
+                        FPS limit for the model runtime. (default: None)
+  -media MEDIA_PATH, --media_path MEDIA_PATH
+                        Path to the media file you aim to run the model on. If not set, the model will run on the camera input. (default: None)
+  -ip IP, --ip IP       IP address to serve the frontend on. (default: None)
+  -p PORT, --port PORT  Port to serve the frontend on. (default: None)
+  -m {yolo-world,yoloe}, --model {yolo-world,yoloe}
+                        Name of the model to use: yolo-world or yoloe (default: yoloe)
+  --precision {int8,fp16}
+                        Model precision for YOLOE models: int8 (faster) or fp16 (more accurate). (default: fp16)
+  --semantic_seg        Display output as semantic segmentation; otherwise use instance segmentation (only applicable to YOLOE). (default: False)
 ```
 
 ### Model Options
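For reference, a typical invocation combining the new options might look like the following (assuming the example's entry point is `main.py`; the script name is not shown in this diff):

```
python3 main.py -m yoloe --precision int8 --semantic_seg -fps 10
```

This would run YOLOE in int8 precision, render the overlay as semantic segmentation, and cap the model runtime at 10 FPS.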
@@ -178,6 +178,7 @@ def update_labels(label_names: list[str], offset: int = 0):
     visualizer.addTopic("Video", video_enc.out, "images")
 elif args.model == "yoloe":
     apply_colormap_node = pipeline.create(ApplyColormap).build(nn_with_parser.out)
+    apply_colormap_node.setInstanceToSemanticMask(args.semantic_seg)
     overlay_frames_node = pipeline.create(ImgFrameOverlay).build(
         video_src_out,
         apply_colormap_node.out,
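The new `setInstanceToSemanticMask` call switches the `ApplyColormap` node between coloring per-instance IDs and per-class IDs, matching the `--semantic_seg` flag's help text. As a rough illustration (this is not the node's internals; the arrays and mapping below are invented for the example), collapsing an instance mask into a semantic mask amounts to remapping each pixel's instance ID to its class ID:

```python
import numpy as np

# Hypothetical per-pixel instance IDs and an instance -> class lookup.
instance_mask = np.array([[0, 1],
                          [2, 2]])
instance_to_class = {0: 3, 1: 3, 2: 7}  # instances 0 and 1 share class 3

# Remap each pixel's instance ID to its class ID.
semantic_mask = np.vectorize(instance_to_class.get)(instance_mask)
print(semantic_mask)  # [[3 3]
                      #  [7 7]] -- objects of the same class merge together
```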
@@ -200,6 +201,16 @@ def update_labels(label_names: list[str], offset: int = 0):
 
 visualizer.addTopic("Detections", annotation_node.out)
 
+def get_current_params_service(req) -> dict[str, any]:
+    """Returns the current parameters in use"""
+    out_data = {
+        "confidence_threshold": CONFIDENCE_THRESHOLD,
+        "class_names": CLASS_NAMES,
+        "image_prompt_labels": IMAGE_PROMPT_LABELS,
+    }
+    print("Current params:", out_data)
+    return out_data
+
 def class_update_service(new_classes: list[str]):
     """Changes classes to detect based on the user input"""
     if len(new_classes) == 0:
@@ -210,8 +221,8 @@ def class_update_service(new_classes: list[str]):
         print(
             f"Number of new classes ({len(new_classes)}) exceeds maximum number of classes ({MAX_NUM_CLASSES}), skipping."
         )
         return
-    global CLASS_NAMES, LAST_TEXT_CLASSES
-    CLASS_NAMES = new_classes
+    global LAST_TEXT_CLASSES
+    LAST_TEXT_CLASSES = new_classes.copy()
     text_features = extract_text_embeddings(
         class_names=CLASS_NAMES,
@@ -256,7 +267,8 @@ def class_update_service(new_classes: list[str]):
 
 def conf_threshold_update_service(new_conf_threshold: float):
     """Changes confidence threshold based on the user input"""
-    CONFIDENCE_THRESHOLD = max(0, min(1, new_conf_threshold))
+    global CONFIDENCE_THRESHOLD
+    CONFIDENCE_THRESHOLD = max(0.01, min(0.99, new_conf_threshold))
     nn_with_parser.getParser(0).setConfidenceThreshold(CONFIDENCE_THRESHOLD)
     print(f"Confidence threshold set to: {CONFIDENCE_THRESHOLD}")
 
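The added `global CONFIDENCE_THRESHOLD` fixes a classic Python scoping bug: assigning to a name inside a function creates a function-local variable, so the old code updated the on-device threshold via `setConfidenceThreshold` but left the module-level `CONFIDENCE_THRESHOLD` stale for anything that read it later (such as the new params service). The clamp also tightens from [0, 1] to [0.01, 0.99], presumably to rule out the degenerate all-pass and all-reject thresholds. A minimal sketch of the pitfall, with illustrative names:

```python
THRESHOLD = 0.5

def set_threshold_buggy(value: float) -> None:
    # The assignment makes THRESHOLD local to this function;
    # the module-level THRESHOLD is never touched.
    THRESHOLD = max(0.01, min(0.99, value))

def set_threshold_fixed(value: float) -> None:
    global THRESHOLD  # rebind the module-level name instead
    THRESHOLD = max(0.01, min(0.99, value))

set_threshold_buggy(0.9)
print(THRESHOLD)  # 0.5 -- unchanged
set_threshold_fixed(0.9)
print(THRESHOLD)  # 0.9
```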
@@ -418,6 +430,12 @@ def image_upload_service(image_data):
     vec = image_features[0, :, 0].copy()
     label = image_data.get("label") or image_data["filename"].split(".")[0]
 
+    global \
+        IMAGE_PROMPT_VECTORS, \
+        IMAGE_PROMPT_LABELS, \
+        MAX_IMAGE_PROMPTS, \
+        MAX_NUM_CLASSES
+
     IMAGE_PROMPT_VECTORS.append(vec)
     IMAGE_PROMPT_LABELS.append(label)
     if len(IMAGE_PROMPT_VECTORS) > MAX_IMAGE_PROMPTS:
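Strictly speaking, `global` is only required where a name is rebound; in-place mutation such as `IMAGE_PROMPT_VECTORS.append(vec)` works without it, and for names that appear to be read-only limits (`MAX_IMAGE_PROMPTS`, `MAX_NUM_CLASSES`) the declaration is redundant but harmless. Declaring everything up front still documents the shared state and covers any later rebinding (for example, trimming the prompt lists). A short sketch of the distinction, with illustrative names:

```python
PROMPTS: list[str] = []

def add_prompt(label: str) -> None:
    PROMPTS.append(label)  # in-place mutation: no `global` needed

def trim_prompts(max_len: int) -> None:
    global PROMPTS  # rebinding the name does require `global`
    PROMPTS = PROMPTS[-max_len:]
```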
@@ -586,6 +604,12 @@ def bbox_prompt_service(payload):
         print(f"Unsupported model for bbox prompt: {args.model}")
         return {"ok": False, "reason": "unsupported_model"}
 
+    global \
+        IMAGE_PROMPT_VECTORS, \
+        IMAGE_PROMPT_LABELS, \
+        MAX_IMAGE_PROMPTS, \
+        MAX_NUM_CLASSES
+
     if args.model == "yolo-world":
         vec = image_features[0, :, 0].copy()
         label = payload.get("label", "object")
@@ -673,6 +697,7 @@ def bbox_prompt_service(payload):
     )
     return {"ok": True, "bbox": {"x0": x0, "y0": y0, "x1": x1, "y1": y1}}
 
+visualizer.registerService("Get Current Params Service", get_current_params_service)
 visualizer.registerService("Class Update Service", class_update_service)
 visualizer.registerService(
     "Threshold Update Service", conf_threshold_update_service
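With the new registration in place, the frontend can query the backend's current state instead of tracking it on its own. Called directly in Python (the payload values below are made up; in practice the frontend invokes these through the visualizer's service mechanism), the round trip looks like:

```python
conf_threshold_update_service(0.4)        # rebinds CONFIDENCE_THRESHOLD to 0.4
class_update_service(["person", "dog"])   # re-extracts text embeddings for the new classes
print(get_current_params_service(None))   # now reports the updated values
```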
@@ -56,6 +56,11 @@ def initialize_argparser():
         type=str,
         choices=["int8", "fp16"],
     )
+    parser.add_argument(
+        "--semantic_seg",
+        help="Display output as semantic segmentation; otherwise use instance segmentation (only applicable to YOLOE).",
+        action="store_true",
+    )
 
     args = parser.parse_args()
 
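Because the flag uses `action="store_true"`, `args.semantic_seg` is `False` unless the flag is passed, which matches the `(default: False)` shown in the README. A self-contained check:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--semantic_seg", action="store_true")

print(parser.parse_args([]).semantic_seg)                  # False
print(parser.parse_args(["--semantic_seg"]).semantic_seg)  # True
```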