From 115c3d16c9932b2a94231f695e8260c05419c614 Mon Sep 17 00:00:00 2001
From: aljazkonec1
Date: Tue, 4 Nov 2025 14:24:22 +0100
Subject: [PATCH 1/7] Move example

---
 examples/cpp/DetectionNetwork/CMakeLists.txt                    | 2 +-
 .../cpp/DetectionNetwork/{RVC4 => }/detection_and_keypoints.cpp | 2 +-
 .../DetectionNetwork/{RVC4 => }/detection_and_keypoints.py      | 2 +-
 tests/CMakeLists.txt                                            | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename examples/cpp/DetectionNetwork/{RVC4 => }/detection_and_keypoints.cpp (97%)
 rename examples/python/DetectionNetwork/{RVC4 => }/detection_and_keypoints.py (96%)

diff --git a/examples/cpp/DetectionNetwork/CMakeLists.txt b/examples/cpp/DetectionNetwork/CMakeLists.txt
index 9a3adc4e6..8193faeb9 100644
--- a/examples/cpp/DetectionNetwork/CMakeLists.txt
+++ b/examples/cpp/DetectionNetwork/CMakeLists.txt
@@ -26,5 +26,5 @@ dai_set_example_test_labels(detection_network_remap ondevice rvc2_all rvc4 ci)
 dai_add_example(detection_and_segmentation RVC4/detection_and_segmentation.cpp ON OFF)
 dai_set_example_test_labels(detection_and_segmentation rvc4)
 
-dai_add_example(detection_and_keypoints RVC4/detection_and_keypoints.cpp ON OFF)
+dai_add_example(detection_and_keypoints detection_and_keypoints.cpp ON OFF)
 dai_set_example_test_labels(detection_and_keypoints rvc4)
\ No newline at end of file
diff --git a/examples/cpp/DetectionNetwork/RVC4/detection_and_keypoints.cpp b/examples/cpp/DetectionNetwork/detection_and_keypoints.cpp
similarity index 97%
rename from examples/cpp/DetectionNetwork/RVC4/detection_and_keypoints.cpp
rename to examples/cpp/DetectionNetwork/detection_and_keypoints.cpp
index bc8dca07c..667151bb0 100644
--- a/examples/cpp/DetectionNetwork/RVC4/detection_and_keypoints.cpp
+++ b/examples/cpp/DetectionNetwork/detection_and_keypoints.cpp
@@ -22,7 +22,7 @@ int main() {
     auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();
     dai::NNModelDescription modelDescription;
-    modelDescription.model = "luxonis/yolov8-large-pose-estimation:coco-640x352:1868e39";
+    modelDescription.model = "luxonis/yolov8-nano-pose-estimation:coco-512x288";
     detectionNetwork->build(cameraNode, modelDescription);
     auto labelMap = detectionNetwork->getClasses();
diff --git a/examples/python/DetectionNetwork/RVC4/detection_and_keypoints.py b/examples/python/DetectionNetwork/detection_and_keypoints.py
similarity index 96%
rename from examples/python/DetectionNetwork/RVC4/detection_and_keypoints.py
rename to examples/python/DetectionNetwork/detection_and_keypoints.py
index c62987701..431679544 100644
--- a/examples/python/DetectionNetwork/RVC4/detection_and_keypoints.py
+++ b/examples/python/DetectionNetwork/detection_and_keypoints.py
@@ -9,7 +9,7 @@
 # Create pipeline
 with dai.Pipeline() as pipeline:
     cameraNode = pipeline.create(dai.node.Camera).build()
-    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("luxonis/yolov8-large-pose-estimation:coco-640x352:1868e39"))
+    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("luxonis/yolov8-nano-pose-estimation:coco-512x288"))
     labelMap = detectionNetwork.getClasses()
     qRgb = detectionNetwork.passthrough.createOutputQueue()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e88884733..6ab38e604 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -389,7 +389,7 @@ dai_set_test_labels(nndata_test onhost ci)
 
 #ImgDetections tests
 dai_add_test(imgdetections_test src/onhost_tests/pipeline/datatype/imgdetections_test.cpp)
-dai_set_test_labels(imgdetections_test onhost ci) +dai_set_test_labels(imgdetections_test ondevice rvc2 rvc4 onhost ci) # Model description tests dai_add_test(model_slug_test src/onhost_tests/model_slug_test.cpp) From bb3204ebd1abeaa5a6748e77a1d4cf9ed075c04a Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Tue, 4 Nov 2025 16:59:00 +0100 Subject: [PATCH 2/7] Add host parsing option --- CMakeLists.txt | 1 + .../RVC4/detection_and_segmentation.py | 3 +- .../depthai/pipeline/node/DetectionParser.hpp | 29 +- src/pipeline/node/DetectionParser.cpp | 184 ++++ .../DetectionParser/DetectionParserUtils.cpp | 897 ++++++++++++++++++ .../DetectionParser/DetectionParserUtils.hpp | 85 ++ src/pipeline/utilities/NNDataViewer.hpp | 163 ++++ 7 files changed, 1360 insertions(+), 2 deletions(-) create mode 100644 src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp create mode 100644 src/pipeline/utilities/DetectionParser/DetectionParserUtils.hpp create mode 100644 src/pipeline/utilities/NNDataViewer.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ae50f4e25..54150150f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -311,6 +311,7 @@ set(TARGET_CORE_SOURCES src/pipeline/node/ImageAlign.cpp src/pipeline/node/ToF.cpp src/pipeline/node/DetectionParser.cpp + src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp src/pipeline/node/test/MyProducer.cpp src/pipeline/node/test/MyConsumer.cpp src/pipeline/node/UVC.cpp diff --git a/examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py b/examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py index fcbbbfd2f..650f90f2f 100644 --- a/examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py +++ b/examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py @@ -9,7 +9,8 @@ # Create pipeline with dai.Pipeline() as pipeline: cameraNode = pipeline.create(dai.node.Camera).build() - detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("luxonis/yolov8-instance-segmentation-large:coco-640x480")) + detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("luxonis/yolov8-instance-segmentation-nano:coco-512x288")) + # detectionNetwork.detectionParser.runOnHost(True) labelMap = detectionNetwork.getClasses() qRgb = detectionNetwork.passthrough.createOutputQueue() diff --git a/include/depthai/pipeline/node/DetectionParser.hpp b/include/depthai/pipeline/node/DetectionParser.hpp index 78bb8ce8e..4b50a75b3 100644 --- a/include/depthai/pipeline/node/DetectionParser.hpp +++ b/include/depthai/pipeline/node/DetectionParser.hpp @@ -15,6 +15,8 @@ #include #include "depthai/common/YoloDecodingFamily.hpp" +#include "depthai/pipeline/datatype/ImgDetections.hpp" +#include "depthai/pipeline/datatype/NNData.hpp" namespace dai { namespace node { @@ -23,7 +25,7 @@ namespace node { * @brief DetectionParser node. Parses detection results from different neural networks and is being used internally by MobileNetDetectionNetwork and * YoloDetectionNetwork. 
 */
-class DetectionParser : public DeviceNodeCRTP<DeviceNode, DetectionParser, DetectionParserProperties> {
+class DetectionParser : public DeviceNodeCRTP<DeviceNode, DetectionParser, DetectionParserProperties>, public HostRunnable {
    public:
     constexpr static const char* NAME = "DetectionParser";
     using DeviceNodeCRTP::DeviceNodeCRTP;
@@ -177,7 +179,23 @@ class DetectionParser : public DeviceNodeCRTP<DeviceNode, DetectionParser, DetectionParserProperties>
+    /**
+     * Specify whether the parser should run on host or on device
+     */
+    void setRunOnHost(bool runOnHost);
+
+    /**
+     * Check if the node is set to run on host
+     */
+    bool runOnHost() const override;
+
+    void run() override;
+
+    std::vector<ImgDetection> decodeMobilenet(std::shared_ptr<NNData> nnData, float confidenceThr);
+
    private:
+    bool runOnHostVar = false;
     void setNNArchiveBlob(const NNArchive& nnArchive);
     void setNNArchiveSuperblob(const NNArchive& nnArchive, int numShaves);
     void setNNArchiveOther(const NNArchive& nnArchive);
@@ -185,6 +203,15 @@ class DetectionParser : public DeviceNodeCRTP<DeviceNode, DetectionParser, DetectionParserProperties>
+    // host runnable requirements
+    void buildStage1() override;
+    void decodeYolo(std::shared_ptr<NNData> nnData, std::shared_ptr<ImgDetections> outDetections);
+    std::vector<TensorInfo> inTensorInfo;
+    uint32_t imgWidth;
+    uint32_t imgHeight;
+    bool imgSizesSet = false;
+    //
+
     std::optional<NNArchive> mArchive;
 
     std::optional<NNArchiveVersionedConfig> archiveConfig;
diff --git a/src/pipeline/node/DetectionParser.cpp b/src/pipeline/node/DetectionParser.cpp
index a03b64633..2c0e07b9a 100644
--- a/src/pipeline/node/DetectionParser.cpp
+++ b/src/pipeline/node/DetectionParser.cpp
@@ -13,6 +13,8 @@
 #include "nn_archive/NNArchive.hpp"
 #include "nn_archive/v1/Head.hpp"
 #include "pipeline/ThreadedNodeImpl.hpp"
+#include "pipeline/datatype/NNData.hpp"
+#include "pipeline/utilities/DetectionParser/DetectionParserUtils.hpp"
 #include "spdlog/fmt/fmt.h"
 
 // internal headers
@@ -349,5 +351,187 @@ std::vector<int> DetectionParser::getStrides() const {
     return properties.parser.strides;
 }
 
+void DetectionParser::setRunOnHost(bool runOnHost) {
+    if(runOnHost) {
+        pimpl->logger->warn("Detection parser set to run on host.");
+    }
+    runOnHostVar = runOnHost;
+}
+
+/**
+ * Check if the node is set to run on host
+ */
+bool DetectionParser::runOnHost() const {
+    return runOnHostVar;
+}
+
+void DetectionParser::run() {
+    auto& logger = pimpl->logger;
+    logger->info("Detection parser running on host.");
+
+    using namespace std::chrono;
+    while(isRunning()) {
+        auto tAbsoluteBeginning = steady_clock::now();
+        std::shared_ptr<NNData> inputData = input.get<NNData>();
+        if(!inputData) {
+            logger->error("Error while receiving NN frame.");
+            continue;
+        }
+        auto tAfterMessageBeginning = steady_clock::now();
+
+        if(!imgSizesSet) {
+            const bool containsTransformation = inputData->transformation.has_value();
+            if(containsTransformation) {
+                std::tie(imgWidth, imgHeight) = inputData->transformation->getSize();
+            } else {
+                logger->warn("No image size provided for detection parser. Skipping processing.");
+                continue;
+            }
+
+            imgSizesSet = true;
+        }
+
+        auto outDetections = std::make_shared<ImgDetections>();
+
+        switch(properties.parser.nnFamily) {
+            case DetectionNetworkType::YOLO: {
+                decodeYolo(inputData, outDetections);
+                break;
+            }
+            case DetectionNetworkType::MOBILENET: {
+                auto dets = decodeMobilenet(inputData, properties.parser.confidenceThreshold);  // TODO (aljaz) update to shared pointer
+                outDetections->detections = dets;
+                break;
+            }
+            default: {
+                logger->error("Unknown NN family. 'YOLO' and 'MOBILENET' are supported.");
+                break;
+            }
+        }
+
+        auto tBeforeSend = steady_clock::now();
+
+        // Copy over seq and ts
+        outDetections->setSequenceNum(inputData->getSequenceNum());
+        outDetections->setTimestamp(inputData->getTimestamp());
+        outDetections->setTimestampDevice(inputData->getTimestampDevice());
+        outDetections->transformation = inputData->transformation;
+        // Send detections
+        out.send(outDetections);
+
+        auto tAbsoluteEnd = steady_clock::now();
+        logger->debug("Detection parser total took {}ms, processing {}ms, getting_frames {}ms, sending_frames {}ms",
+                      duration_cast<microseconds>(tAbsoluteEnd - tAbsoluteBeginning).count() / 1000,
+                      duration_cast<microseconds>(tBeforeSend - tAfterMessageBeginning).count() / 1000,
+                      duration_cast<microseconds>(tAfterMessageBeginning - tAbsoluteBeginning).count() / 1000,
+                      duration_cast<microseconds>(tAbsoluteEnd - tBeforeSend).count() / 1000);
+    }
+}
+
+void DetectionParser::buildStage1() {
+    auto& logger = pimpl->logger;
+
+    // Grab dimensions from input tensor info
+    if(properties.networkInputs.size() > 0) {
+        if(properties.networkInputs.size() > 1) {
+            logger->warn("Detection parser supports only single input networks, assuming first input");
+        }
+        for(const auto& kv : properties.networkInputs) {
+            const dai::TensorInfo& tensorInfo = kv.second;
+            inTensorInfo.push_back(tensorInfo);
+        }
+    }
+    if(inTensorInfo.size() > 0) {
+        int numDimensions = inTensorInfo[0].numDimensions;
+        if(numDimensions < 2) {
+            logger->error("Number of input dimensions is less than 2");
+        } else {
+            imgSizesSet = true;
+            imgWidth = inTensorInfo[0].dims[numDimensions - 1];
+            imgHeight = inTensorInfo[0].dims[numDimensions - 2];
+        }
+    } else {
+        logger->info("Unable to read input tensor height and width from static inputs. The node will try to get input sizes at runtime.");
+    }
+}
+
+std::vector<ImgDetection> DetectionParser::decodeMobilenet(std::shared_ptr<NNData> nnData, float confidenceThr) {
+    auto& logger = pimpl->logger;
+
+    if(!nnData) {
+        return {};
+    }
+    std::vector<ImgDetection> detections;
+    std::string tensorName;
+    for(const auto& tensor : nnData->getAllLayers()) {
+        if(tensor.offset == 0) {
+            tensorName = tensor.name;
+        }
+    }
+
+    auto tensorData = nnData->getTensor<float>(tensorName);
+    const int maxDetections = static_cast<int>(tensorData.size()) / 7;
+    if(static_cast<int>(tensorData.size()) < maxDetections * 7) {
+        logger->error("Error while parsing Mobilenet. Vector not long enough, expected size: {}, real size {}", maxDetections * 7, tensorData.size());
+        return {};
+    }
+
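+    // A sketch of the assumed record layout: the output tensor is a flat sequence of
+    // 7-float records [header(image_id), label, confidence, xmin, ymin, xmax, ymax] with
+    // normalized box coordinates, and header == -1 marks the first unused record.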
+    struct raw_Detection {  // need to update it to include more
+        float header;
+        float label;
+        float confidence;
+        float xmin;
+        float ymin;
+        float xmax;
+        float ymax;
+    };
+
+    float* rawPtr = tensorData.data();
+    for(int i = 0; i < maxDetections; i++) {
+        raw_Detection temp;
+        // TODO This is likely unnecessary optimisation
+        memcpy(&temp, &rawPtr[i * 7], sizeof(raw_Detection));
+
+        // if header == -1, stop sooner
+        if(temp.header == -1.0f) break;
+
+        float currentConfidence = temp.confidence;
+        if(currentConfidence >= confidenceThr) {
+            dai::ImgDetection d;
+            d.label = temp.label;
+
+            d.confidence = currentConfidence;
+
+            d.xmin = temp.xmin;
+            d.ymin = temp.ymin;
+            d.xmax = temp.xmax;
+            d.ymax = temp.ymax;
+
+            detections.push_back(d);
+        }
+    }
+    return detections;
+}
+
+void DetectionParser::decodeYolo(std::shared_ptr<NNData> nnData, std::shared_ptr<ImgDetections> outDetections) {
+    auto& logger = pimpl->logger;
+    switch(properties.parser.decodingFamily) {
+        case YoloDecodingFamily::R1AF:  // anchor free: yolo v6r1
+            utilities::DetectionParserUtils::decodeR1AF(nnData, outDetections, properties, logger);
+            break;
+        case YoloDecodingFamily::v3AB:  // anchor based yolo v3 v3-Tiny
+            utilities::DetectionParserUtils::decodeV3AB(nnData, outDetections, properties, logger);
+            break;
+        case YoloDecodingFamily::v5AB:  // anchor based yolo v5, v7, P
+            utilities::DetectionParserUtils::decodeV5AB(nnData, outDetections, properties, logger);
+            break;
+        case YoloDecodingFamily::TLBR:  // top left bottom right anchor free: yolo v6r2, v8 v10 v11
+            utilities::DetectionParserUtils::decodeTLBR(nnData, outDetections, properties, logger);
+            break;
+    }
+}
+
 }  // namespace node
 }  // namespace dai
diff --git a/src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp b/src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp
new file mode 100644
index 000000000..c1809e847
--- /dev/null
+++ b/src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp
@@ -0,0 +1,897 @@
+#include "DetectionParserUtils.hpp"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "depthai/common/KeypointsList.hpp"
+#include "depthai/common/RotatedRect.hpp"
+#include "depthai/common/TensorInfo.hpp"
+#include "depthai/pipeline/datatype/ImgDetections.hpp"
+#include "depthai/pipeline/datatype/NNData.hpp"
+#include "depthai/properties/DetectionParserProperties.hpp"
+#include "pipeline/utilities/NNDataViewer.hpp"
+
+namespace dai {
+namespace utilities {
+namespace DetectionParserUtils {
+
+// yolo v6 r1 - anchor free
+void decodeR1AF(std::shared_ptr<NNData> nnData,
+                std::shared_ptr<ImgDetections> outDetections,
+                DetectionParserProperties properties,
+                std::shared_ptr<spdlog::logger> logger) {
+    auto layerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "yolo", properties.parser.outputNames);
+
+    const std::vector<int> strides = properties.parser.strides;
+    if(strides.size() != layerNames.size()) {
+        std::string errorMsg = fmt::format(
+            "Number of strides does not match number of output layers. 
Strides size: {}, output layers size: {}", strides.size(), layerNames.size()); + throw std::runtime_error(errorMsg); + } + const float confidenceThr = properties.parser.confidenceThreshold; + const float iouThr = properties.parser.iouThreshold; + const int numClasses = properties.parser.classes; + int inputWidth; + int inputHeight; + std::tie(inputWidth, inputHeight) = nnData->transformation->getSize(); + + if(inputWidth <= 0 || inputHeight <= 0) { + throw std::runtime_error("Invalid input dimensions retrieved from NNData transformation."); + } + std::vector detectionCandidates; + detectionCandidates.reserve(250); + + for(int strideIdx = 0; strideIdx < static_cast(layerNames.size()); ++strideIdx) { + std::string layerName = layerNames[strideIdx]; + auto tensorInfo = nnData->getTensorInfo(layerName); + if(!tensorInfo) { + std::string errorMsg = fmt::format("Tensor info for layer {} is null", layerName); + throw std::runtime_error(errorMsg); + } + + if(!isTensorOrderValid(*tensorInfo, properties, logger)) { + logger->error("Tensor order for layer {} is invalid, skipping this layer", layerName); + continue; + } + + int layerHeight = tensorInfo->getHeight(); + int layerWidth = tensorInfo->getWidth(); + NNDataViewer outputData = NNDataViewer(*tensorInfo, nnData->data, logger); + if(!outputData.build()) { + std::string errorMsg = fmt::format("Failed to build NNDataViewer for layer {}", layerName); + throw std::runtime_error(errorMsg); + } + + for(int row = 0; row < layerHeight; ++row) { + for(int col = 0; col < layerWidth; ++col) { + const float score = outputData.get(4, row, col); + if(score < confidenceThr) { + continue; + } + + int bestC = 0; + float bestConf = 0.0f; + for(int c = 0; c < numClasses; ++c) { + float candidateProb = outputData.get(c + 5, row, col); + if(candidateProb > bestConf) { + bestConf = candidateProb; + bestC = c; + } + } + if(bestConf * score < confidenceThr) { + continue; + } + + float cx = outputData.get(0, row, col); + float cy = outputData.get(1, row, col); + float w = outputData.get(2, row, col); + float h = outputData.get(3, row, col); + + float xmin = cx - w * 0.5f; + float ymin = cy - h * 0.5f; + float xmax = cx + w * 0.5f; + float ymax = cy + h * 0.5f; + + xmin = std::max(0.0f, std::min(xmin, float(inputWidth))); + ymin = std::max(0.0f, std::min(ymin, float(inputHeight))); + xmax = std::max(0.0f, std::min(xmax, float(inputWidth))); + ymax = std::max(0.0f, std::min(ymax, float(inputHeight))); + + if(xmax <= xmin || ymax <= ymin) { + logger->info("Invalid box with xmax <= xmin or ymax <= ymin, skipping"); + continue; + } + DetectionCandidate candidate = DetectionCandidate{ + xmin, + ymin, + xmax, + ymax, + bestConf * score, + bestC, + strideIdx, + row, + col, + std::nullopt, + }; + + if(!properties.parser.classNames->empty()) { + candidate.labelName = (*properties.parser.classNames)[bestC]; + } + detectionCandidates.emplace_back(std::move(candidate)); + } + } + } + + std::vector keepCandidates = nonMaximumSuppression(detectionCandidates, iouThr); + if(keepCandidates.size() == 0) { + logger->trace("No detections after NMS, skipping overlay."); + return; + } + + createImgDetections(keepCandidates, outDetections, inputWidth, inputHeight); + + if(properties.parser.decodeSegmentation) { + logger->trace("Segmentation decoding."); + segmentationDecode(nnData, keepCandidates, outDetections, properties, logger); + } + + if(properties.parser.decodeKeypoints) { + logger->trace("Keypoints decoding."); + keypointDecode(nnData, keepCandidates, outDetections, properties, 
logger); + } +} + +/* +Decode anchor based yolo v3 and v3-Tiny +*/ +void decodeV3AB(std::shared_ptr nnData, + std::shared_ptr outDetections, + DetectionParserProperties properties, + std::shared_ptr logger) { + auto layerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "yolo", properties.parser.outputNames); + + const std::vector strides = properties.parser.strides; + if(strides.size() != layerNames.size()) { + std::string errorMsg = fmt::format( + "Number of strides does not match number of output layers. Strides size: {}, output layers size: {}", strides.size(), layerNames.size()); + throw std::runtime_error(errorMsg); + } + + const float confidenceThr = properties.parser.confidenceThreshold; + const float iouThr = properties.parser.iouThreshold; + const int numClasses = properties.parser.classes; + int inputWidth; + int inputHeight; + std::tie(inputWidth, inputHeight) = nnData->transformation->getSize(); + if(inputWidth <= 0 || inputHeight <= 0) { + throw std::runtime_error("Invalid input dimensions retrieved from NNData transformation."); + } + + if(properties.parser.anchorsV2.size() != layerNames.size()) { + logger->error("Number of anchor sets does not match number of output layers. Anchor sets size: {}, output layers size: {}", + properties.parser.anchorsV2.size(), + layerNames.size()); + return; + } + + std::vector detectionCandidates; + detectionCandidates.reserve(250); + + for(int strideIdx = 0; strideIdx < static_cast(layerNames.size()); ++strideIdx) { + std::string layerName = layerNames[strideIdx]; + int stride = strides[strideIdx]; + auto tensorInfo = nnData->getTensorInfo(layerName); + if(!tensorInfo) { + std::string errorMsg = fmt::format("Tensor info for layer {} is null", layerName); + throw std::runtime_error(errorMsg); + } + + if(!isTensorOrderValid(*tensorInfo, properties, logger)) { + logger->error("Tensor order for layer {} is invalid, skipping this layer", layerName); + continue; + } + + int layerHeight = tensorInfo->getHeight(); + int layerWidth = tensorInfo->getWidth(); + int layerChannels = tensorInfo->getChannels(); + + NNDataViewer outputData = NNDataViewer(*tensorInfo, nnData->data, logger); + if(!outputData.build()) { + std::string errorMsg = fmt::format("Failed to build NNDataViewer for layer {}", layerName); + throw std::runtime_error(errorMsg); + } + std::vector>& anchors = properties.parser.anchorsV2[strideIdx]; + int numAnchors = anchors.size(); + int block = 5 + numClasses; + int expectedC = numAnchors * block; + + if(layerChannels != expectedC) { + std::string errorMsg = fmt::format("Layer {} channels mismatch. 
Expected {}, got {}", layerName, expectedC, layerChannels); + throw std::runtime_error(errorMsg); + } + + auto sigmoid = [](float x) -> float { return 1.f / (1.f + std::exp(-x)); }; + + for(int row = 0; row < layerHeight; ++row) { + for(int col = 0; col < layerWidth; ++col) { + for(int a = 0; a < numAnchors; ++a) { + const int ch0 = a * block; + const float tx = sigmoid(outputData.get(ch0 + 0, row, col)); + const float ty = sigmoid(outputData.get(ch0 + 1, row, col)); + const float tw = outputData.get(ch0 + 2, row, col); + const float th = outputData.get(ch0 + 3, row, col); + const float obj = sigmoid(outputData.get(ch0 + 4, row, col)); + if(obj < confidenceThr) continue; + + int bestC = 0; + float clsProb = 0.0f; + for(int c = 0; c < numClasses; ++c) { + const float prob = outputData.get(ch0 + 5 + c, row, col); + if(prob > clsProb) { + clsProb = prob; + bestC = c; + } + } + const float conf = obj * 1.f / (1.f + std::exp(-clsProb)); + if(conf < confidenceThr) continue; + + // YOLOv3 decode + const float cx = (static_cast(col) + tx) * static_cast(stride); + const float cy = (static_cast(row) + ty) * static_cast(stride); + const float w_exp = std::exp(tw); + const float h_exp = std::exp(th); + const float w = w_exp * anchors[a][0]; + const float h = h_exp * anchors[a][1]; + + float xmin = cx - 0.5f * w; + float ymin = cy - 0.5f * h; + float xmax = cx + 0.5f * w; + float ymax = cy + 0.5f * h; + + xmin = std::max(0.0f, std::min(xmin, float(inputWidth))); + ymin = std::max(0.0f, std::min(ymin, float(inputHeight))); + xmax = std::max(0.0f, std::min(xmax, float(inputWidth))); + ymax = std::max(0.0f, std::min(ymax, float(inputHeight))); + + if(xmax <= xmin || ymax <= ymin) { + logger->info("Invalid box with xmax <= xmin or ymax <= ymin, skipping"); + continue; + } + + DetectionCandidate candidate = DetectionCandidate{ + xmin, + ymin, + xmax, + ymax, + conf, + bestC, + strideIdx, + row, + col, + std::nullopt, + }; + + if(!properties.parser.classNames->empty()) { + candidate.labelName = (*properties.parser.classNames)[bestC]; + } + detectionCandidates.emplace_back(std::move(candidate)); + } + } + } + } + + std::vector keepCandidates = nonMaximumSuppression(detectionCandidates, iouThr); + if(keepCandidates.size() == 0) { + logger->trace("No detections after NMS, skipping overlay."); + return; + } + + createImgDetections(keepCandidates, outDetections, inputWidth, inputHeight); + + if(properties.parser.decodeSegmentation) { + logger->trace("Segmentation decoding."); + segmentationDecode(nnData, keepCandidates, outDetections, properties, logger); + } + + if(properties.parser.decodeKeypoints) { + logger->trace("Keypoints decoding."); + keypointDecode(nnData, keepCandidates, outDetections, properties, logger); + } + + // +} + +/* +Decode anchor based networks, e.g., yolo v5, v7, P +*/ +void decodeV5AB(std::shared_ptr nnData, + std::shared_ptr outDetections, + DetectionParserProperties properties, + std::shared_ptr logger) { + auto layerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "yolo", properties.parser.outputNames); + + const std::vector strides = properties.parser.strides; + if(strides.size() != layerNames.size()) { + std::string errorMsg = fmt::format( + "Number of strides does not match number of output layers. 
Strides size: {}, output layers size: {}", strides.size(), layerNames.size()); + throw std::runtime_error(errorMsg); + } + + const float confidenceThr = properties.parser.confidenceThreshold; + const float iouThr = properties.parser.iouThreshold; + const int numClasses = properties.parser.classes; + int inputWidth; + int inputHeight; + std::tie(inputWidth, inputHeight) = nnData->transformation->getSize(); + + if(inputWidth <= 0 || inputHeight <= 0) { + throw std::runtime_error("Invalid input dimensions retrieved from NNData transformation."); + } + + if(properties.parser.anchorsV2.size() != layerNames.size()) { + logger->error("Number of anchor sets does not match number of output layers. Anchor sets size: {}, output layers size: {}", + properties.parser.anchorsV2.size(), + layerNames.size()); + return; + } + + std::vector detectionCandidates; + detectionCandidates.reserve(250); + + for(int strideIdx = 0; strideIdx < static_cast(layerNames.size()); ++strideIdx) { + std::string layerName = layerNames[strideIdx]; + int stride = strides[strideIdx]; + auto tensorInfo = nnData->getTensorInfo(layerName); + if(!tensorInfo) { + std::string errorMsg = fmt::format("Tensor info for layer {} is null", layerName); + throw std::runtime_error(errorMsg); + } + + if(!isTensorOrderValid(*tensorInfo, properties, logger)) { + logger->error("Tensor order for layer {} is invalid, skipping this layer", layerName); + continue; + } + + int layerHeight = tensorInfo->getHeight(); + int layerWidth = tensorInfo->getWidth(); + int layerChannels = tensorInfo->getChannels(); + + NNDataViewer outputData = NNDataViewer(*tensorInfo, nnData->data, logger); + if(!outputData.build()) { + std::string errorMsg = fmt::format("Failed to build NNDataViewer for layer {}", layerName); + throw std::runtime_error(errorMsg); + } + std::vector>& anchors = properties.parser.anchorsV2[strideIdx]; + int numAnchors = anchors.size(); + int block = 5 + numClasses; + int expectedC = numAnchors * block; + + if(layerChannels != expectedC) { + logger->error("Layer {} channels mismatch. 
Expected {}, got {}", layerName, expectedC, layerChannels); + return; + } + + for(int row = 0; row < layerHeight; ++row) { + for(int col = 0; col < layerWidth; ++col) { + for(int a = 0; a < numAnchors; ++a) { + const int ch0 = a * block; + + const float tx = outputData.get(ch0 + 0, row, col); + const float ty = outputData.get(ch0 + 1, row, col); + const float tw = outputData.get(ch0 + 2, row, col); + const float th = outputData.get(ch0 + 3, row, col); + const float obj = outputData.get(ch0 + 4, row, col); + if(obj < confidenceThr) continue; + + int bestC = 0; + float bestConf = 0.0f; + for(int c = 0; c < numClasses; ++c) { + const float prob = outputData.get(ch0 + 5 + c, row, col); + if(prob > bestConf) { + bestConf = prob; + bestC = c; + } + } + const float conf = obj * bestConf; + if(conf < confidenceThr) continue; + + // YOLOv5 decode + const float cx = ((tx * 2.0f - 0.5f) + static_cast(col)) * static_cast(stride); + const float cy = ((ty * 2.0f - 0.5f) + static_cast(row)) * static_cast(stride); + + const float w = tw * tw * 4.0f * anchors[a][0]; + const float h = th * th * 4.0f * anchors[a][1]; + + float xmin = cx - 0.5f * w; + float ymin = cy - 0.5f * h; + float xmax = cx + 0.5f * w; + float ymax = cy + 0.5f * h; + + xmin = std::max(0.0f, std::min(xmin, float(inputWidth))); + ymin = std::max(0.0f, std::min(ymin, float(inputHeight))); + xmax = std::max(0.0f, std::min(xmax, float(inputWidth))); + ymax = std::max(0.0f, std::min(ymax, float(inputHeight))); + + if(xmax <= xmin || ymax <= ymin) continue; + DetectionCandidate candidate = DetectionCandidate{ + xmin, + ymin, + xmax, + ymax, + conf, + bestC, + strideIdx, + row, + col, + std::nullopt, + }; + + if(!properties.parser.classNames->empty()) { + candidate.labelName = (*properties.parser.classNames)[bestC]; + } + detectionCandidates.emplace_back(std::move(candidate)); + } + } + } + } + + std::vector keepCandidates = nonMaximumSuppression(detectionCandidates, iouThr); + if(keepCandidates.size() == 0) { + logger->trace("No detections after NMS, skipping overlay."); + return; + } + + createImgDetections(keepCandidates, outDetections, inputWidth, inputHeight); + + if(properties.parser.decodeSegmentation) { + logger->trace("Segmentation decoding."); + segmentationDecode(nnData, keepCandidates, outDetections, properties, logger); + } + + if(properties.parser.decodeKeypoints) { + logger->trace("Keypoints decoding."); + keypointDecode(nnData, keepCandidates, outDetections, properties, logger); + } +} + +/* +Decode TLBR (top left bottom right) style networks, e.g., yolo v6r2, v8, v10, v11 +*/ +void decodeTLBR(std::shared_ptr nnData, + std::shared_ptr outDetections, + DetectionParserProperties properties, + std::shared_ptr logger) { + auto layerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "yolo", properties.parser.outputNames); + + const std::vector strides = properties.parser.strides; + if(strides.size() != layerNames.size()) { + std::string errorMsg = fmt::format( + "Number of strides does not match number of output layers. 
Strides size: {}, output layers size: {}", strides.size(), layerNames.size()); + throw std::runtime_error(errorMsg); + } + const float confidenceThr = properties.parser.confidenceThreshold; + const float iouThr = properties.parser.iouThreshold; + const int numClasses = properties.parser.classes; + int inputWidth; + int inputHeight; + std::tie(inputWidth, inputHeight) = nnData->transformation->getSize(); + + if(inputWidth <= 0 || inputHeight <= 0) { + throw std::runtime_error("Invalid input dimensions retrieved from NNData transformation."); + } + + std::vector detectionCandidates; + detectionCandidates.reserve(250); + + for(int strideIdx = 0; strideIdx < static_cast(layerNames.size()); ++strideIdx) { + std::string layerName = layerNames[strideIdx]; + int stride = strides[strideIdx]; + auto tensorInfo = nnData->getTensorInfo(layerName); + if(!tensorInfo) { + std::string errorMsg = fmt::format("Tensor info for layer {} is null", layerName); + throw std::runtime_error(errorMsg); + } + + if(!isTensorOrderValid(*tensorInfo, properties, logger)) { + logger->error("Tensor order for layer {} is invalid, skipping this layer", layerName); + continue; + } + + int layerHeight = tensorInfo->getHeight(); + int layerWidth = tensorInfo->getWidth(); + NNDataViewer outputData = NNDataViewer(*tensorInfo, nnData->data, logger); + if(!outputData.build()) { + std::string errorMsg = fmt::format("Failed to build NNDataViewer for layer {}", layerName); + throw std::runtime_error(errorMsg); + } + + for(int row = 0; row < layerHeight; ++row) { + for(int col = 0; col < layerWidth; ++col) { + const float score = outputData.get(4, row, col); + if(score < confidenceThr) { + continue; + } + + int bestC = 0; + float bestConf = 0.0f; + for(int c = 0; c < numClasses; ++c) { + float candidateProb = outputData.get(c + 5, row, col); + if(candidateProb > bestConf) { + bestConf = candidateProb; + bestC = c; + } + } + float xmin = (col - outputData.get(0, row, col) + 0.5f) * stride; + float ymin = (row - outputData.get(1, row, col) + 0.5f) * stride; + float xmax = (col + outputData.get(2, row, col) + 0.5f) * stride; + float ymax = (row + outputData.get(3, row, col) + 0.5f) * stride; + + if(bestConf < confidenceThr) { + continue; + } + + xmin = std::max(0.0f, std::min(xmin, float(inputWidth))); + ymin = std::max(0.0f, std::min(ymin, float(inputHeight))); + xmax = std::max(0.0f, std::min(xmax, float(inputWidth))); + ymax = std::max(0.0f, std::min(ymax, float(inputHeight))); + + if(xmax <= xmin || ymax <= ymin) { + logger->info("Invalid box with xmax <= xmin or ymax <= ymin, skipping"); + continue; + } + + DetectionCandidate candidate = DetectionCandidate{ + xmin, + ymin, + xmax, + ymax, + bestConf, + bestC, + strideIdx, + row, + col, + std::nullopt, + + }; + + if(!properties.parser.classNames->empty()) { + candidate.labelName = (*properties.parser.classNames)[bestC]; + } + detectionCandidates.emplace_back(std::move(candidate)); + } + } + } + + std::vector keepCandidates = nonMaximumSuppression(detectionCandidates, iouThr); + if(keepCandidates.size() == 0) { + logger->trace("No detections after NMS, skipping overlay."); + return; + } + + createImgDetections(keepCandidates, outDetections, inputWidth, inputHeight); + + if(properties.parser.decodeSegmentation) { + logger->trace("Segmentation decoding."); + segmentationDecode(nnData, keepCandidates, outDetections, properties, logger); + } + + if(properties.parser.decodeKeypoints) { + logger->trace("Keypoints decoding."); + keypointDecode(nnData, keepCandidates, outDetections, 
properties, logger); + } +} + +bool isTensorOrderValid(dai::TensorInfo& tensorInfo, DetectionParserProperties properties, std::shared_ptr logger) { + // Fix the channel order for Yolo - this is hacky and would be best to be fixed in the actual models and make it consistent + + int anchorMultiplier = properties.parser.anchorsV2.empty() ? 1 : static_cast(properties.parser.anchorsV2.size()); + int channelSize = anchorMultiplier * (properties.parser.classes + properties.parser.coordinates + 1); + + auto checkAndFixOrder = [&](int channelDimIndex, int alternativeDimIndex, dai::TensorInfo::StorageOrder alternativeOrder) -> bool { + // Check that the dims size is big enough + if(static_cast(tensorInfo.dims.size()) <= channelDimIndex || static_cast(tensorInfo.dims.size()) <= alternativeDimIndex) { + logger->error("Invalid tensor dims size. Skipping."); + return false; + } + + if(tensorInfo.dims[channelDimIndex] != uint32_t(channelSize)) { + // Check if the channel size would match the alternative storage order + if(tensorInfo.dims[alternativeDimIndex] == uint32_t(channelSize)) { + logger->trace("Invalid channel size for the tensor. Expected {}, got {}, switching", channelSize, tensorInfo.dims[channelDimIndex]); + tensorInfo.order = alternativeOrder; + } else { + logger->error("Invalid channel size for the tensor. Expected {}, got {}. Skipping.", channelSize, tensorInfo.dims[channelDimIndex]); + return false; + } + } + return true; + }; + + switch(tensorInfo.order) { + case dai::TensorInfo::StorageOrder::CHW: + if(!checkAndFixOrder(0, 2, dai::TensorInfo::StorageOrder::HWC)) return false; + break; + case dai::TensorInfo::StorageOrder::HWC: + if(!checkAndFixOrder(2, 0, dai::TensorInfo::StorageOrder::CHW)) return false; + break; + case dai::TensorInfo::StorageOrder::NCHW: + if(!checkAndFixOrder(1, 3, dai::TensorInfo::StorageOrder::NHWC)) return false; + break; + case dai::TensorInfo::StorageOrder::NHWC: + if(!checkAndFixOrder(3, 1, dai::TensorInfo::StorageOrder::NCHW)) return false; + break; + case dai::TensorInfo::StorageOrder::NHCW: + case dai::TensorInfo::StorageOrder::WHC: + case dai::TensorInfo::StorageOrder::WCH: + case dai::TensorInfo::StorageOrder::HCW: + case dai::TensorInfo::StorageOrder::CWH: + case dai::TensorInfo::StorageOrder::NC: + case dai::TensorInfo::StorageOrder::CN: + case dai::TensorInfo::StorageOrder::C: + case dai::TensorInfo::StorageOrder::H: + case dai::TensorInfo::StorageOrder::W: + default: + logger->error("Invalid storage order for the tensor. 
Skipping."); + return false; + } + + return true; +} + +std::vector getSortedDetectionLayerNames(std::shared_ptr nnData, std::string searchTerm, std::vector outputNames) { + if(outputNames.empty()) { + outputNames = nnData->getAllLayerNames(); + } + + std::vector layerNames; + for(const auto& name : outputNames) { + // if yolo in the name, push it to layerNames + if(name.find(searchTerm) != std::string::npos) { + layerNames.push_back(name); + } + } + + std::sort(layerNames.begin(), layerNames.end()); + return layerNames; +} + +float YoloIntersectionOverUnion(const DetectionCandidate& box1, const DetectionCandidate& box2) { + float width_of_overlap_area = fmin(box1.xmax, box2.xmax) - fmax(box1.xmin, box2.xmin); + float height_of_overlap_area = fmin(box1.ymax, box2.ymax) - fmax(box1.ymin, box2.ymin); + float area_of_overlap; + if(width_of_overlap_area < 0 || height_of_overlap_area < 0) + area_of_overlap = 0; + else + area_of_overlap = width_of_overlap_area * height_of_overlap_area; + float box_1_area = (box1.ymax - box1.ymin) * (box1.xmax - box1.xmin); + float box_2_area = (box2.ymax - box2.ymin) * (box2.xmax - box2.xmin); + float area_of_union = box_1_area + box_2_area - area_of_overlap; + return area_of_overlap / area_of_union; +} + +std::vector nonMaximumSuppression(std::vector& detectionCandidates, float iouThr) { + std::sort( + detectionCandidates.begin(), detectionCandidates.end(), [](const DetectionCandidate& a, const DetectionCandidate& b) { return a.score > b.score; }); + + std::vector keep(detectionCandidates.size(), 1); + std::vector keepIndices; + keepIndices.reserve(detectionCandidates.size()); + + for(size_t i = 0; i < detectionCandidates.size(); ++i) { + if(!keep[i]) continue; + keepIndices.push_back(i); + + for(size_t j = i + 1; j < detectionCandidates.size(); ++j) { + if(!keep[j]) continue; + if(YoloIntersectionOverUnion(detectionCandidates[i], detectionCandidates[j]) >= iouThr) { + keep[j] = 0; + } + } + } + + std::vector keepCandidates; + keepCandidates.reserve(keepIndices.size()); + for(size_t idx : keepIndices) keepCandidates.push_back(detectionCandidates[idx]); + + return keepCandidates; +} + +void createImgDetections(const std::vector& detectionCandidates, + std::shared_ptr outDetections, + unsigned int width, + unsigned int height) { + for(const auto& det : detectionCandidates) { + dai::ImgDetection detection; + dai::RotatedRect rotatedRect(dai::Rect(dai::Point2f(det.xmin, det.ymin), dai::Point2f(det.xmax, det.ymax)), 0.0f); + detection.setBoundingBox(rotatedRect.normalize(width, height)); + detection.confidence = det.score; + detection.label = det.label; + if(det.labelName) { + detection.labelName = *det.labelName; + } + outDetections->detections.push_back(std::move(detection)); + } +} + +void segmentationDecode(std::shared_ptr nnData, + std::vector& detectionCandidates, + std::shared_ptr outDetections, + DetectionParserProperties properties, + std::shared_ptr logger) { + auto maskFromCoeffs = [](NNDataViewer& protos, const float* coeffs, int width, int height) -> cv::Mat { + cv::Mat maskLow(height, width, CV_32F); + for(int y = 0; y < maskLow.rows; ++y) { + float* row = maskLow.ptr(y); + for(int x = 0; x < maskLow.cols; ++x) { + float sum = 0.f; + for(int c = 0; c < 32; ++c) sum += protos.get(c, y, x) * coeffs[c]; + row[x] = 1.f / (1.f + std::exp(-sum)); // sigmoid + } + } + return maskLow; + }; + + std::pair inputSize = nnData->transformation->getSize(); + int inputWidth = inputSize.first; + int inputHeight = inputSize.second; + + cv::Mat indexMask(inputHeight, 
+
+    cv::Mat maskLow, maskUp;
+
+    auto maskLayerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "masks", std::vector<std::string>{});
+    if(properties.parser.strides.size() != maskLayerNames.size()) {
+        logger->error(
+            "Number of strides does not match number of mask output layers. Strides size: {}, mask output layers size: {}. Skipping segmentation decoding.",
+            properties.parser.strides.size(),
+            maskLayerNames.size());
+        return;
+    }
+    auto protoLayerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "proto", std::vector<std::string>{});
+    if(protoLayerNames.size() == 0) {
+        logger->error("Expecting proto output layer, found no layer with proto label. Skipping segmentation decoding.");
+        return;
+    }
+
+    NNDataViewer protoValues = NNDataViewer(*nnData->getTensorInfo(protoLayerNames[0]), nnData->data, logger);
+    if(!protoValues.build()) {
+        logger->error("Failed to build NNDataViewer for proto layer {}. Skipping segmentation decoding.", protoLayerNames[0]);
+        return;
+    }
+
+    std::map<int, NNDataViewer> maskValues;
+    for(int strideIdx = 0; strideIdx < static_cast<int>(maskLayerNames.size()); ++strideIdx) {
+        maskValues.try_emplace(strideIdx, *nnData->getTensorInfo(maskLayerNames[strideIdx]), nnData->data, logger);
+        if(!maskValues.at(strideIdx).build()) {
+            logger->error("Failed to build NNDataViewer for mask layer {}. Skipping segmentation decoding.", maskLayerNames[strideIdx]);
+            return;
+        }
+    }
+
+    for(size_t i = 0; i < detectionCandidates.size(); ++i) {  // loop over all detections
+        const auto& c = detectionCandidates[i];
+        const int detIdx = static_cast<int>(i);  // index in outDetections list
+
+        NNDataViewer mask = maskValues.at(c.headIndex);
+        std::array<float, 32> coeff;
+        for(int k = 0; k < 32; ++k) {
+            coeff[k] = mask.get(k, c.rowIndex, c.columnIndex);
+        }
+
+        TensorInfo protoInfo = *nnData->getTensorInfo(protoLayerNames[0]);
+        int protoWidth = protoInfo.getWidth();
+        int protoHeight = protoInfo.getHeight();
+        maskLow = maskFromCoeffs(protoValues, coeff.data(), protoWidth, protoHeight);
+
+        cv::resize(maskLow, maskUp, cv::Size(inputWidth, inputHeight), 0, 0, cv::INTER_LINEAR);
+        // ROI clamp
+        int x0 = std::clamp(static_cast<int>(std::floor(c.xmin)), 0, inputWidth - 1);
+        int y0 = std::clamp(static_cast<int>(std::floor(c.ymin)), 0, inputHeight - 1);
+        int x1 = std::clamp(static_cast<int>(std::ceil(c.xmax)), 0, inputWidth);
+        int y1 = std::clamp(static_cast<int>(std::ceil(c.ymax)), 0, inputHeight);
+
+        if(x1 <= x0 || y1 <= y0) continue;
+        const cv::Rect roi(x0, y0, x1 - x0, y1 - y0);
+
+        // Threshold & paint only unassigned pixels
+        cv::Mat roiProb = maskUp(roi);
+        cv::Mat roiBin;
+        cv::compare(roiProb, 0.5, roiBin, cv::CMP_GT);
+        cv::Mat roiOut = indexMask(roi);
+        cv::Mat unassigned;
+        cv::compare(roiOut, 255, unassigned, cv::CMP_EQ);
+        cv::Mat paintMask;
+        cv::bitwise_and(roiBin, unassigned, paintMask);
+
+        const uint8_t value = static_cast<uint8_t>(std::min(detIdx, 254));
+        roiOut.setTo(value, paintMask);
+    }
+
+    outDetections->setSegmentationMask(indexMask);
+}
+
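+/*
+Decode keypoints for the detections that survived NMS. Each candidate remembers the output head
+and grid cell it came from (headIndex, rowIndex, columnIndex); the matching "kpt_output" tensor
+is then sampled at that flattened cell for nKeypoints (x, y, confidence) triplets.
+*/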
+void keypointDecode(std::shared_ptr<NNData> nnData,
+                    std::vector<DetectionCandidate>& detectionCandidates,
+                    std::shared_ptr<ImgDetections> outDetections,
+                    DetectionParserProperties properties,
+                    std::shared_ptr<spdlog::logger> logger) {
+    int inputWidth;
+    int inputHeight;
+    std::tie(inputWidth, inputHeight) = nnData->transformation->getSize();
+
+    auto yoloLayerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "yolo", properties.parser.outputNames);
+    std::vector<int> featureMapWidths;
+    for(int i = 0; i < static_cast<int>(yoloLayerNames.size()); ++i) {
+        auto tensorInfo = nnData->getTensorInfo(yoloLayerNames[i]);
+        if(!tensorInfo) {
+            logger->error("Tensor info for layer {} is null. Skipping keypoints decoding.", yoloLayerNames[i]);
+            return;
+        }
+        featureMapWidths.push_back(tensorInfo->getWidth());
+    }
+
+    auto kptsLayerNames = DetectionParserUtils::getSortedDetectionLayerNames(nnData, "kpt_output", std::vector<std::string>{});
+    if(properties.parser.strides.size() != kptsLayerNames.size()) {
+        logger->error(
+            "Number of strides does not match number of keypoints output layers. Strides size: {}, keypoints output layers size: {}. Skipping keypoints "
+            "decoding.",
+            properties.parser.strides.size(),
+            kptsLayerNames.size());
+        return;
+    }
+
+    // TODO (aljaz) move to a function
+    std::map<int, NNDataViewer> keypointValues;
+    for(int strideIdx = 0; strideIdx < static_cast<int>(kptsLayerNames.size()); ++strideIdx) {
+        keypointValues.try_emplace(strideIdx, *nnData->getTensorInfo(kptsLayerNames[strideIdx]), nnData->data, logger);
+        if(!keypointValues.at(strideIdx).build()) {
+            logger->error("Failed to build NNDataViewer for keypoints layer {}. Skipping keypoints decoding.", kptsLayerNames[strideIdx]);
+            return;
+        }
+    }
+
+    if(outDetections->detections.size() != detectionCandidates.size()) {
+        logger->error(
+            "Number of detections in ImgDetections does not match number of detection candidates. ImgDetections size: {}, detection candidates size: {}. "
+            "Skipping keypoints decoding.",
+            outDetections->detections.size(),
+            detectionCandidates.size());
+        return;
+    }
+
+    const int numKeypoints = properties.parser.nKeypoints.value_or(0);
+
+    for(size_t i = 0; i < detectionCandidates.size(); ++i) {  // loop over all detections
+        const auto& c = detectionCandidates[i];
+        int flattenedIndex = c.rowIndex * featureMapWidths[c.headIndex] + c.columnIndex;
+
+        std::vector<dai::Keypoint> keypoints;
+        keypoints.reserve(numKeypoints);
+        NNDataViewer keypointMask = keypointValues.at(c.headIndex);
+
+        for(int k = 0; k < numKeypoints; ++k) {
+            int base = 3 * k;
+
+            // keypointValues tensor storage order HWC
+            // H == 0
+            // W == 51 == 17 * 3 (x, y, conf for each keypoint)
+            // C == flattened spatial dimensions of row x col of the feature map
+            float x = std::clamp(keypointMask.get(flattenedIndex, 0, base + 0) / inputWidth, 0.0f, 1.0f);
+            float y = std::clamp(keypointMask.get(flattenedIndex, 0, base + 1) / inputHeight, 0.0f, 1.0f);
+            float conf = 1.f / (1.f + std::exp(-(keypointMask.get(flattenedIndex, 0, base + 2))));
+
+            keypoints.push_back(dai::Keypoint{dai::Point2f(x, y), conf});
+        }
+
+        outDetections->detections[i].keypoints = KeypointsList(keypoints);
+    }
+}
+
+}  // namespace DetectionParserUtils
+}  // namespace utilities
+}  // namespace dai
\ No newline at end of file
diff --git a/src/pipeline/utilities/DetectionParser/DetectionParserUtils.hpp b/src/pipeline/utilities/DetectionParser/DetectionParserUtils.hpp
new file mode 100644
index 000000000..85b5a234f
--- /dev/null
+++ b/src/pipeline/utilities/DetectionParser/DetectionParserUtils.hpp
@@ -0,0 +1,85 @@
+#pragma once
+
+#include
+
+#include
+
+#include "depthai/pipeline/datatype/ImgDetections.hpp"
+#include "depthai/pipeline/datatype/NNData.hpp"
+#include "depthai/properties/DetectionParserProperties.hpp"
+
+namespace dai {
+namespace utilities {
+namespace DetectionParserUtils {
+
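+// One raw box candidate prior to NMS. headIndex, rowIndex and columnIndex record which output
+// head and grid cell produced the box, so segmentation and keypoint decoding can later look up
+// the per-cell mask coefficients and keypoint triplets for the candidates that survive NMS.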
+struct DetectionCandidate {
+    float xmin, ymin, xmax, ymax, score;
+    int label, headIndex, rowIndex, columnIndex;
+    std::optional<std::string> labelName;
+};
+
+/*
+Decode anchor free yolo v6r1 with sigmoid assisted center detection
+*/
+void decodeR1AF(std::shared_ptr<NNData> nnData,
+                std::shared_ptr<ImgDetections> outDetections,
+                DetectionParserProperties properties,
+                std::shared_ptr<spdlog::logger> logger);
+
+/*
+Decode anchor based yolo v3 and v3-Tiny
+*/
+void decodeV3AB(std::shared_ptr<NNData> nnData,
+                std::shared_ptr<ImgDetections> outDetections,
+                DetectionParserProperties properties,
+                std::shared_ptr<spdlog::logger> logger);
+
+/*
+Decode anchor based networks, e.g., yolo v5, v7, P
+*/
+void decodeV5AB(std::shared_ptr<NNData> nnData,
+                std::shared_ptr<ImgDetections> outDetections,
+                DetectionParserProperties properties,
+                std::shared_ptr<spdlog::logger> logger);
+
+/*
+Decode anchor free top-left-bottom-right (TLBR) style networks, e.g., yolo v6r2, v8, v10, v11
+*/
+void decodeTLBR(std::shared_ptr<NNData> nnData,
+                std::shared_ptr<ImgDetections> outDetections,
+                DetectionParserProperties properties,
+                std::shared_ptr<spdlog::logger> logger);
+
+std::vector<std::string> getSortedDetectionLayerNames(std::shared_ptr<NNData> nnData, std::string searchTerm, std::vector<std::string> outputNames);
+
+float YoloIntersectionOverUnion(const DetectionCandidate& box1, const DetectionCandidate& box2);
+
+bool isTensorOrderValid(dai::TensorInfo& tensorInfo, DetectionParserProperties properties, std::shared_ptr<spdlog::logger> logger);
+
+std::vector<DetectionCandidate> nonMaximumSuppression(std::vector<DetectionCandidate>& detectionCandidates, float iouThr);
+
+void createImgDetections(const std::vector<DetectionCandidate>& detectionCandidates,
+                         std::shared_ptr<ImgDetections> outDetections,
+                         unsigned int width,
+                         unsigned int height);
+
+void segmentationDecode(std::shared_ptr<NNData> nnData,
+                        std::vector<DetectionCandidate>& detectionCandidates,
+                        std::shared_ptr<ImgDetections> outDetections,
+                        DetectionParserProperties properties,
+                        std::shared_ptr<spdlog::logger> logger);
+
+void keypointDecode(std::shared_ptr<NNData> nnData,
+                    std::vector<DetectionCandidate>& detectionCandidates,
+                    std::shared_ptr<ImgDetections> outDetections,
+                    DetectionParserProperties properties,
+                    std::shared_ptr<spdlog::logger> logger);
+
+}  // namespace DetectionParserUtils
+}  // namespace utilities
+}  // namespace dai
\ No newline at end of file
diff --git a/src/pipeline/utilities/NNDataViewer.hpp b/src/pipeline/utilities/NNDataViewer.hpp
new file mode 100644
index 000000000..94ab12cda
--- /dev/null
+++ b/src/pipeline/utilities/NNDataViewer.hpp
@@ -0,0 +1,163 @@
+#pragma once
+#include
+
+#include "depthai/common/TensorInfo.hpp"
+#include "depthai/pipeline/datatype/NNData.hpp"
+#include "fp16/fp16.h"
+namespace dai {
+class NNDataViewer {
+   public:
+    std::shared_ptr<Memory> data;
+    dai::TensorInfo tensor;
+    std::shared_ptr<spdlog::logger> logger;
+
+    // Byte strides to multiply each (c, h, w) index with before summing into a flat offset
+    struct FactorsBefore {
+        int32_t h;
+        int32_t w;
+        int32_t c;
+    };
+
+    FactorsBefore factorsBefore;
+
+    NNDataViewer(dai::TensorInfo tensor, std::shared_ptr<Memory> data, std::shared_ptr<spdlog::logger> logger)
+        : data{data}, tensor{tensor}, logger{logger} {};
+    bool build() {
+        if(tensor.strides.size() < 2) {
+            logger->error("Tensor doesn't have enough strides. Number of strides: {}, expected at least {}", tensor.strides.size(), 2);
+            return false;
+        }
+        if(tensor.strides[0] == 0 || tensor.strides[1] == 0) {
+            logger->error("Tensor strides should not be set to zero. Strides are {} {}", tensor.strides[0], tensor.strides[1]);
+            return false;
+        }
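+        // Map the logical (c, h, w) axes onto byte strides for this tensor's storage order: the
+        // innermost, contiguous axis advances by the element size, the outer axes by the reported
+        // strides, so get(c, h, w) can compute one flat byte offset regardless of layout.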
It is {}", tensor.dims[0]); + return false; + } + if(tensor.strides.size() != 4) { + logger->error("Invalid number of strides: {}, expected: {}", tensor.strides.size(), 4); + } + factorsBefore.c = tensor.strides[1]; + factorsBefore.h = tensor.strides[2]; + factorsBefore.w = tensor.getDataTypeSize(); + break; + case TensorInfo::StorageOrder::NHWC: + if(tensor.dims[0] != 1) { + logger->error("NHWC is only supported in Detection Parser if N is 1. It is {}", tensor.dims[0]); + return false; + } + if(tensor.strides.size() != 4) { + logger->error("Invalid number of strides: {}, expected: {}", tensor.strides.size(), 4); + } + factorsBefore.h = tensor.strides[1]; + factorsBefore.w = tensor.strides[2]; + factorsBefore.c = tensor.getDataTypeSize(); + break; + case TensorInfo::StorageOrder::HCW: + factorsBefore.h = tensor.strides[0]; + factorsBefore.c = tensor.strides[1]; + factorsBefore.w = tensor.getDataTypeSize(); + break; + + case TensorInfo::StorageOrder::HWC: + factorsBefore.h = tensor.strides[0]; + factorsBefore.w = tensor.strides[1]; + factorsBefore.c = tensor.getDataTypeSize(); + break; + case TensorInfo::StorageOrder::CHW: + factorsBefore.c = tensor.strides[0]; + factorsBefore.h = tensor.strides[1]; + factorsBefore.w = tensor.getDataTypeSize(); + break; + + case TensorInfo::StorageOrder::CWH: + factorsBefore.c = tensor.strides[0]; + factorsBefore.w = tensor.strides[1]; + factorsBefore.h = tensor.getDataTypeSize(); + break; + + case TensorInfo::StorageOrder::WCH: + factorsBefore.w = tensor.strides[0]; + factorsBefore.c = tensor.strides[1]; + factorsBefore.h = tensor.getDataTypeSize(); + break; + + case TensorInfo::StorageOrder::WHC: + factorsBefore.w = tensor.strides[0]; + factorsBefore.h = tensor.strides[1]; + factorsBefore.c = tensor.getDataTypeSize(); + break; + case TensorInfo::StorageOrder::NHCW: + case TensorInfo::StorageOrder::NC: + case TensorInfo::StorageOrder::CN: + case TensorInfo::StorageOrder::H: + case TensorInfo::StorageOrder::W: + case TensorInfo::StorageOrder::C: + default: + logger->error("Storage order not supported in NNDataViewer"); + return false; + } + return sanity_check(); + } + + bool sanity_check() { + if(data->getSize() < (tensor.offset + (tensor.dims[0] * tensor.strides[0]))) { + logger->error( + "Underlying data does not hold enough data for the tensor to be contained.\ + Tensor size: {}, Tensor offset: {}, Data type size: {}, Data size: {} ", + tensor.dims[0] * tensor.strides[0], + tensor.offset, + tensor.getDataTypeSize(), + data->getSize()); + return false; + } + if(tensor.dims.size() < 2) { + logger->error("Number of dimensions for the input tensor is expected to be at least 2. It is {}", tensor.dims.size()); + return false; + } + return true; + }; + + inline float get(int c, int h, int w) { + // If this turns out to be slow, use a function pointer instead and point to the right getter at build time + int32_t index = tensor.offset + factorsBefore.h * h + factorsBefore.w * w + factorsBefore.c * c; +#ifdef DEPTHAI_SAFE_NN_DATA_ACCESS + logger->trace("Offset {}, fbH {}, fbW {}, fbC {}, h {}, w {}, c{}", tensor.offset, factorsBefore.h, factorsBefore.w, factorsBefore.c, h, w, c); + if(index > data->getSize()) { + logger->error("Out of bound access. 
+    inline float get(int c, int h, int w) {
+        // If this turns out to be slow, use a function pointer instead and point to the right getter at build time
+        int32_t index = tensor.offset + factorsBefore.h * h + factorsBefore.w * w + factorsBefore.c * c;
+#ifdef DEPTHAI_SAFE_NN_DATA_ACCESS
+        logger->trace("Offset {}, fbH {}, fbW {}, fbC {}, h {}, w {}, c {}", tensor.offset, factorsBefore.h, factorsBefore.w, factorsBefore.c, h, w, c);
+        if(index < 0 || static_cast<size_t>(index) >= data->getSize()) {
+            logger->error("Out of bound access. Size is {}, index is {}", data->getSize(), index);
+            return 0.0f;
+        }
+#endif
+
+        switch(tensor.dataType) {
+            case TensorInfo::DataType::U8F: {
+                uint8_t dataOut = data->getData()[index];
+                return (static_cast<float>(dataOut) - tensor.qpZp) * tensor.qpScale;
+            }
+            case TensorInfo::DataType::I8: {
+                int8_t dataOut = static_cast<int8_t>(data->getData()[index]);
+                return (static_cast<float>(dataOut) - tensor.qpZp) * tensor.qpScale;
+            }
+            case TensorInfo::DataType::INT: {
+                int32_t dataOut = reinterpret_cast<const int32_t*>(data->getData().data())[index / sizeof(int32_t)];
+                return (static_cast<float>(dataOut) - tensor.qpZp) * tensor.qpScale;
+            }
+            case TensorInfo::DataType::FP16: {
+                int16_t dataOut = reinterpret_cast<const int16_t*>(data->getData().data())[index / sizeof(int16_t)];
+                return (fp16_ieee_to_fp32_value(dataOut) - tensor.qpZp) * tensor.qpScale;
+            }
+            case TensorInfo::DataType::FP32: {
+                float dataOut = reinterpret_cast<const float*>(data->getData().data())[index / sizeof(float)];
+                return (dataOut - tensor.qpZp) * tensor.qpScale;
+            }
+            case TensorInfo::DataType::FP64:
+            default: {
+                return 0.0f;
+            }
+        }
+    }
+};
+}  // namespace dai

From 96a92f58cac13c433300945107e82fb1dcf03ab0 Mon Sep 17 00:00:00 2001
From: aljazkonec1
Date: Wed, 5 Nov 2025 10:47:23 +0100
Subject: [PATCH 3/7] Add host side implementation

---
 .../pipeline/node/DetectionParserBindings.cpp |  2 ++
 examples/cpp/DetectionNetwork/CMakeLists.txt  |  2 +-
 .../detection_and_keypoints.cpp               |  1 -
 .../{RVC4 => }/detection_and_segmentation.cpp | 35 ++++++++++++-------
 .../{RVC4 => }/detection_and_segmentation.py  | 14 ++++++--
 tests/CMakeLists.txt                          |  2 +-
 6 files changed, 38 insertions(+), 18 deletions(-)
 rename examples/cpp/DetectionNetwork/{RVC4 => }/detection_and_segmentation.cpp (84%)
 rename examples/python/DetectionNetwork/{RVC4 => }/detection_and_segmentation.py (92%)

diff --git a/bindings/python/src/pipeline/node/DetectionParserBindings.cpp b/bindings/python/src/pipeline/node/DetectionParserBindings.cpp
index 7e5a50c4f..eab544ed4 100644
--- a/bindings/python/src/pipeline/node/DetectionParserBindings.cpp
+++ b/bindings/python/src/pipeline/node/DetectionParserBindings.cpp
@@ -65,11 +65,13 @@ void bind_detectionparser(pybind11::module& m, void* pCallstack) {
             DOC(dai, node, DetectionParser, setAnchors, 2))
         .def("setAnchorMasks", &DetectionParser::setAnchorMasks, py::arg("anchorMasks"), DOC(dai, node, DetectionParser, setAnchorMasks))
         .def("setIouThreshold", &DetectionParser::setIouThreshold, py::arg("thresh"), DOC(dai, node, DetectionParser, setIouThreshold))
+        .def("setRunOnHost", &DetectionParser::setRunOnHost, py::arg("runOnHost"), DOC(dai, node, DetectionParser, setRunOnHost))
         .def("getNumClasses", &DetectionParser::getNumClasses, DOC(dai, node, DetectionParser, getNumClasses))
         .def("getCoordinateSize", &DetectionParser::getCoordinateSize, DOC(dai, node, DetectionParser, getCoordinateSize))
         .def("getAnchors", &DetectionParser::getAnchors, DOC(dai, node, DetectionParser, getAnchors))
         .def("getAnchorMasks", &DetectionParser::getAnchorMasks, DOC(dai, node, DetectionParser, getAnchorMasks))
         .def("getIouThreshold", &DetectionParser::getIouThreshold, DOC(dai, node, DetectionParser, getIouThreshold))
+        .def("runOnHost", &DetectionParser::runOnHost, DOC(dai, node, DetectionParser, runOnHost))
         .def("build", &DetectionParser::build, DOC(dai, node, DetectionParser, build));
     daiNodeModule.attr("DetectionParser").attr("Properties") = detectionParserProperties;
 }
diff --git a/examples/cpp/DetectionNetwork/CMakeLists.txt b/examples/cpp/DetectionNetwork/CMakeLists.txt
index 8193faeb9..8c3ba6ecf 100644
--- a/examples/cpp/DetectionNetwork/CMakeLists.txt
+++ b/examples/cpp/DetectionNetwork/CMakeLists.txt
@@ -23,7 +23,7 @@ dai_set_example_test_labels(detection_network ondevice rvc2_all rvc4 rvc4rgb ci)
 dai_add_example(detection_network_remap detection_network_remap.cpp ON OFF)
 dai_set_example_test_labels(detection_network_remap ondevice rvc2_all rvc4 ci)
 
-dai_add_example(detection_and_segmentation RVC4/detection_and_segmentation.cpp ON OFF)
+dai_add_example(detection_and_segmentation detection_and_segmentation.cpp ON OFF)
 dai_set_example_test_labels(detection_and_segmentation rvc4)
 
 dai_add_example(detection_and_keypoints detection_and_keypoints.cpp ON OFF)
diff --git a/examples/cpp/DetectionNetwork/detection_and_keypoints.cpp b/examples/cpp/DetectionNetwork/detection_and_keypoints.cpp
index 667151bb0..f374bdca1 100644
--- a/examples/cpp/DetectionNetwork/detection_and_keypoints.cpp
+++ b/examples/cpp/DetectionNetwork/detection_and_keypoints.cpp
@@ -86,7 +86,6 @@ int main() {
 
             auto currentTime = std::chrono::steady_clock::now();
             float fps = counter / std::chrono::duration<float>(currentTime - startTime).count();
-            std::cout << "FPS: " << fps << std::endl;
         }
 
         if(cv::waitKey(1) == 'q') {
diff --git a/examples/cpp/DetectionNetwork/RVC4/detection_and_segmentation.cpp b/examples/cpp/DetectionNetwork/detection_and_segmentation.cpp
similarity index 84%
rename from examples/cpp/DetectionNetwork/RVC4/detection_and_segmentation.cpp
rename to examples/cpp/DetectionNetwork/detection_and_segmentation.cpp
index 4912d04c6..e3e81dcbf 100644
--- a/examples/cpp/DetectionNetwork/RVC4/detection_and_segmentation.cpp
+++ b/examples/cpp/DetectionNetwork/detection_and_segmentation.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -16,8 +17,16 @@ cv::Rect frameNorm(const cv::Mat& frame, const dai::Point2f& topLeft, const dai::Point2f& bottomRight) {
 }
 
 int main() {
+    std::string modelName = "luxonis/yolov8-instance-segmentation-large:coco-640x352";
+    bool setRunOnHost = false;
+    auto device = std::make_shared<dai::Device>();
+
+    if(device->getPlatformAsString() == "RVC2") {
+        modelName = "luxonis/yolov8-instance-segmentation-nano:coco-512x288";
+        setRunOnHost = true;
+    }
     // Create pipeline
-    dai::Pipeline pipeline;
+    dai::Pipeline pipeline{device};
 
     // Create and configure camera node
     auto cameraNode = pipeline.create<dai::node::Camera>();
@@ -27,8 +36,10 @@ int main() {
 
     auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();
     dai::NNModelDescription modelDescription;
-    modelDescription.model = "luxonis/yolov8-instance-segmentation-large:coco-640x480";
+
+    modelDescription.model = modelName;
     detectionNetwork->build(cameraNode, modelDescription);
+    detectionNetwork->detectionParser->setRunOnHost(setRunOnHost);
     auto labelMap = detectionNetwork->getClasses();
 
     // Create output queues
@@ -121,16 +132,18 @@ int main() {
                 detections.begin(), detections.end(), [filteredLabel](const dai::ImgDetection& det) { return det.label != filteredLabel; }),
                 detections.end());
         }
+            if(!segmentationMask.empty()) {
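+                // Visualization only: the LUT below maps detection index k to gray level 25 * k,
+                // so roughly ten instances get visually distinct colors after applyColorMap,
+                // while 255 stays reserved for the untouched background.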
-        cv::Mat scaledMask;
-        cv::LUT(segmentationMask, lut, scaledMask);
+            cv::Mat scaledMask;
+            cv::LUT(segmentationMask, lut, scaledMask);
 
-        cv::Mat coloredMask;
-        cv::applyColorMap(scaledMask, coloredMask, cv::COLORMAP_JET);
-        frame.copyTo(coloredMask, (scaledMask == 255));
-        cv::addWeighted(frame, 0.7, coloredMask, 0.3, 0, frame);
+            cv::Mat coloredMask;
+            cv::applyColorMap(scaledMask, coloredMask, cv::COLORMAP_JET);
+            frame.copyTo(coloredMask, (scaledMask == 255));
+            cv::addWeighted(frame, 0.7, coloredMask, 0.3, 0, frame);
+        }
 
         // Display detections
         for(const auto& detection : detections) {
@@ -157,8 +170,6 @@ int main() {
             cv::imshow("rgb", frame);
 
             auto currentTime = std::chrono::steady_clock::now();
-            float fps = counter / std::chrono::duration<float>(currentTime - startTime).count();
-            std::cout << "FPS: " << fps << std::endl;
         }
     }
 }
diff --git a/examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py b/examples/python/DetectionNetwork/detection_and_segmentation.py
similarity index 92%
rename from examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py
rename to examples/python/DetectionNetwork/detection_and_segmentation.py
index 650f90f2f..a8ecc74a6 100644
--- a/examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py
+++ b/examples/python/DetectionNetwork/detection_and_segmentation.py
@@ -6,11 +6,19 @@
 import numpy as np
 import time
 
+model_name = "luxonis/yolov8-instance-segmentation-large:coco-640x480"
+setRunOnHost = False
+device = dai.Device()
+if device.getPlatformAsString() == "RVC2":
+    model_name = "luxonis/yolov8-instance-segmentation-nano:coco-512x288"
+    setRunOnHost = True
+
 # Create pipeline
-with dai.Pipeline() as pipeline:
+with dai.Pipeline(device) as pipeline:
     cameraNode = pipeline.create(dai.node.Camera).build()
-    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("luxonis/yolov8-instance-segmentation-nano:coco-512x288"))
-    # detectionNetwork.detectionParser.runOnHost(True)
+
+    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription(model_name))
+    detectionNetwork.detectionParser.setRunOnHost(setRunOnHost)
     labelMap = detectionNetwork.getClasses()
 
     qRgb = detectionNetwork.passthrough.createOutputQueue()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d3823f1a8..f4b0776b2 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -524,7 +524,7 @@ FIRE_VIDEO="${fire_video}"
 KITCHEN_IMAGE_PATH="${kitchen_image}"
 YOLO_V8_INSTANCE_SEGMENTATION_LARGE_COCO_640x352_KITCHEN_SEGMENTATION_GROUND_TRUTH="${yolo_v8_instance_segmentation_large_coco_640x352_kitchen_segmentation_gt}"
 )
-dai_set_test_labels(detection_parser_test ondevice rvc4 ci)
+dai_set_test_labels(detection_parser_test ondevice rvc4 ci onhost)
 
 # Spatial detection network test
 dai_add_test(spatial_detection_network_test src/ondevice_tests/pipeline/node/spatial_detection_network_test.cpp)

From a0dd29a87dcf460a720332847689570829a99707 Mon Sep 17 00:00:00 2001
From: aljazkonec1
Date: Wed, 5 Nov 2025 10:52:36 +0100
Subject: [PATCH 4/7] bump device

---
 cmake/Depthai/DepthaiDeviceSideConfig.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Depthai/DepthaiDeviceSideConfig.cmake b/cmake/Depthai/DepthaiDeviceSideConfig.cmake
index b0a270a1d..7c6bb3df4 100644
--- a/cmake/Depthai/DepthaiDeviceSideConfig.cmake
+++ b/cmake/Depthai/DepthaiDeviceSideConfig.cmake
@@ -2,7 +2,7 @@
 set(DEPTHAI_DEVICE_SIDE_MATURITY "snapshot")
 
 # "full commit hash of device side binary"
-set(DEPTHAI_DEVICE_SIDE_COMMIT "e658b28655820c649b3bbed9f44865d00139094d")
+set(DEPTHAI_DEVICE_SIDE_COMMIT "8741ce89206d2a5299acc3382c7496e1ee205fcb")
 
 # "version if applicable"
 set(DEPTHAI_DEVICE_SIDE_VERSION "")

From 33752f1f44726f7e977dac9e1f6bd1ff79228da1 Mon Sep 17 00:00:00 2001
From: aljazkonec1
Date: Wed, 5 Nov 2025 12:54:19 +0100
Subject: [PATCH 5/7] bump rvc4

---
 cmake/Depthai/DepthaiDeviceRVC4Config.cmake            | 2 +-
 .../pipeline/node/spatial_location_calculator_test.cpp | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake
index 43d640f5e..f6ae0d22b 100644
--- a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake
+++ b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake
@@ -3,4 +3,4 @@
 set(DEPTHAI_DEVICE_RVC4_MATURITY "snapshot")
 
 # "version if applicable"
-set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+53bd364bc4c519e9aa6230b3de4d78a78d073373")
+set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+62ce59c3c4a4a53a9b0773fe83dabbecdc4553e9")
diff --git a/tests/src/ondevice_tests/pipeline/node/spatial_location_calculator_test.cpp b/tests/src/ondevice_tests/pipeline/node/spatial_location_calculator_test.cpp
index 1316e8566..0a8ca09b7 100644
--- a/tests/src/ondevice_tests/pipeline/node/spatial_location_calculator_test.cpp
+++ b/tests/src/ondevice_tests/pipeline/node/spatial_location_calculator_test.cpp
@@ -1,6 +1,5 @@
-#include
-
 #include
+#include
 #include
 #include
 #include
@@ -106,7 +105,6 @@ TEST_CASE("SpatialLocationCalculator synthetic depth data test") {
     auto outputQueue = spatial->out.createOutputQueue();
     auto passthroughQueue = spatial->passthroughDepth.createOutputQueue();
-
     std::vector<uint16_t> depthPixels(width * height, 1000);
 
     auto setRegionDepth = [&](const RoiSpec& spec) {
         const int x0 = static_cast<int>(spec.roi.x);
@@ -120,7 +118,7 @@ TEST_CASE("SpatialLocationCalculator synthetic depth data test") {
     for(const auto& spec : roiSpecs) {
         setRegionDepth(spec);
     }
-    
+    // Prepare synthetic depth frame
     auto depthFrame = std::make_shared<dai::ImgFrame>();
     depthFrame->setType(dai::ImgFrame::Type::RAW16);

From 255a8824da8078e10cf8b8d19623a8cdc649daa7 Mon Sep 17 00:00:00 2001
From: aljazkonec1
Date: Tue, 11 Nov 2025 16:59:50 +0100
Subject: [PATCH 6/7] update parser

---
 .../utilities/DetectionParser/DetectionParserUtils.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp b/src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp
index c1809e847..a9455e551 100644
--- a/src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp
+++ b/src/pipeline/utilities/DetectionParser/DetectionParserUtils.cpp
@@ -10,7 +10,7 @@
 #include
 #include
 
-#include "depthai/common/KeypointsList.hpp"
+#include "depthai/common/KeypointsListT.hpp"
 #include "depthai/common/RotatedRect.hpp"
 #include "depthai/common/TensorInfo.hpp"
 #include "depthai/pipeline/datatype/ImgDetections.hpp"
@@ -888,7 +888,7 @@ void keypointDecode(std::shared_ptr<NNData> nnData,
             keypoints.push_back(dai::Keypoint{dai::Point2f(x, y), conf});
         }
 
-        outDetections->detections[i].keypoints = KeypointsList(keypoints);
+        outDetections->detections[i].keypoints = KeypointsList(keypoints, properties.parser.keypointEdges);
     }
 }

From 831232c1ac7faf4cb2e181bc74d5067f9aba220f Mon Sep 17 00:00:00 2001
From: aljazkonec1
Date: Tue, 11 Nov 2025 17:57:55 +0100
Subject: [PATCH 7/7] update example

---
 examples/python/DetectionNetwork/detection_and_keypoints.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/python/DetectionNetwork/detection_and_keypoints.py b/examples/python/DetectionNetwork/detection_and_keypoints.py
index b61c41fc9..4459be138 100644
--- a/examples/python/DetectionNetwork/detection_and_keypoints.py
+++ b/examples/python/DetectionNetwork/detection_and_keypoints.py
@@ -7,7 +7,7 @@
 
 # Create pipeline
 with dai.Pipeline() as pipeline:
-    cameraNode = pipeline.create(dai.node.Camera).build()
+    cameraNode = pipeline.create(dai.node.Camera).build(sensorFps=12)
     detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("luxonis/yolov8-nano-pose-estimation:coco-512x288"))
     labelMap = detectionNetwork.getClasses()
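
Reviewer note (not part of the patches themselves): the series exposes host-side YOLO decoding through DetectionParser::setRunOnHost. A minimal Python sketch of the intended usage, relying only on the API introduced in PATCH 3; the platform check and model slugs mirror the updated detection_and_segmentation.py example:

    import depthai as dai

    # Choose the model and parser placement based on the connected platform,
    # as the updated segmentation example does.
    device = dai.Device()
    model_name = "luxonis/yolov8-instance-segmentation-large:coco-640x480"
    run_on_host = False
    if device.getPlatformAsString() == "RVC2":
        model_name = "luxonis/yolov8-instance-segmentation-nano:coco-512x288"
        run_on_host = True  # decode the YOLO head on the host instead of the device

    with dai.Pipeline(device) as pipeline:
        camera = pipeline.create(dai.node.Camera).build()
        nn = pipeline.create(dai.node.DetectionNetwork).build(
            camera, dai.NNModelDescription(model_name))
        nn.detectionParser.setRunOnHost(run_on_host)  # new setter from PATCH 3
        detections = nn.out.createOutputQueue()
        pipeline.start()
        dets = detections.get()  # dai.ImgDetections, decoded on host or device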
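Reviewer note (illustration only, not library code): the NNDataViewer helper added in PATCH 2 reads every supported tensor type through the same affine dequantization, value = (raw - qpZp) * qpScale, with FP16 first widened to FP32. A self-contained numpy sketch of that rule, using a hypothetical helper name:

    import numpy as np

    def dequantize(raw: np.ndarray, zero_point: float, scale: float) -> np.ndarray:
        # Same rule the getValue() switch applies per element:
        # (raw - zeroPoint) * scale, after widening to float32.
        return (raw.astype(np.float32) - zero_point) * scale

    # Example: an INT8 tensor with zero point 3 and scale 0.05.
    raw = np.array([-5, 3, 40], dtype=np.int8)
    print(dequantize(raw, zero_point=3.0, scale=0.05))  # -> [-0.4   0.    1.85]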