33 commits
666c9cd  Add dynamic-pipeline (XinWangIntel, Apr 10, 2025)
6c3cc17  Use predicted shape instead of real shape (XinWangIntel, Dec 24, 2025)
28ded18  Fix unit test (XinWangIntel, Dec 25, 2025)
baee752  Port dynamic stride change (XinWangIntel, Dec 19, 2025)
78f32f3  Follow dynamic stride change (XinWangIntel, Jan 9, 2026)
facce95  Update allocate_tensor and update_graph_args (XinWangIntel, Jan 9, 2026)
0b5c2d2  Use setArgumentValueWithStrides to replace setArgumentProperty (XinWangIntel, Jan 9, 2026)
86589da  Force strides support in metadata be true (XinWangIntel, Jan 12, 2026)
2b7f2e7  Force open compilation (XinWangIntel, Jan 12, 2026)
4a2e032  Update local output tensor to use predict shape (XinWangIntel, Jan 13, 2026)
edb4460  Change predict log from warn to info (XinWangIntel, Jan 13, 2026)
16ff1aa  Fix stride and shape info (XinWangIntel, Jan 16, 2026)
2b50f9d  Check User and Local tensor with predicted result (XinWangIntel, Jan 20, 2026)
fc7b824  Skip check if user tensor is allocated by plugin (XinWangIntel, Jan 20, 2026)
618ad56  Code clean and use ENABLE_NPU_DEBUG_CAPS to use this feature (XinWangIntel, Jan 27, 2026)
a38ea45  Init execute params (XinWangIntel, Jan 28, 2026)
045de4f  Remove tests (XinWangIntel, Jan 28, 2026)
ed5de07  Remove some debug log (XinWangIntel, Jan 29, 2026)
c5b7c3f  Only call predict shape if user set new tensor (XinWangIntel, Jan 29, 2026)
8192a42  Code clean (XinWangIntel, Jan 29, 2026)
a98f8d8  Code refactor (XinWangIntel, Feb 2, 2026)
29e2e9c  Fix predict issue (XinWangIntel, Feb 3, 2026)
5b7629e  Fix output shape (XinWangIntel, Feb 3, 2026)
8b0311b  Update copyright (XinWangIntel, Feb 3, 2026)
3137891  Update copyright (XinWangIntel, Feb 3, 2026)
3a4e5ee  Update copyright (XinWangIntel, Feb 3, 2026)
742249a  Update copyright (XinWangIntel, Feb 3, 2026)
0227b85  Clean log and fix copyright (XinWangIntel, Feb 3, 2026)
f7a7ea0  Remove special flag for MSVC (XinWangIntel, Feb 3, 2026)
e572db5  Fix style (XinWangIntel, Feb 3, 2026)
69f323c  Detect new mlir runtime api (XinWangIntel, Feb 3, 2026)
0ebca81  Fix test that pass shape smaller than min size (XinWangIntel, Feb 4, 2026)
5ec1727  Skip check for min size (XinWangIntel, Feb 4, 2026)
12 changes: 12 additions & 0 deletions src/plugins/intel_npu/src/backend/CMakeLists.txt
@@ -5,6 +5,16 @@
set(TARGET_NAME "openvino_npu_level_zero_backend")

file(GLOB_RECURSE SOURCES *.cpp *.hpp *.h)

if(NOT ENABLE_NPU_DEBUG_CAPS)
    list(REMOVE_ITEM SOURCES
        ${CMAKE_CURRENT_SOURCE_DIR}/include/zero_dynamic_infer_request.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/include/zero_dynamic_pipeline.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/src/zero_dynamic_infer_request.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/src/zero_dynamic_pipeline.cpp
    )
endif()

source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES})

add_library(${TARGET_NAME} STATIC ${SOURCES})
@@ -19,12 +29,14 @@ target_compile_definitions(${TARGET_NAME}
target_include_directories(${TARGET_NAME}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../compiler_adapter/include>
)

target_link_libraries(${TARGET_NAME}
PRIVATE
openvino_npu_common
openvino::npu_al
openvino_npu_driver_compiler_adapter
Comment on lines +32 to +39

Contributor: why are these needed?

Contributor (author): The dynamic pipeline needs to call IRGraph-specific functions, so it depends on the compiler adapter.
)
#
# targets install
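To make the new link dependency concrete: the dynamic pipeline drives the graph through the IRGraph interface declared in the compiler adapter's irgraph.hpp (see zero_dynamic_pipeline.hpp below). A minimal sketch of the kind of access involved, assuming IRGraph derives from IGraph; the downcast and the helper name example_bind are illustrative, not code from this PR:

#include <memory>

#include "irgraph.hpp"  // provided by openvino_npu_driver_compiler_adapter

namespace intel_npu {

// Illustrative only: fetch the IRGraph-specific interface from the generic
// IGraph handle so that GraphArguments can be populated. Needing this kind
// of access is why the backend now links against the compiler adapter.
inline void example_bind(const std::shared_ptr<IGraph>& graph, IRGraph::GraphArguments& binding) {
    auto irGraph = std::dynamic_pointer_cast<IRGraph>(graph);
    if (irGraph == nullptr) {
        return;  // not an MLIR/LLVM graph; the static Pipeline path handles it
    }
    (void)binding;  // placeholder: populate from irGraph's argument metadata
}

}  // namespace intel_npu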
45 changes: 45 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_dynamic_infer_request.hpp
@@ -0,0 +1,45 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "intel_npu/utils/zero/zero_utils.hpp"
#include "zero_dynamic_pipeline.hpp"
#include "zero_infer_request.hpp"

namespace intel_npu {

class ZeroDynamicInferRequest final : public ZeroInferRequest {
public:
explicit ZeroDynamicInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
const std::shared_ptr<const ICompiledModel>& compiledModel,
const Config& config);

void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;
void set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::SoPtr<ov::ITensor>>& tensors) override;

void infer_async() override;

protected:
void construct_pipeline() override;

/**
* @brief Allocates a tensor on host and stores the reference inside multiple attributes.
* @param index The index which the allocated tensor shall use.
* @param isInput Determines the containers in which the newly allocated tensors will be stored.
* @param batchSize If provided, the value of the shape on the 0th axis is overridden with this value.
* @return Pointer to the allocated tensor
*/
std::shared_ptr<ZeroTensor> allocate_tensor(const size_t index,
const bool isInput,
const std::optional<std::size_t> batchSize = std::nullopt) const;

IODescriptor prepare_io_descriptor_with_user_info(const IODescriptor& descriptor, bool isInput);

bool _isTensorChanged = false;
};

} // namespace intel_npu
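A hedged guess at the control flow implied by the _isTensorChanged flag and the commit "Only call predict shape if user set new tensor"; this is a reconstruction for orientation, not the implementation in this PR:

// Reconstruction only: set_tensor records that the user supplied a new
// tensor, and infer_async re-runs shape prediction solely in that case,
// avoiding redundant prediction on repeated inferences with unchanged I/O.
void ZeroDynamicInferRequest::set_tensor(const ov::Output<const ov::Node>& port,
                                         const ov::SoPtr<ov::ITensor>& tensor) {
    ZeroInferRequest::set_tensor(port, tensor);
    _isTensorChanged = true;
}

void ZeroDynamicInferRequest::infer_async() {
    if (_isTensorChanged) {
        // predict output shapes from the new input shapes and rebind
        // the graph arguments before submitting the pipeline
        _isTensorChanged = false;
    }
    ZeroInferRequest::infer_async();
}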
142 changes: 142 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_dynamic_pipeline.hpp
@@ -0,0 +1,142 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "irgraph.hpp"
#include "zero_pipeline.hpp"

namespace intel_npu {

struct DynamicPipeline : public Pipeline {
struct PipelinedCommandLists {
mutable IRGraph::GraphArguments _binding;

std::vector<std::unique_ptr<CommandList>> _commandLists;
// Store command list handles so they can be passed to the ExecutionEngine
std::vector<ze_command_list_handle_t> _commandListHandles;

PipelinedCommandLists(size_t numCommandLists,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const uint32_t& group_ordinal) {
_commandLists.reserve(numCommandLists);
for (size_t i = 0; i < numCommandLists; i++) {
_commandLists.emplace_back(std::make_unique<CommandList>(init_structs, group_ordinal));
}

for (size_t i = 0; i < numCommandLists; i++) {
_commandListHandles.push_back(_commandLists[i]->handle());
}
}

size_t size() const {
return _commandListHandles.size();
}

ze_command_list_handle_t* data() {
return _commandListHandles.data();
}

void bind(IRGraph* graph);

std::vector<ze_command_list_handle_t>& getHandles() {
return _commandListHandles;
}

IRGraph::GraphArguments& getBinding() {
return _binding;
}

void appendBarrier() const {
// TODO
}

void appendNpuTimestamp(uint64_t* timestamp_buff) const {
// TODO
}

void updateMutableCommandList(uint32_t arg_index,
const void* arg_value,
const ov::Strides& strides,
const ov::Shape& shapes) {
if (arg_index < _binding._inputs.size()) {
_binding._inputs[arg_index].setArg(arg_value);
// Only store the valid shape dimensions
for (int64_t i = 0; i < _binding._inputs[arg_index].dimsCount; i++) {
_binding._inputs[arg_index].sizes[i] = shapes[i];
}

if (!strides.empty()) {
for (int64_t i = 0; i < _binding._inputs[arg_index].dimsCount; i++) {
_binding._inputs[arg_index].strides[i] = strides[i];
}
} else {
// Strides are expressed in elements rather than bytes; derive them from the shape
_binding._inputs[arg_index].updateStride();
}
} else {
size_t output_index = static_cast<size_t>(arg_index) - _binding._inputs.size();
if (output_index < _binding._outputs.size()) {
_binding._outputs[output_index].setArg(arg_value);

// Only store the valid shape dimensions
for (int64_t i = 0; i < _binding._outputs[output_index].dimsCount; i++) {
_binding._outputs[output_index].sizes[i] = shapes[i];
}

if (!strides.empty()) {
for (int64_t i = 0; i < _binding._outputs[output_index].dimsCount; i++) {
_binding._outputs[output_index].strides[i] = strides[i];
}
} else {
// Strides are expressed in elements rather than bytes; derive them from the shape
_binding._outputs[output_index].updateStride();
}
}
}
}

void appendWaitOnEvent(const std::shared_ptr<Event>& event) {
event->AppendWaitOnEvent(**_commandLists.rbegin());
}

void appendReset(const std::shared_ptr<Event>& event) {
event->AppendEventReset(**_commandLists.rbegin());
}

void appendSignalEvent(std::shared_ptr<Event>& event) {
event->AppendSignalEvent(**_commandLists.rbegin());
}
};

public:
DynamicPipeline(const Config& config,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const std::shared_ptr<IGraph>& graph,
const std::vector<std::vector<std::shared_ptr<ZeroTensor>>>& input_tensors,
const std::vector<std::shared_ptr<ZeroTensor>>& output_tensors,
size_t batch_size = 1);

DynamicPipeline(const DynamicPipeline&) = delete;
DynamicPipeline& operator=(const DynamicPipeline&) = delete;
virtual ~DynamicPipeline() = default;

void push() override;
void pull() override;
void reset() const override;
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr) override;
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
size_t batch_index,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr) override;

virtual std::vector<ov::ProfilingInfo> get_profiling_info() const override;

protected:
std::vector<std::unique_ptr<PipelinedCommandLists>> _command_lists;
};

} // namespace intel_npu
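The element-based stride fallback above deserves a spelled-out example. A minimal sketch of what updateStride() plausibly computes, assuming a dense row-major layout; the struct and member names mirror the binding fields used above but are a reconstruction, not the IRGraph implementation:

#include <array>
#include <cstdint>

// Reconstruction of an argument binding with element-based strides.
struct ArgSketch {
    static constexpr int64_t MAX_DIMS = 8;
    std::array<int64_t, MAX_DIMS> sizes{};    // shape, valid up to dimsCount
    std::array<int64_t, MAX_DIMS> strides{};  // strides in elements, not bytes
    int64_t dimsCount = 0;

    // Dense row-major strides: the innermost dimension varies fastest,
    // so walk the shape backwards and accumulate the element count.
    void updateStride() {
        int64_t stride = 1;
        for (int64_t i = dimsCount - 1; i >= 0; --i) {
            strides[i] = stride;
            stride *= sizes[i];
        }
    }
};

// For a 4-D shape {2, 3, 4, 5} this yields strides {60, 20, 5, 1}.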
src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -17,7 +17,18 @@

namespace intel_npu {

class ZeroInferRequest final : public SyncInferRequest {
constexpr std::size_t SINGLE_TENSOR = 0;
constexpr bool INPUT = true;
constexpr bool OUTPUT = false;

std::optional<size_t> determine_dynamic_batch_size(const IODescriptor& desc,
const ov::PartialShape& ioShape,
const std::shared_ptr<ov::ITensor>& tensor,
const std::optional<size_t> batchSize);

void* get_tensor_data_ptr(const std::shared_ptr<ov::ITensor>& tensor);

class ZeroInferRequest : public SyncInferRequest {
public:
explicit ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
const std::shared_ptr<const ICompiledModel>& compiledModel,
@@ -33,11 +44,12 @@ class ZeroInferRequest final : public SyncInferRequest {

void get_result() override;

private:
protected:
std::vector<ov::ProfilingInfo> get_profiling_info() const override;

void check_network_precision(const ov::element::Type_t precision) const override;
void create_pipeline();
virtual void construct_pipeline();

std::shared_ptr<ZeroTensor>& get_level_zero_input(size_t index, size_t tensorNo = 0) const;
std::vector<std::shared_ptr<ZeroTensor>>& get_level_zero_inputs(size_t index) const;
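The free function determine_dynamic_batch_size above is declared but not defined here. A hedged sketch of the shape-side check such a helper would need, assuming the batch lives on axis 0; the real plugin logic also consults the IODescriptor and the configured batch mode, which this sketch omits:

#include <memory>
#include <optional>

#include "openvino/core/partial_shape.hpp"
#include "openvino/runtime/itensor.hpp"

// Sketch only: report a runtime batch size when the compiled I/O shape has
// a dynamic batch dimension on axis 0, taking the value from the tensor the
// user actually supplied; otherwise signal that no dynamic batch applies.
std::optional<size_t> sketch_dynamic_batch(const ov::PartialShape& ioShape,
                                           const std::shared_ptr<ov::ITensor>& tensor) {
    if (ioShape.rank().is_dynamic() || ioShape.rank().get_length() == 0 ||
        ioShape[0].is_static()) {
        return std::nullopt;  // batch axis fixed at compile time
    }
    return tensor->get_shape()[0];  // batch taken from the user tensor
}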
29 changes: 21 additions & 8 deletions src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -12,7 +12,7 @@

namespace intel_npu {

struct Pipeline final {
struct Pipeline {
public:
Pipeline(const Config& config,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
@@ -21,18 +21,31 @@
const std::vector<std::shared_ptr<ZeroTensor>>& output_tensors,
size_t batch_size = 1);

Pipeline(const Config& config,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const std::shared_ptr<IGraph>& graph,
const std::vector<std::vector<std::shared_ptr<ZeroTensor>>>& input_tensors,
const std::vector<std::shared_ptr<ZeroTensor>>& output_tensors,
std::string logName,
size_t batch_size = 1);

Pipeline(const Pipeline&) = delete;
Pipeline& operator=(const Pipeline&) = delete;
~Pipeline() = default;
virtual ~Pipeline() = default;

void push();
void pull();
void reset() const;
virtual void push();
virtual void pull();
virtual void reset() const;

void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor);
void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor, size_t batch_index);
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr);
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
size_t batch_index,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr);

std::vector<ov::ProfilingInfo> get_profiling_info() const;
virtual std::vector<ov::ProfilingInfo> get_profiling_info() const;

protected:
std::shared_ptr<ZeroInitStructsHolder> _init_structs;
8 changes: 8 additions & 0 deletions src/plugins/intel_npu/src/backend/src/zero_device.cpp
@@ -8,6 +8,9 @@
#include "intel_npu/utils/zero/zero_api.hpp"
#include "intel_npu/utils/zero/zero_utils.hpp"
#include "zero_infer_request.hpp"
#ifdef NPU_PLUGIN_DEVELOPER_BUILD
# include "zero_dynamic_infer_request.hpp"
#endif

using namespace intel_npu;

@@ -176,6 +179,11 @@ ov::device::Type ZeroDevice::getDeviceType() const {
std::shared_ptr<SyncInferRequest> ZeroDevice::createInferRequest(
const std::shared_ptr<const ICompiledModel>& compiledModel,
const Config& config) {
#ifdef NPU_PLUGIN_DEVELOPER_BUILD
if (compiledModel->get_graph()->get_blob_type() == BlobType::LLVM) {
return std::make_shared<ZeroDynamicInferRequest>(_initStructs, compiledModel, config);
}
#endif
return std::make_shared<ZeroInferRequest>(_initStructs, compiledModel, config);
}

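For context, how this dispatch would be exercised end to end. A sketch assuming a developer build (NPU_PLUGIN_DEVELOPER_BUILD defined) and a model whose compiled graph reports BlobType::LLVM; whether a given dynamic-shape model actually takes that path depends on the compiler configuration, and model.xml is a placeholder:

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");          // placeholder model
    model->reshape(ov::PartialShape{-1, 3, 224, 224});  // dynamic batch axis

    // On a developer build, an LLVM-blob graph routes create_infer_request()
    // through ZeroDynamicInferRequest instead of the static ZeroInferRequest.
    auto compiled = core.compile_model(model, "NPU");
    auto request = compiled.create_infer_request();

    ov::Tensor input(ov::element::f32, {4, 3, 224, 224});
    request.set_input_tensor(input);
    request.infer();  // shapes predicted and graph arguments rebound at runtime
    return 0;
}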