33 commits
666c9cd  Add dynamic-pipeline (XinWangIntel, Apr 10, 2025)
6c3cc17  Use predicted shape instead of real shape (XinWangIntel, Dec 24, 2025)
28ded18  Fix unit test (XinWangIntel, Dec 25, 2025)
baee752  Port dynamic stride change (XinWangIntel, Dec 19, 2025)
78f32f3  Follow dynamic stride change (XinWangIntel, Jan 9, 2026)
facce95  Update allocate_tensor and update_graph_args (XinWangIntel, Jan 9, 2026)
0b5c2d2  Use setArgumentValueWithStrides to replace setArgumentProperty (XinWangIntel, Jan 9, 2026)
86589da  Force strides support in metadata be true (XinWangIntel, Jan 12, 2026)
2b7f2e7  Force open compilation (XinWangIntel, Jan 12, 2026)
4a2e032  Update local output tensor to use predict shape (XinWangIntel, Jan 13, 2026)
edb4460  Change predict log from warn to info (XinWangIntel, Jan 13, 2026)
16ff1aa  Fix stride and shape info (XinWangIntel, Jan 16, 2026)
2b50f9d  Check User and Local tensor with predicted result (XinWangIntel, Jan 20, 2026)
fc7b824  Skip check if user tensor is allocated by plugin (XinWangIntel, Jan 20, 2026)
618ad56  Code clean and use ENABLE_NPU_DEBUG_CAPS to use this feature (XinWangIntel, Jan 27, 2026)
a38ea45  Init execute params (XinWangIntel, Jan 28, 2026)
045de4f  Remove tests (XinWangIntel, Jan 28, 2026)
ed5de07  Remove some debug log (XinWangIntel, Jan 29, 2026)
c5b7c3f  Only call predict shape if user set new tensor (XinWangIntel, Jan 29, 2026)
8192a42  Code clean (XinWangIntel, Jan 29, 2026)
a98f8d8  Code refactor (XinWangIntel, Feb 2, 2026)
29e2e9c  Fix predict issue (XinWangIntel, Feb 3, 2026)
5b7629e  Fix output shape (XinWangIntel, Feb 3, 2026)
8b0311b  Update copyright (XinWangIntel, Feb 3, 2026)
3137891  Update copyright (XinWangIntel, Feb 3, 2026)
3a4e5ee  Update copyright (XinWangIntel, Feb 3, 2026)
742249a  Update copyright (XinWangIntel, Feb 3, 2026)
0227b85  Clean log and fix copyright (XinWangIntel, Feb 3, 2026)
f7a7ea0  Remove special flag for MSVC (XinWangIntel, Feb 3, 2026)
e572db5  Fix style (XinWangIntel, Feb 3, 2026)
69f323c  Detect new mlir runtime api (XinWangIntel, Feb 3, 2026)
0ebca81  Fix test that pass shape smaller than min size (XinWangIntel, Feb 4, 2026)
5ec1727  Skip check for min size (XinWangIntel, Feb 4, 2026)
12 changes: 12 additions & 0 deletions src/plugins/intel_npu/src/backend/CMakeLists.txt
@@ -5,6 +5,16 @@
set(TARGET_NAME "openvino_npu_level_zero_backend")

file(GLOB_RECURSE SOURCES *.cpp *.hpp *.h)

if(NOT ENABLE_NPU_DEBUG_CAPS)
    list(REMOVE_ITEM SOURCES
        ${CMAKE_CURRENT_SOURCE_DIR}/include/zero_dynamic_infer_request.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/include/zero_dynamic_pipeline.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/src/zero_dynamic_infer_request.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/src/zero_dynamic_pipeline.cpp
    )
endif()

source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES})

add_library(${TARGET_NAME} STATIC ${SOURCES})
@@ -19,12 +29,14 @@ target_compile_definitions(${TARGET_NAME}
target_include_directories(${TARGET_NAME}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../compiler_adapter/include>
)

target_link_libraries(${TARGET_NAME}
PRIVATE
openvino_npu_common
openvino::npu_al
openvino_npu_driver_compiler_adapter
Comment on lines +32 to +39

Contributor: why are these needed?

Contributor (author): The dynamic pipeline needs to call IRGraph-specific functions, so it depends on the compiler adapter.
)
#
# targets install
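To make the new link dependency concrete: the dynamic pipeline drives the graph through the IRGraph interface declared in the compiler adapter's irgraph.hpp (see zero_dynamic_pipeline.hpp below). A minimal sketch of the kind of access involved, assuming IRGraph derives from IGraph; the downcast and the helper name example_bind are illustrative, not code from this PR:

#include <memory>

#include "irgraph.hpp"  // provided by openvino_npu_driver_compiler_adapter

namespace intel_npu {

// Illustrative only: fetch the IRGraph-specific interface from the generic
// IGraph handle so that GraphArguments can be populated. Needing this kind
// of access is why the backend now links against the compiler adapter.
inline void example_bind(const std::shared_ptr<IGraph>& graph, IRGraph::GraphArguments& binding) {
    auto irGraph = std::dynamic_pointer_cast<IRGraph>(graph);
    if (irGraph == nullptr) {
        return;  // not an MLIR/LLVM graph; the static Pipeline path handles it
    }
    (void)binding;  // placeholder: populate from irGraph's argument metadata
}

}  // namespace intel_npu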
45 changes: 45 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_dynamic_infer_request.hpp
@@ -0,0 +1,45 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "intel_npu/utils/zero/zero_utils.hpp"
#include "zero_dynamic_pipeline.hpp"
#include "zero_infer_request.hpp"

namespace intel_npu {

class ZeroDynamicInferRequest final : public ZeroInferRequest {
public:
explicit ZeroDynamicInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
const std::shared_ptr<const ICompiledModel>& compiledModel,
const Config& config);

void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;
void set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::SoPtr<ov::ITensor>>& tensors) override;

void infer_async() override;

protected:
void construct_pipeline() override;

/**
* @brief Allocates a tensor on host and stores the reference inside multiple attributes.
* @param index The index which the allocated tensor shall use.
* @param isInput Determines the containers in which the newly allocated tensors will be stored.
* @param batchSize If provided, the value of the shape on the 0th axis is overridden with this value.
* @return Pointer to the allocated tensor
*/
std::shared_ptr<ZeroTensor> allocate_tensor(const size_t index,
const bool isInput,
const std::optional<std::size_t> batchSize = std::nullopt) const;

IODescriptor prepare_io_descriptor_with_user_info(const IODescriptor& descriptor, bool isInput);

bool _isTensorChanged = false;
};

} // namespace intel_npu
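A hedged guess at the control flow implied by the _isTensorChanged flag and the commit "Only call predict shape if user set new tensor"; this is a reconstruction for orientation, not the implementation in this PR:

// Reconstruction only: set_tensor records that the user supplied a new
// tensor, and infer_async re-runs shape prediction solely in that case,
// avoiding redundant prediction on repeated inferences with unchanged I/O.
void ZeroDynamicInferRequest::set_tensor(const ov::Output<const ov::Node>& port,
                                         const ov::SoPtr<ov::ITensor>& tensor) {
    ZeroInferRequest::set_tensor(port, tensor);
    _isTensorChanged = true;
}

void ZeroDynamicInferRequest::infer_async() {
    if (_isTensorChanged) {
        // predict output shapes from the new input shapes and rebind
        // the graph arguments before submitting the pipeline
        _isTensorChanged = false;
    }
    ZeroInferRequest::infer_async();
}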
142 changes: 142 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_dynamic_pipeline.hpp
@@ -0,0 +1,142 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "irgraph.hpp"
#include "zero_pipeline.hpp"

namespace intel_npu {

struct DynamicPipeline : public Pipeline {
struct PipelinedCommandLists {
mutable IRGraph::GraphArguments _binding;

std::vector<std::unique_ptr<CommandList>> _commandLists;
// Store command list handles so they can be passed to the ExecutionEngine
std::vector<ze_command_list_handle_t> _commandListHandles;

PipelinedCommandLists(size_t numCommandLists,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const uint32_t& group_ordinal) {
_commandLists.reserve(numCommandLists);
for (size_t i = 0; i < numCommandLists; i++) {
_commandLists.emplace_back(std::make_unique<CommandList>(init_structs, group_ordinal));
}

for (size_t i = 0; i < numCommandLists; i++) {
_commandListHandles.push_back(_commandLists[i]->handle());
}
}

size_t size() const {
return _commandListHandles.size();
}

ze_command_list_handle_t* data() {
return _commandListHandles.data();
}

void bind(IRGraph* graph);

std::vector<ze_command_list_handle_t>& getHandles() {
return _commandListHandles;
}

IRGraph::GraphArguments& getBinding() {
return _binding;
}

void appendBarrier() const {
// TODO
}

void appendNpuTimestamp(uint64_t* timestamp_buff) const {
// TODO
}

void updateMutableCommandList(uint32_t arg_index,
const void* arg_value,
const ov::Strides& strides,
const ov::Shape& shapes) {
if (arg_index < _binding._inputs.size()) {
_binding._inputs[arg_index].setArg(arg_value);
// Only store the valid shape dimensions
for (int64_t i = 0; i < _binding._inputs[arg_index].dimsCount; i++) {
_binding._inputs[arg_index].sizes[i] = shapes[i];
}

if (!strides.empty()) {
for (int64_t i = 0; i < _binding._inputs[arg_index].dimsCount; i++) {
_binding._inputs[arg_index].strides[i] = strides[i];
}
} else {
// Strides are expressed in elements rather than bytes; derive them from the shape
_binding._inputs[arg_index].updateStride();
}
} else {
size_t output_index = static_cast<size_t>(arg_index) - _binding._inputs.size();
if (output_index < _binding._outputs.size()) {
_binding._outputs[output_index].setArg(arg_value);

// Only store the valid shape dimensions
for (int64_t i = 0; i < _binding._outputs[output_index].dimsCount; i++) {
_binding._outputs[output_index].sizes[i] = shapes[i];
}

if (!strides.empty()) {
for (int64_t i = 0; i < _binding._outputs[output_index].dimsCount; i++) {
_binding._outputs[output_index].strides[i] = strides[i];
}
} else {
// Strides are expressed in elements rather than bytes; derive them from the shape
_binding._outputs[output_index].updateStride();
}
}
}
}

void appendWaitOnEvent(const std::shared_ptr<Event>& event) {
event->AppendWaitOnEvent(**_commandLists.rbegin());
}

void appendReset(const std::shared_ptr<Event>& event) {
event->AppendEventReset(**_commandLists.rbegin());
}

void appendSignalEvent(std::shared_ptr<Event>& event) {
event->AppendSignalEvent(**_commandLists.rbegin());
}
};

public:
DynamicPipeline(const Config& config,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const std::shared_ptr<IGraph>& graph,
const std::vector<std::vector<std::shared_ptr<ZeroTensor>>>& input_tensors,
const std::vector<std::shared_ptr<ZeroTensor>>& output_tensors,
size_t batch_size = 1);

DynamicPipeline(const DynamicPipeline&) = delete;
DynamicPipeline& operator=(const DynamicPipeline&) = delete;
virtual ~DynamicPipeline() = default;

void push() override;
void pull() override;
void reset() const override;
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr) override;
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
size_t batch_index,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr) override;

virtual std::vector<ov::ProfilingInfo> get_profiling_info() const override;

protected:
std::vector<std::unique_ptr<PipelinedCommandLists>> _command_lists;
};

} // namespace intel_npu
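The element-based stride fallback above deserves a spelled-out example. A minimal sketch of what updateStride() plausibly computes, assuming a dense row-major layout; the struct and member names mirror the binding fields used above but are a reconstruction, not the IRGraph implementation:

#include <array>
#include <cstdint>

// Reconstruction of an argument binding with element-based strides.
struct ArgSketch {
    static constexpr int64_t MAX_DIMS = 8;
    std::array<int64_t, MAX_DIMS> sizes{};    // shape, valid up to dimsCount
    std::array<int64_t, MAX_DIMS> strides{};  // strides in elements, not bytes
    int64_t dimsCount = 0;

    // Dense row-major strides: the innermost dimension varies fastest,
    // so walk the shape backwards and accumulate the element count.
    void updateStride() {
        int64_t stride = 1;
        for (int64_t i = dimsCount - 1; i >= 0; --i) {
            strides[i] = stride;
            stride *= sizes[i];
        }
    }
};

// For a 4-D shape {2, 3, 4, 5} this yields strides {60, 20, 5, 1}.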
src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -17,7 +17,18 @@

namespace intel_npu {

class ZeroInferRequest final : public SyncInferRequest {
constexpr std::size_t SINGLE_TENSOR = 0;
constexpr bool INPUT = true;
constexpr bool OUTPUT = false;

std::optional<size_t> determine_dynamic_batch_size(const IODescriptor& desc,
const ov::PartialShape& ioShape,
const std::shared_ptr<ov::ITensor>& tensor,
const std::optional<size_t> batchSize);

void* get_tensor_data_ptr(const std::shared_ptr<ov::ITensor>& tensor);

class ZeroInferRequest : public SyncInferRequest {
public:
explicit ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
const std::shared_ptr<const ICompiledModel>& compiledModel,
@@ -33,11 +44,12 @@ class ZeroInferRequest final : public SyncInferRequest {

void get_result() override;

private:
protected:
std::vector<ov::ProfilingInfo> get_profiling_info() const override;

void check_network_precision(const ov::element::Type_t precision) const override;
void create_pipeline();
virtual void construct_pipeline();

std::shared_ptr<ZeroTensor>& get_level_zero_input(size_t index, size_t tensorNo = 0) const;
std::vector<std::shared_ptr<ZeroTensor>>& get_level_zero_inputs(size_t index) const;
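The free function determine_dynamic_batch_size above is declared but not defined here. A hedged sketch of the shape-side check such a helper would need, assuming the batch lives on axis 0; the real plugin logic also consults the IODescriptor and the configured batch mode, which this sketch omits:

#include <memory>
#include <optional>

#include "openvino/core/partial_shape.hpp"
#include "openvino/runtime/itensor.hpp"

// Sketch only: report a runtime batch size when the compiled I/O shape has
// a dynamic batch dimension on axis 0, taking the value from the tensor the
// user actually supplied; otherwise signal that no dynamic batch applies.
std::optional<size_t> sketch_dynamic_batch(const ov::PartialShape& ioShape,
                                           const std::shared_ptr<ov::ITensor>& tensor) {
    if (ioShape.rank().is_dynamic() || ioShape.rank().get_length() == 0 ||
        ioShape[0].is_static()) {
        return std::nullopt;  // batch axis fixed at compile time
    }
    return tensor->get_shape()[0];  // batch taken from the user tensor
}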
29 changes: 21 additions & 8 deletions src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -12,7 +12,7 @@

namespace intel_npu {

struct Pipeline final {
struct Pipeline {
public:
Pipeline(const Config& config,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
@@ -21,18 +21,31 @@
const std::vector<std::shared_ptr<ZeroTensor>>& output_tensors,
size_t batch_size = 1);

Pipeline(const Config& config,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const std::shared_ptr<IGraph>& graph,
const std::vector<std::vector<std::shared_ptr<ZeroTensor>>>& input_tensors,
const std::vector<std::shared_ptr<ZeroTensor>>& output_tensors,
std::string logName,
size_t batch_size = 1);

Pipeline(const Pipeline&) = delete;
Pipeline& operator=(const Pipeline&) = delete;
~Pipeline() = default;
virtual ~Pipeline() = default;

void push();
void pull();
void reset() const;
virtual void push();
virtual void pull();
virtual void reset() const;

void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor);
void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor, size_t batch_index);
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr);
virtual void update_graph_arguments(uint32_t index,
const std::shared_ptr<ZeroTensor>& tensor,
size_t batch_index,
[[maybe_unused]] std::shared_ptr<ov::ITensor> userTensor = nullptr);

std::vector<ov::ProfilingInfo> get_profiling_info() const;
virtual std::vector<ov::ProfilingInfo> get_profiling_info() const;

protected:
std::shared_ptr<ZeroInitStructsHolder> _init_structs;
8 changes: 8 additions & 0 deletions src/plugins/intel_npu/src/backend/src/zero_device.cpp
@@ -8,6 +8,9 @@
#include "intel_npu/utils/zero/zero_api.hpp"
#include "intel_npu/utils/zero/zero_utils.hpp"
#include "zero_infer_request.hpp"
#ifdef NPU_PLUGIN_DEVELOPER_BUILD
# include "zero_dynamic_infer_request.hpp"
#endif

using namespace intel_npu;

@@ -176,6 +179,11 @@ ov::device::Type ZeroDevice::getDeviceType() const {
std::shared_ptr<SyncInferRequest> ZeroDevice::createInferRequest(
const std::shared_ptr<const ICompiledModel>& compiledModel,
const Config& config) {
#ifdef NPU_PLUGIN_DEVELOPER_BUILD
if (compiledModel->get_graph()->get_blob_type() == BlobType::LLVM) {
return std::make_shared<ZeroDynamicInferRequest>(_initStructs, compiledModel, config);
}
#endif
return std::make_shared<ZeroInferRequest>(_initStructs, compiledModel, config);
}

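For context, how this dispatch would be exercised end to end. A sketch assuming a developer build (NPU_PLUGIN_DEVELOPER_BUILD defined) and a model whose compiled graph reports BlobType::LLVM; whether a given dynamic-shape model actually takes that path depends on the compiler configuration, and model.xml is a placeholder:

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");          // placeholder model
    model->reshape(ov::PartialShape{-1, 3, 224, 224});  // dynamic batch axis

    // On a developer build, an LLVM-blob graph routes create_infer_request()
    // through ZeroDynamicInferRequest instead of the static ZeroInferRequest.
    auto compiled = core.compile_model(model, "NPU");
    auto request = compiled.create_infer_request();

    ov::Tensor input(ov::element::f32, {4, 3, 224, 224});
    request.set_input_tensor(input);
    request.infer();  // shapes predicted and graph arguments rebound at runtime
    return 0;
}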