Skip to content

Commit 345163f

Browse files
authored
Added UNFOLD + DQ in default configs for LLMCompiledModel (#28298)
### Details: - *Transfer updated default configs from GenAI to LLMCompiledModel*
1 parent 8a19942 commit 345163f

File tree

1 file changed

+32
-6
lines changed

1 file changed

+32
-6
lines changed

src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp

+32-6
Original file line numberDiff line numberDiff line change
@@ -297,12 +297,20 @@ bool is_cw_compressed(const std::shared_ptr<ov::Model>& model) {
297297
// Plain aggregate holding the NPU device properties that extract_npu_descriptor()
// reads from the plugin and that the default-config builders consult.
struct NPUDesc {
298298
// String value of the ov::device::architecture property (compared against "4000" elsewhere in this file).
std::string arch;
299299
// Value of the ov::intel_npu::max_tiles property; the prefill defaults skip NPU_DPU_GROUPS when this is -1.
int64_t max_tiles;
300+
// True when the device capability list contains "COMPILER_DYNAMIC_QUANTIZATION".
bool compiler_dq;
300301
};
301302

302303
// Builds an NPUDesc from properties queried on the given plugin.
// Reads ov::device::architecture, ov::intel_npu::max_tiles and ov::device::capabilities,
// and returns the result wrapped in an engaged std::optional.
// NOTE(review): no error handling is visible around the get_property()/as<>() calls;
// confirm how a device lacking one of these properties is expected to behave.
std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IPlugin>& plugin) {
303-
const ov::Any arch = plugin->get_property(ov::device::architecture.name(), ov::AnyMap{});
304-
const ov::Any max_tiles = plugin->get_property(ov::intel_npu::max_tiles.name(), ov::AnyMap{});
305-
return std::make_optional(NPUDesc{arch.as<std::string>(), max_tiles.as<int64_t>()});
304+
// Convert each property to its concrete type right away instead of keeping the ov::Any around.
const std::string arch = plugin->get_property(ov::device::architecture.name(), ov::AnyMap{}).as<std::string>();
305+
const int64_t max_tiles = plugin->get_property(ov::intel_npu::max_tiles.name(), ov::AnyMap{}).as<int64_t>();
306+
307+
// Stays false unless the capability string below is found.
bool compiler_dq = false;
308+
const auto device_caps =
309+
plugin->get_property(ov::device::capabilities.name(), ov::AnyMap{}).as<std::vector<std::string>>();
310+
// Linear search of the capability list for the exact "COMPILER_DYNAMIC_QUANTIZATION" entry.
if (std::find(device_caps.begin(), device_caps.end(), "COMPILER_DYNAMIC_QUANTIZATION") != device_caps.end()) {
311+
compiler_dq = true;
312+
}
313+
return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq});
306314
}
307315

308316
std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_name) {
@@ -349,6 +357,9 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model, c
349357
if (npudesc.has_value() && npudesc->arch == "4000" && npudesc->max_tiles != -1) {
350358
config.emplace("NPU_DPU_GROUPS", npudesc->max_tiles);
351359
}
360+
if (npudesc.has_value() && npudesc->compiler_dq) {
361+
config.emplace("NPUW_DQ_FULL", "NO");
362+
}
352363
return config;
353364
}
354365

@@ -364,6 +375,12 @@ ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
364375
if (npudesc.has_value() && npudesc->arch == "4000") {
365376
config.emplace("NPU_DPU_GROUPS", 4);
366377
}
378+
if (hint == ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE) {
379+
config.emplace("NPUW_UNFOLD_IREQS", "YES");
380+
}
381+
if (npudesc.has_value() && npudesc->compiler_dq) {
382+
config.emplace("NPUW_DQ_FULL", "NO");
383+
}
367384
return config;
368385
}
369386

@@ -468,13 +485,22 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
468485
OPENVINO_THROW("GENERATE_HINT is only applicable for default generate config!");
469486
}
470487
auto generate_config =
471-
generate_config_opt.value_or(get_default_generate_config(model, npudesc, generate_hint)).as<ov::AnyMap>();
488+
generate_config_opt.value_or(get_default_generate_config(kvcache_model, npudesc, generate_hint))
489+
.as<ov::AnyMap>();
472490

473491
merge_config_with(prefill_config, other_props);
474492
merge_config_with(generate_config, other_props);
475493

476-
m_kvcache_compiled = std::make_shared<ov::npuw::CompiledModel>(kvcache_model, plugin, generate_config);
477-
m_prefill_compiled = std::make_shared<ov::npuw::CompiledModel>(prefill_model, plugin, prefill_config);
494+
m_kvcache_compiled = std::dynamic_pointer_cast<ov::npuw::CompiledModel>(
495+
ov::npuw::ICompiledModel::create(kvcache_model, plugin, generate_config));
496+
OPENVINO_ASSERT(m_kvcache_compiled,
497+
"Can't create ov::npuw::CompiledModel for passed kvcache "
498+
"model and its config, please check passed config.");
499+
m_prefill_compiled = std::dynamic_pointer_cast<ov::npuw::CompiledModel>(
500+
ov::npuw::ICompiledModel::create(prefill_model, plugin, prefill_config));
501+
OPENVINO_ASSERT(m_prefill_compiled,
502+
"Can't create ov::npuw::CompiledModel for passed prefill "
503+
"model and its config, please check passed config.");
478504

479505
implement_properties();
480506
LOG_DEBUG("Done");

0 commit comments

Comments
 (0)