Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[CPU] onednn 3.6 #28174

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,12 @@ jit_dnnl_emitter::jit_dnnl_emitter(jit_generator* host,

void jit_dnnl_emitter::set_injector() {
if (host_isa_ == cpu::x64::sse41) {
eltwise_injector_sse42 =
std::make_shared<jit_uni_eltwise_injector_f32<cpu::x64::sse41>>(h, kind, alpha, beta, 1.f);
eltwise_injector_sse42 = std::make_shared<jit_uni_eltwise_injector<cpu::x64::sse41>>(h, kind, alpha, beta, 1.f);
} else if (host_isa_ == cpu::x64::avx2) {
eltwise_injector_avx2 =
std::make_shared<jit_uni_eltwise_injector_f32<cpu::x64::avx2>>(h, kind, alpha, beta, 1.f);
eltwise_injector_avx2 = std::make_shared<jit_uni_eltwise_injector<cpu::x64::avx2>>(h, kind, alpha, beta, 1.f);
} else if (host_isa_ == cpu::x64::avx512_core) {
eltwise_injector_avx512_core =
std::make_shared<jit_uni_eltwise_injector_f32<cpu::x64::avx512_core>>(h, kind, alpha, beta, 1.f);
std::make_shared<jit_uni_eltwise_injector<cpu::x64::avx512_core>>(h, kind, alpha, beta, 1.f);
} else {
OV_CPU_JIT_EMITTER_THROW("Unsupported ISA ", host_isa_);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,9 @@ class jit_dnnl_emitter : public jit_emitter {
float alpha{0.f};
float beta{0.f};

std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<dnnl::impl::cpu::x64::sse41>>
eltwise_injector_sse42;
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<dnnl::impl::cpu::x64::avx2>>
eltwise_injector_avx2;
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<dnnl::impl::cpu::x64::avx512_core>>
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector<dnnl::impl::cpu::x64::sse41>> eltwise_injector_sse42;
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector<dnnl::impl::cpu::x64::avx2>> eltwise_injector_avx2;
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector<dnnl::impl::cpu::x64::avx512_core>>
eltwise_injector_avx512_core;

private:
Expand Down
13 changes: 7 additions & 6 deletions src/plugins/intel_cpu/src/nodes/bin_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,12 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_
for (int i = 0; i < end_idx; i++) {
auto& post_op = p.entry_[i];
if (post_op.is_eltwise()) {
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this,
post_op.eltwise,
true,
eltwise_reserved,
mask_post_op_reserved));
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector<isa>>(this,
post_op.eltwise,
data_type::f32,
true,
eltwise_reserved,
mask_post_op_reserved));
} else if (post_op.is_depthwise()) {
depthwise_injectors.push_back(
std::make_shared<jit_uni_depthwise_injector_f32<isa>>(this, post_op, mask_post_op_reserved));
Expand Down Expand Up @@ -217,7 +218,7 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_

Xbyak::Label l_table;

nstl::vector<std::shared_ptr<jit_uni_eltwise_injector_f32<isa>>> eltwise_injectors;
nstl::vector<std::shared_ptr<jit_uni_eltwise_injector<isa>>> eltwise_injectors;
nstl::vector<std::shared_ptr<jit_uni_depthwise_injector_f32<isa>>> depthwise_injectors;

void cvt2ps(dnnl::memory::data_type type_in, Vmm vmm_in, const Xbyak::Operand& op, bool scalar_load) {
Expand Down
5 changes: 2 additions & 3 deletions src/plugins/intel_cpu/src/nodes/common/softmax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ struct jit_uni_softmax_kernel_f32 : public jit_uni_softmax_kernel, public jit_ge
}

void generate() override {
exp_injector.reset(
new jit_uni_eltwise_injector_f32<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.0f));
exp_injector.reset(new jit_uni_eltwise_injector<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.0f));

if (mayiuse(avx512_core))
uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa));
Expand Down Expand Up @@ -200,7 +199,7 @@ struct jit_uni_softmax_kernel_f32 : public jit_uni_softmax_kernel, public jit_ge

std::unique_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16;

std::shared_ptr<jit_uni_eltwise_injector_f32<isa>> exp_injector;
std::shared_ptr<jit_uni_eltwise_injector<isa>> exp_injector;

jit_softmax_config_params jcp_;

Expand Down
12 changes: 6 additions & 6 deletions src/plugins/intel_cpu/src/nodes/interpolate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi
for (int i = 0; i < p.len(); i++) {
auto& post_op = p.entry_[i];
if (post_op.is_eltwise()) {
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
1.f));
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
1.f));
} else if (post_op.is_depthwise()) {
depthwise_injectors.push_back(std::make_shared<jit_uni_depthwise_injector_f32<isa>>(this, post_op));
} else if (post_op.is_quantization()) {
Expand Down Expand Up @@ -275,7 +275,7 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi
std::vector<size_t> store_pool_vec_idxs;
std::vector<size_t> load_pool_gpr_idxs;

std::vector<std::shared_ptr<jit_uni_eltwise_injector_f32<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_eltwise_injector<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_depthwise_injector_f32<isa>>> depthwise_injectors;
std::vector<std::shared_ptr<jit_uni_quantization_injector_f32<isa>>> quantization_injectors;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ void BrgemmKernel::init_brgemm_copy_b(
brgemm_matmul_conf_t brgCopyKernelConf;
brgCopyKernelConf.src_dt = is_avx_f16_only ? dnnl_data_type_t::dnnl_f32 : dt_in0;
brgCopyKernelConf.wei_dt = is_avx_f16_only ? dnnl_data_type_t::dnnl_f32 : dt_in1;
brgCopyKernelConf.orig_wei_dt = dt_in1;
brgCopyKernelConf.orig_wei_dt = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::ElementTypeToDataType(inType));
brgCopyKernelConf.wei_n_blk = N_blk;
brgCopyKernelConf.wei_tag = transpose ? dnnl_ba : dnnl_ab;
brgCopyKernelConf.copy_B_wei_stride = copy_B_wei_stride;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -550,12 +550,13 @@ void GateUpCombine::generate() {
const auto zmm_up = zmm0;
const auto ymm_dst = ymm5;

auto injector = std::make_shared<jit_uni_eltwise_injector_f32<dnnl::impl::cpu::x64::avx512_core>>(
auto injector = std::make_shared<jit_uni_eltwise_injector<dnnl::impl::cpu::x64::avx512_core>>(
this,
m_act_alg,
1.f,
1.0f,
1.f,
data_type::f32,
true, // save_state, true due to additional r15 is used.
Xbyak::Reg64(Xbyak::Operand::R10), // p_table
Xbyak::Opmask(1), // k_mask
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ void NonMaxSuppression<isa>::generate() {
load_vector_emitter.reset(new jit_load_emitter(this, isa, ov::element::f32, ov::element::f32, vector_step));
load_scalar_emitter.reset(new jit_load_emitter(this, isa, ov::element::f32, ov::element::f32, scalar_step));

exp_injector.reset(
new x64::jit_uni_eltwise_injector_f32<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f));
exp_injector.reset(new x64::jit_uni_eltwise_injector<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f));

this->preamble();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class NonMaxSuppression : public JitKernel<NmsCompileParams, NmsCallArgs> {
Xbyak::Opmask k_mask = Xbyak::Opmask(7);
Xbyak::Opmask k_mask_one = Xbyak::Opmask(6);

std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<isa>> exp_injector;
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector<isa>> exp_injector;

inline void hard_nms();

Expand Down
12 changes: 6 additions & 6 deletions src/plugins/intel_cpu/src/nodes/mvn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,11 +949,11 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator
for (int i = 0; i < p.len(); i++) {
auto& post_op = p.entry_[i];
if (post_op.is_eltwise()) {
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
post_op.eltwise.scale));
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
post_op.eltwise.scale));
} else if (post_op.is_depthwise()) {
depthwise_injectors.push_back(std::make_shared<jit_uni_depthwise_injector_f32<isa>>(this, post_op));
} else if (post_op.is_quantization()) {
Expand Down Expand Up @@ -1093,7 +1093,7 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator

const int tile_size[kTileNum] = {8, 4, 2, 1};

std::vector<std::shared_ptr<jit_uni_eltwise_injector_f32<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_eltwise_injector<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_depthwise_injector_f32<isa>>> depthwise_injectors;
std::vector<std::shared_ptr<jit_uni_quantization_injector_f32<isa>>> quantization_injectors;

Expand Down
12 changes: 6 additions & 6 deletions src/plugins/intel_cpu/src/nodes/normalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,11 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji
for (int i = 0; i < p.len(); i++) {
auto& post_op = p.entry_[i];
if (post_op.is_eltwise()) {
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
post_op.eltwise.scale));
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
post_op.eltwise.scale));
} else if (post_op.is_depthwise()) {
depthwise_injectors.push_back(std::make_shared<jit_uni_depthwise_injector_f32<isa>>(this, post_op));
} else if (post_op.is_quantization()) {
Expand Down Expand Up @@ -310,7 +310,7 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji

std::unique_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16 = nullptr;

std::vector<std::shared_ptr<jit_uni_eltwise_injector_f32<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_eltwise_injector<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_depthwise_injector_f32<isa>>> depthwise_injectors;
std::vector<std::shared_ptr<jit_uni_quantization_injector_f32<isa>>> quantization_injectors;

Expand Down
22 changes: 10 additions & 12 deletions src/plugins/intel_cpu/src/nodes/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene

void generate() override {
if (jcp_.reduce_mode == Algorithm::ReduceLogSumExp) {
exp_injector =
std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this, alg_kind::eltwise_exp, 0.f, 0.f, 1.f);
exp_injector = std::make_shared<jit_uni_eltwise_injector<isa>>(this, alg_kind::eltwise_exp, 0.f, 0.f, 1.f);
}

if (mayiuse(avx512_core))
Expand Down Expand Up @@ -244,7 +243,7 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene
Xbyak::Label l_table;

std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16;
std::shared_ptr<jit_uni_eltwise_injector_f32<isa>> exp_injector;
std::shared_ptr<jit_uni_eltwise_injector<isa>> exp_injector;

inline void reduce_main() {
// ================================================================
Expand Down Expand Up @@ -1206,11 +1205,11 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi
for (int i = 0; i < p.len(); i++) {
auto& post_op = p.entry_[i];
if (post_op.is_eltwise()) {
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
post_op.eltwise.scale));
eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector<isa>>(this,
post_op.eltwise.alg,
post_op.eltwise.alpha,
post_op.eltwise.beta,
post_op.eltwise.scale));
} else if (post_op.is_depthwise()) {
depthwise_injectors.push_back(std::make_shared<jit_uni_depthwise_injector_f32<isa>>(this, post_op));
} else if (post_op.is_quantization()) {
Expand All @@ -1224,8 +1223,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi
}

if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) {
log_injector =
std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this, alg_kind::eltwise_log, 0.f, 0.f, 1.f);
log_injector = std::make_shared<jit_uni_eltwise_injector<isa>>(this, alg_kind::eltwise_log, 0.f, 0.f, 1.f);
}

if (mayiuse(avx512_core))
Expand Down Expand Up @@ -1336,9 +1334,9 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi
Vmm vmm_d_bias = Vmm(8);

std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16;
std::shared_ptr<jit_uni_eltwise_injector_f32<isa>> log_injector;
std::shared_ptr<jit_uni_eltwise_injector<isa>> log_injector;

std::vector<std::shared_ptr<jit_uni_eltwise_injector_f32<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_eltwise_injector<isa>>> eltwise_injectors;
std::vector<std::shared_ptr<jit_uni_depthwise_injector_f32<isa>>> depthwise_injectors;
std::vector<std::shared_ptr<jit_uni_quantization_injector_f32<isa>>> quantization_injectors;

Expand Down
5 changes: 2 additions & 3 deletions src/plugins/intel_cpu/src/nodes/region_yolo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_
}

void generate() override {
exp_injector.reset(
new jit_uni_eltwise_injector_f32<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f));
exp_injector.reset(new jit_uni_eltwise_injector<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f));

if (mayiuse(avx512_core))
uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa));
Expand Down Expand Up @@ -134,7 +133,7 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_

Xbyak::Label l_table;

std::shared_ptr<jit_uni_eltwise_injector_f32<isa>> exp_injector;
std::shared_ptr<jit_uni_eltwise_injector<isa>> exp_injector;

jit_logistic_config_params jcp_;

Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/utils/verbose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ void Verbose::printInfo() {
shift(written);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
shift(written);
std::string fmt_str = dnnl::impl::md2fmt_str(desc, dnnl::impl::format_kind_t::dnnl_format_kind_undef);
std::string fmt_str =
dnnl::impl::md2fmt_str(prefix.c_str(), desc, dnnl::impl::format_kind_t::dnnl_format_kind_undef);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", fmt_str.c_str());
shift(written);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,9 @@ std::vector<std::string> disabledTestPatterns() {
// tests are useless on such platforms
retVector.emplace_back(R"(.*(BF|bf)16.*)");
retVector.emplace_back(R"(.*bfloat16.*)");
// Issue: MFDNN-12876
retVector.emplace_back(R"(.*smoke_LPT/RecurrentCellTransformation.CompareWithRefImpl/f32_\[1,1,3\]_CPU_f32FQ_X_level=256_.*_FQ_W_level=255.*)");
retVector.emplace_back(R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.CompareWithRefs/Type=GRUSequence.*2.5.10.*2.1.4.*2.1.4.*)");
}
if (!ov::with_cpu_x86_avx2()) {
// MatMul in Snippets uses BRGEMM that is supported only on AVX2 (and newer) platforms
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/thirdparty/onednn
Submodule onednn updated 1679 files
Loading