Skip to content

Commit e571132

Browse files
xuxinzen authored and tprimak committed
cpu: x64: conv: disable zp with large zp buffer on AMX
1 parent 715b476 commit e571132

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2372,6 +2372,12 @@ status_t jit_avx512_core_amx_fwd_kernel_t::init_conf(jit_conv_conf_t &jcp,
23722372
is_int8_convolution)),
23732373
VERBOSE_UNSUPPORTED_ZP_CFG);
23742374

2375+
// Dispatch the shapes to VNNI for better performance
2376+
const bool req_zp_large_buffer = jcp.src_zero_point
2377+
&& jcp.oc * jcp.ow > 8192 && (jcp.r_pad > 0 || jcp.l_pad > 0);
2378+
VDISPATCH_CONV_IC(!req_zp_large_buffer, VERBOSE_IMPL_HEURISTIC_FAIL,
2379+
"no optimization for zero point on AMX");
2380+
23752381
// Calculate zero-point padding values outside of the main JIT-kernel
23762382
// and store the results in an auxiliary buffer.
23772383
jcp.req_zero_point_buffer = jcp.src_zero_point

src/cpu/x64/jit_brgemm_conv_utils.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2403,6 +2403,14 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa,
24032403
* comp_buffer_ow * jcp.oc_block;
24042404
jcp.s8s8_comp_buffer_size = jcp.comp_a_buffer_size;
24052405

2406+
// Dispatch the shapes to VNNI for better performance
2407+
// TODO: optimize the perf for zero point with large buffer on AMX
2408+
if (is_amx(isa) && jcp.src_zero_point && jcp.exec_type == exec_trans
2409+
&& (jcp.l_pad > 0 || jcp.r_pad > 0) && jcp.oc * jcp.ow > 8192)
2410+
VDISPATCH_CONV_IC(!allow_perf_heuristics(jcp),
2411+
VERBOSE_IMPL_HEURISTIC_FAIL,
2412+
"no optimization for zero point on amx")
2413+
24062414
// For padding shapes, we calculate the comp along with the computation
24072415
// inside brgemm kernel when output size is small to get optimal perf
24082416
// For shapes with large ow we calculate the comp inside brgemm kernel too

0 commit comments

Comments
 (0)