Skip to content

Commit b453bb0

Browse files
inteldimitrius authored and tprimak committed
x64: matmul: Weight decompression fix group blocking
1 parent 240f653 commit b453bb0

File tree

2 files changed

+32
-6
lines changed

2 files changed

+32
-6
lines changed

src/cpu/x64/matmul/brgemm_matmul.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1304,19 +1304,19 @@ void brgemm_matmul_t<isa>::copy_b_chunk_in_buffer(
13041304

13051305
// Handle first block
13061306
if (k_start % adj_k_blk > 0) {
1307-
call_copy_kernel(k_start, k_start % adj_k_blk, 0);
1308-
k += k_start % adj_k_blk;
1307+
const auto first_blk_size = adj_k_blk - (k_start % adj_k_blk);
1308+
call_copy_kernel(k_start, first_blk_size, 0);
1309+
k += first_blk_size;
13091310
}
13101311
// Handle full blocks
1311-
for (; k < k_end; k += adj_k_blk) {
1312+
for (; (k + adj_k_blk) <= k_end; k += adj_k_blk) {
13121313
const auto gb = (k - k_start) / bgmmc.K_blk;
13131314
call_copy_kernel(k, adj_k_blk, gb);
13141315
}
13151316
// Handle last block
1316-
if (k_end % adj_k_blk > 0) {
1317-
k -= adj_k_blk;
1317+
if (k_end > k) {
13181318
const auto gb = (k - k_start) / bgmmc.K_blk;
1319-
call_copy_kernel(k, k_end % adj_k_blk, gb);
1319+
call_copy_kernel(k, k_end - k, gb);
13201320
}
13211321
} else { // Default case with k_blk blocking
13221322
for (int gb = 0; gb < gemm_batch; ++gb) {

tests/benchdnn/inputs/matmul/harness_matmul_decompression

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -330,3 +330,29 @@
330330
--attr-zero-points=,wei:common:2,wei:per_ocic:s8:32x1
331331
--attr-fpmath=strict:true
332332
1x4096:4096x4096
333+
334+
335+
## Additional grouped scales/ZP testing
336+
--reset
337+
--dt=f16:s4:f16
338+
--wtag=abc,acb
339+
--attr-scales=,wei:common:2,wei:per_oc:f16,wei:per_ocic:f16:192x1
340+
--attr-zero-points=,wei:common:2,wei:per_oc:s4,wei:per_ocic:s4:192x1
341+
--attr-fpmath=f16:true
342+
12x4x576:12x576x192
343+
344+
--reset
345+
--dt=bf16:s4:bf16
346+
--wtag=abc,acb
347+
--attr-scales=,wei:common:2,wei:per_oc:f16,wei:per_ocic:f16:192x1
348+
--attr-zero-points=,wei:common:2,wei:per_oc:s4,wei:per_ocic:s4:192x1
349+
--attr-fpmath=bf16:true
350+
12x4x576:12x576x192
351+
352+
--reset
353+
--dt=f32:s4:f32
354+
--wtag=abc,acb
355+
--attr-scales=,wei:common:2,wei:per_oc:f16,wei:per_ocic:f16:192x1
356+
--attr-zero-points=,wei:common:2,wei:per_oc:s4,wei:per_ocic:s4:192x1
357+
--attr-fpmath=strict:true
358+
12x4x576:12x576x192

0 commit comments

Comments (0)