Skip to content

Commit 75af08c

Browse files
CarterLi999carter.li
and
carter.li
authored
ggml: bugfix: fix the inactive elements is agnostic for risc-v vector (#8748)
In these codes, we want to retain the value that they previously held when mask[i] is false. So we should use undisturbed. With the default agnostic policy of rvv intrinsic, these values can be held or be written with 1s. Co-authored-by: carter.li <[email protected]>
1 parent 439b3fc commit 75af08c

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

ggml/src/ggml-quants.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -6449,22 +6449,22 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
64496449
// compute mask for subtraction
64506450
vuint8m1_t qh_m0 = __riscv_vand_vx_u8m1(vqh, m, vl);
64516451
vbool8_t vmask_0 = __riscv_vmseq_vx_u8m1_b8(qh_m0, 0, vl);
6452-
vint8m1_t q3_m0 = __riscv_vsub_vx_i8m1_m(vmask_0, q3_0, 0x4, vl);
6452+
vint8m1_t q3_m0 = __riscv_vsub_vx_i8m1_mu(vmask_0, q3_0, q3_0, 0x4, vl);
64536453
m <<= 1;
64546454

64556455
vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
64566456
vbool8_t vmask_1 = __riscv_vmseq_vx_u8m1_b8(qh_m1, 0, vl);
6457-
vint8m1_t q3_m1 = __riscv_vsub_vx_i8m1_m(vmask_1, q3_1, 0x4, vl);
6457+
vint8m1_t q3_m1 = __riscv_vsub_vx_i8m1_mu(vmask_1, q3_1, q3_1, 0x4, vl);
64586458
m <<= 1;
64596459

64606460
vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
64616461
vbool8_t vmask_2 = __riscv_vmseq_vx_u8m1_b8(qh_m2, 0, vl);
6462-
vint8m1_t q3_m2 = __riscv_vsub_vx_i8m1_m(vmask_2, q3_2, 0x4, vl);
6462+
vint8m1_t q3_m2 = __riscv_vsub_vx_i8m1_mu(vmask_2, q3_2, q3_2, 0x4, vl);
64636463
m <<= 1;
64646464

64656465
vuint8m1_t qh_m3 = __riscv_vand_vx_u8m1(vqh, m, vl);
64666466
vbool8_t vmask_3 = __riscv_vmseq_vx_u8m1_b8(qh_m3, 0, vl);
6467-
vint8m1_t q3_m3 = __riscv_vsub_vx_i8m1_m(vmask_3, q3_3, 0x4, vl);
6467+
vint8m1_t q3_m3 = __riscv_vsub_vx_i8m1_mu(vmask_3, q3_3, q3_3, 0x4, vl);
64686468
m <<= 1;
64696469

64706470
// load Q8 and take product with Q3
@@ -7720,13 +7720,13 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
77207720
vint8m1_t q5_a = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(q5_x, 0x0F, vl));
77217721
vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
77227722
vbool8_t vmask_1 = __riscv_vmsne_vx_u8m1_b8(qh_m1, 0, vl);
7723-
vint8m1_t q5_m1 = __riscv_vadd_vx_i8m1_m(vmask_1, q5_a, 16, vl);
7723+
vint8m1_t q5_m1 = __riscv_vadd_vx_i8m1_mu(vmask_1, q5_a, q5_a, 16, vl);
77247724
m <<= 1;
77257725

77267726
vint8m1_t q5_l = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vsrl_vx_u8m1(q5_x, 0x04, vl));
77277727
vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
77287728
vbool8_t vmask_2 = __riscv_vmsne_vx_u8m1_b8(qh_m2, 0, vl);
7729-
vint8m1_t q5_m2 = __riscv_vadd_vx_i8m1_m(vmask_2, q5_l, 16, vl);
7729+
vint8m1_t q5_m2 = __riscv_vadd_vx_i8m1_mu(vmask_2, q5_l, q5_l, 16, vl);
77307730
m <<= 1;
77317731

77327732
vint16m2_t v0 = __riscv_vwmul_vv_i16m2(q5_m1, q8_y1, vl);

0 commit comments

Comments
 (0)