Skip to content

Commit 087bdfc

Browse files
committed
Fix naming of mask
1 parent 695903a commit 087bdfc

File tree

3 files changed: 24 additions and 30 deletions.

src/qs8-packw/gen/qs8-packw-x8c8-gemm-goi-wasmrelaxedsimd.c

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -164,21 +164,20 @@ void xnn_qs8_packw_gemm_goi_ukernel_x8c8__wasmrelaxedsimd(
164164
if (k != 0) {
165165
assert(k >= 1 && k <= 7);
166166

167-
const v128_t all_one = wasm_i32x4_splat(-1);
168-
const v128_t mask = wasm_u64x2_shr(all_one, (8 - k) * sizeof(int8_t) * 8);
167+
const v128_t vmask = wasm_u64x2_shr(wasm_i32x4_splat(-1), (8 - k) * sizeof(int8_t) * 8);
169168

170169
v128_t v01 = wasm_v128_load64_splat(w0);
171170
v01 = wasm_i64x2_shuffle(v01, wasm_v128_load64_splat(w1), 0, 3);
172-
v01 = wasm_v128_and(v01, mask);
171+
v01 = wasm_v128_and(v01, vmask);
173172
v128_t v23 = wasm_v128_load64_splat(w2);
174173
v23 = wasm_i64x2_shuffle(v23, wasm_v128_load64_splat(w3), 0, 3);
175-
v23 = wasm_v128_and(v23, mask);
174+
v23 = wasm_v128_and(v23, vmask);
176175
v128_t v45 = wasm_v128_load64_splat(w4);
177176
v45 = wasm_i64x2_shuffle(v45, wasm_v128_load64_splat(w5), 0, 3);
178-
v45 = wasm_v128_and(v45, mask);
177+
v45 = wasm_v128_and(v45, vmask);
179178
v128_t v67 = wasm_v128_load64_splat(w6);
180179
v67 = wasm_i64x2_shuffle(v67, wasm_v128_load64_splat(w7), 0, 3);
181-
v67 = wasm_v128_and(v67, mask);
180+
v67 = wasm_v128_and(v67, vmask);
182181

183182
vacc01 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v01, vone, vacc01);
184183
vacc23 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v23, vone, vacc23);
@@ -308,21 +307,20 @@ void xnn_qs8_packw_gemm_goi_ukernel_x8c8__wasmrelaxedsimd(
308307
if (k != 0) {
309308
assert(k >= 1 && k <= 7);
310309

311-
const v128_t all_one = wasm_i32x4_splat(-1);
312-
const v128_t mask = wasm_u64x2_shr(all_one, (8 - k) * sizeof(int8_t) * 8);
310+
const v128_t vmask = wasm_u64x2_shr(wasm_i32x4_splat(-1), (8 - k) * sizeof(int8_t) * 8);
313311

314312
v128_t v01 = wasm_v128_load64_splat(w0);
315313
v01 = wasm_i64x2_shuffle(v01, wasm_v128_load64_splat(w1), 0, 3);
316-
v01 = wasm_v128_and(v01, mask);
314+
v01 = wasm_v128_and(v01, vmask);
317315
v128_t v23 = wasm_v128_load64_splat(w2);
318316
v23 = wasm_i64x2_shuffle(v23, wasm_v128_load64_splat(w3), 0, 3);
319-
v23 = wasm_v128_and(v23, mask);
317+
v23 = wasm_v128_and(v23, vmask);
320318
v128_t v45 = wasm_v128_load64_splat(w4);
321319
v45 = wasm_i64x2_shuffle(v45, wasm_v128_load64_splat(w5), 0, 3);
322-
v45 = wasm_v128_and(v45, mask);
320+
v45 = wasm_v128_and(v45, vmask);
323321
v128_t v67 = wasm_v128_load64_splat(w6);
324322
v67 = wasm_i64x2_shuffle(v67, wasm_v128_load64_splat(w7), 0, 3);
325-
v67 = wasm_v128_and(v67, mask);
323+
v67 = wasm_v128_and(v67, vmask);
326324

327325
vacc01 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v01, vone, vacc01);
328326
vacc23 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v23, vone, vacc23);

src/qs8-qu8-packw/gen/qs8-qu8-packw-x8c8-gemm-goi-wasmrelaxedsimd.c

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -164,21 +164,20 @@ void xnn_qs8_to_qu8_packw_gemm_goi_ukernel_x8c8__wasmrelaxedsimd(
164164
if (k != 0) {
165165
assert(k >= 1 && k <= 7);
166166

167-
const v128_t all_one = wasm_i32x4_splat(-1);
168-
const v128_t mask = wasm_u64x2_shr(all_one, (8 - k) * sizeof(int8_t) * 8);
167+
const v128_t vmask = wasm_u64x2_shr(wasm_i32x4_splat(-1), (8 - k) * sizeof(int8_t) * 8);
169168

170169
v128_t v01 = wasm_v128_load64_splat(w0);
171170
v01 = wasm_i64x2_shuffle(v01, wasm_v128_load64_splat(w1), 0, 3);
172-
v01 = wasm_v128_and(v01, mask);
171+
v01 = wasm_v128_and(v01, vmask);
173172
v128_t v23 = wasm_v128_load64_splat(w2);
174173
v23 = wasm_i64x2_shuffle(v23, wasm_v128_load64_splat(w3), 0, 3);
175-
v23 = wasm_v128_and(v23, mask);
174+
v23 = wasm_v128_and(v23, vmask);
176175
v128_t v45 = wasm_v128_load64_splat(w4);
177176
v45 = wasm_i64x2_shuffle(v45, wasm_v128_load64_splat(w5), 0, 3);
178-
v45 = wasm_v128_and(v45, mask);
177+
v45 = wasm_v128_and(v45, vmask);
179178
v128_t v67 = wasm_v128_load64_splat(w6);
180179
v67 = wasm_i64x2_shuffle(v67, wasm_v128_load64_splat(w7), 0, 3);
181-
v67 = wasm_v128_and(v67, mask);
180+
v67 = wasm_v128_and(v67, vmask);
182181

183182
vacc01 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v01, vone, vacc01);
184183
vacc23 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v23, vone, vacc23);
@@ -308,21 +307,20 @@ void xnn_qs8_to_qu8_packw_gemm_goi_ukernel_x8c8__wasmrelaxedsimd(
308307
if (k != 0) {
309308
assert(k >= 1 && k <= 7);
310309

311-
const v128_t all_one = wasm_i32x4_splat(-1);
312-
const v128_t mask = wasm_u64x2_shr(all_one, (8 - k) * sizeof(int8_t) * 8);
310+
const v128_t vmask = wasm_u64x2_shr(wasm_i32x4_splat(-1), (8 - k) * sizeof(int8_t) * 8);
313311

314312
v128_t v01 = wasm_v128_load64_splat(w0);
315313
v01 = wasm_i64x2_shuffle(v01, wasm_v128_load64_splat(w1), 0, 3);
316-
v01 = wasm_v128_and(v01, mask);
314+
v01 = wasm_v128_and(v01, vmask);
317315
v128_t v23 = wasm_v128_load64_splat(w2);
318316
v23 = wasm_i64x2_shuffle(v23, wasm_v128_load64_splat(w3), 0, 3);
319-
v23 = wasm_v128_and(v23, mask);
317+
v23 = wasm_v128_and(v23, vmask);
320318
v128_t v45 = wasm_v128_load64_splat(w4);
321319
v45 = wasm_i64x2_shuffle(v45, wasm_v128_load64_splat(w5), 0, 3);
322-
v45 = wasm_v128_and(v45, mask);
320+
v45 = wasm_v128_and(v45, vmask);
323321
v128_t v67 = wasm_v128_load64_splat(w6);
324322
v67 = wasm_i64x2_shuffle(v67, wasm_v128_load64_splat(w7), 0, 3);
325-
v67 = wasm_v128_and(v67, mask);
323+
v67 = wasm_v128_and(v67, vmask);
326324

327325
vacc01 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v01, vone, vacc01);
328326
vacc23 = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v23, vone, vacc23);

src/x8-packw/kr-wasmdot.c.in

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -118,13 +118,12 @@ void xnn_qs8${"_to_qu8" if IZP == 128 else ""}_packw_gemm_goi_ukernel_x${NR}c${K
118118
if (k != 0) {
119119
assert(k >= 1 && k <= ${KR-1});
120120

121-
const v128_t all_one = wasm_i32x4_splat(-1);
122-
const v128_t mask = wasm_u64x2_shr(all_one, (${KR} - k) * sizeof(${WTYPE}) * 8);
121+
const v128_t vmask = wasm_u64x2_shr(wasm_i32x4_splat(-1), (${KR} - k) * sizeof(${WTYPE}) * 8);
123122

124123
$for N in range(0, NR, 2):
125124
v128_t v${ABC[N:N+2]} = wasm_v128_load64_splat(w${N});
126125
v${ABC[N:N+2]} = wasm_i64x2_shuffle(v${ABC[N:N+2]}, wasm_v128_load64_splat(w${N+1}), 0, 3);
127-
v${ABC[N:N+2]} = wasm_v128_and(v${ABC[N:N+2]}, mask);
126+
v${ABC[N:N+2]} = wasm_v128_and(v${ABC[N:N+2]}, vmask);
128127

129128
$for N in range(0, NR, 2):
130129
vacc${ABC[N:N+2]} = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v${ABC[N:N+2]}, vone, vacc${ABC[N:N+2]});
@@ -209,13 +208,12 @@ void xnn_qs8${"_to_qu8" if IZP == 128 else ""}_packw_gemm_goi_ukernel_x${NR}c${K
209208
if (k != 0) {
210209
assert(k >= 1 && k <= ${KR-1});
211210

212-
const v128_t all_one = wasm_i32x4_splat(-1);
213-
const v128_t mask = wasm_u64x2_shr(all_one, (${KR} - k) * sizeof(${WTYPE}) * 8);
211+
const v128_t vmask = wasm_u64x2_shr(wasm_i32x4_splat(-1), (${KR} - k) * sizeof(${WTYPE}) * 8);
214212

215213
$for N in range(0, NR, 2):
216214
v128_t v${ABC[N:N+2]} = wasm_v128_load64_splat(w${N});
217215
v${ABC[N:N+2]} = wasm_i64x2_shuffle(v${ABC[N:N+2]}, wasm_v128_load64_splat(w${N+1}), 0, 3);
218-
v${ABC[N:N+2]} = wasm_v128_and(v${ABC[N:N+2]}, mask);
216+
v${ABC[N:N+2]} = wasm_v128_and(v${ABC[N:N+2]}, vmask);
219217

220218
$for N in range(0, NR, 2):
221219
vacc${ABC[N:N+2]} = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v${ABC[N:N+2]}, vone, vacc${ABC[N:N+2]});

0 commit comments

Comments
 (0)