Skip to content

Commit

Permalink
Fix undefined behavior (left shift of negative value)
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 689234832
  • Loading branch information
dsharletg authored and xnnpack-bot committed Oct 24, 2024
1 parent e821f66 commit 2468fc2
Show file tree
Hide file tree
Showing 64 changed files with 64 additions and 64 deletions.
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-avx-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void xnn_qs16_qs8_vcvt_ukernel__avx_u16(
};

const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x((int64_t) ((uint64_t) params->scalar.output_zero_point << 32) + INT64_C(0x80000000));
const __m128i vbias = _mm_set1_epi64x((int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) + INT64_C(0x80000000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
const __m128i vshuffle01 = _mm_load_si128((const __m128i*) shuffle01);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-avx-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void xnn_qs16_qs8_vcvt_ukernel__avx_u4(
};

const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x((int64_t) ((uint64_t) params->scalar.output_zero_point << 32) + INT64_C(0x80000000));
const __m128i vbias = _mm_set1_epi64x((int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) + INT64_C(0x80000000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
const __m128i vshuffle01 = _mm_load_si128((const __m128i*) shuffle01);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-avx-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void xnn_qs16_qs8_vcvt_ukernel__avx_u8(
};

const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x((int64_t) ((uint64_t) params->scalar.output_zero_point << 32) + INT64_C(0x80000000));
const __m128i vbias = _mm_set1_epi64x((int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) + INT64_C(0x80000000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
const __m128i vshuffle01 = _mm_load_si128((const __m128i*) shuffle01);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-scalar-u1.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void xnn_qs16_qs8_vcvt_ukernel__scalar_u1(
assert(output != NULL);

const int32_t vmultiplier = params->scalar.multiplier;
const int64_t vbias = (int64_t) ((int32_t) (params->scalar.output_zero_point << 16) + 0x8000);
const int64_t vbias = (int64_t) ((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point << 16)) + 0x8000);
do {
const int32_t vx = (int32_t) *input++;

Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-scalar-u2.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void xnn_qs16_qs8_vcvt_ukernel__scalar_u2(
assert(output != NULL);

const int32_t vmultiplier = params->scalar.multiplier;
const int64_t vbias = (int64_t) ((int32_t) (params->scalar.output_zero_point << 16) + 0x8000);
const int64_t vbias = (int64_t) ((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point << 16)) + 0x8000);
for (; batch >= 2 * sizeof(int16_t); batch -= 2 * sizeof(int16_t)) {

const int32_t vx0 = (int32_t) input[0];
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-scalar-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void xnn_qs16_qs8_vcvt_ukernel__scalar_u4(
assert(output != NULL);

const int32_t vmultiplier = params->scalar.multiplier;
const int64_t vbias = (int64_t) ((int32_t) (params->scalar.output_zero_point << 16) + 0x8000);
const int64_t vbias = (int64_t) ((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point << 16)) + 0x8000);
for (; batch >= 4 * sizeof(int16_t); batch -= 4 * sizeof(int16_t)) {

const int32_t vx0 = (int32_t) input[0];
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-sse2-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs16_qs8_vcvt_ukernel__sse2_u16(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
const __m128i vzero = _mm_setzero_si128();
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-sse2-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs16_qs8_vcvt_ukernel__sse2_u4(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
const __m128i vzero = _mm_setzero_si128();
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-sse2-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs16_qs8_vcvt_ukernel__sse2_u8(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
const __m128i vzero = _mm_setzero_si128();
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-sse41-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void xnn_qs16_qs8_vcvt_ukernel__sse41_u16(
};

const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x((int64_t) ((uint64_t) params->scalar.output_zero_point << 32) + INT64_C(0x80000000));
const __m128i vbias = _mm_set1_epi64x((int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) + INT64_C(0x80000000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
const __m128i vshuffle01 = _mm_load_si128((const __m128i*) shuffle01);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-sse41-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void xnn_qs16_qs8_vcvt_ukernel__sse41_u4(
};

const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x((int64_t) ((uint64_t) params->scalar.output_zero_point << 32) + INT64_C(0x80000000));
const __m128i vbias = _mm_set1_epi64x((int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) + INT64_C(0x80000000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
const __m128i vshuffle01 = _mm_load_si128((const __m128i*) shuffle01);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-sse41-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void xnn_qs16_qs8_vcvt_ukernel__sse41_u8(
};

const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x((int64_t) ((uint64_t) params->scalar.output_zero_point << 32) + INT64_C(0x80000000));
const __m128i vbias = _mm_set1_epi64x((int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) + INT64_C(0x80000000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
const __m128i vshuffle01 = _mm_load_si128((const __m128i*) shuffle01);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-ssse3-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void xnn_qs16_qs8_vcvt_ukernel__ssse3_u16(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
XNN_FORCE_REALIZATION(vinput_bias);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-ssse3-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ void xnn_qs16_qs8_vcvt_ukernel__ssse3_u4(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
XNN_FORCE_REALIZATION(vinput_bias);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-ssse3-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void xnn_qs16_qs8_vcvt_ukernel__ssse3_u8(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
XNN_FORCE_REALIZATION(vinput_bias);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-wasmsimd-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void xnn_qs16_qs8_vcvt_ukernel__wasmsimd_u16(
assert(output != NULL);

const v128_t vmultiplier = wasm_v128_load32_splat(&params->scalar.multiplier);
const v128_t vbias = wasm_i64x2_splat(((int32_t) params->scalar.output_zero_point << 16) + INT32_C(0x8000));
const v128_t vbias = wasm_i64x2_splat((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 16) + INT32_C(0x8000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
for (; batch >= 16 * sizeof(int16_t); batch -= 16 * sizeof(int16_t)) {
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-wasmsimd-u32.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void xnn_qs16_qs8_vcvt_ukernel__wasmsimd_u32(
assert(output != NULL);

const v128_t vmultiplier = wasm_v128_load32_splat(&params->scalar.multiplier);
const v128_t vbias = wasm_i64x2_splat(((int32_t) params->scalar.output_zero_point << 16) + INT32_C(0x8000));
const v128_t vbias = wasm_i64x2_splat((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 16) + INT32_C(0x8000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
for (; batch >= 32 * sizeof(int16_t); batch -= 32 * sizeof(int16_t)) {
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/gen/qs16-qs8-vcvt-wasmsimd-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void xnn_qs16_qs8_vcvt_ukernel__wasmsimd_u8(
assert(output != NULL);

const v128_t vmultiplier = wasm_v128_load32_splat(&params->scalar.multiplier);
const v128_t vbias = wasm_i64x2_splat(((int32_t) params->scalar.output_zero_point << 16) + INT32_C(0x8000));
const v128_t vbias = wasm_i64x2_splat((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 16) + INT32_C(0x8000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
for (; batch >= 8 * sizeof(int16_t); batch -= 8 * sizeof(int16_t)) {
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/scalar.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void xnn_qs16_qs8_vcvt_ukernel__scalar_u${BATCH_TILE}(
assert(output != NULL);

const int32_t vmultiplier = params->scalar.multiplier;
const int64_t vbias = (int64_t) ((int32_t) (params->scalar.output_zero_point << 16) + 0x8000);
const int64_t vbias = (int64_t) ((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point << 16)) + 0x8000);
$if BATCH_TILE == 1:
do {
const int32_t vx = (int32_t) *input++;
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/sse2.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs16_qs8_vcvt_ukernel__sse2_u${BATCH_TILE}(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
const __m128i vzero = _mm_setzero_si128();
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/sse4.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void xnn_qs16_qs8_vcvt_ukernel__${ISA}_u${BATCH_TILE}(
};

const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x((int64_t) ((uint64_t) params->scalar.output_zero_point << 32) + INT64_C(0x80000000));
const __m128i vbias = _mm_set1_epi64x((int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) + INT64_C(0x80000000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
const __m128i vshuffle01 = _mm_load_si128((const __m128i*) shuffle01);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/ssse3.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void xnn_qs16_qs8_vcvt_ukernel__ssse3_u${BATCH_TILE}(
const __m128i vinput_bias = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vmultiplier = _mm_set1_epi32(params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi64x(
(int64_t) ((uint64_t) params->scalar.output_zero_point << 32) +
(int64_t) (((uint64_t) (int64_t) params->scalar.output_zero_point) << 32) +
INT64_C(0x80000000) -
(INT64_C(0x80000000) * (int64_t) params->scalar.multiplier));
XNN_FORCE_REALIZATION(vinput_bias);
Expand Down
2 changes: 1 addition & 1 deletion src/qs16-qs8-vcvt/wasmsimd.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void xnn_qs16_qs8_vcvt_ukernel__wasmsimd_u${BATCH_TILE}(
assert(output != NULL);

const v128_t vmultiplier = wasm_v128_load32_splat(&params->scalar.multiplier);
const v128_t vbias = wasm_i64x2_splat(((int32_t) params->scalar.output_zero_point << 16) + INT32_C(0x8000));
const v128_t vbias = wasm_i64x2_splat((int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 16) + INT32_C(0x8000));
XNN_FORCE_REALIZATION(vmultiplier);
XNN_FORCE_REALIZATION(vbias);
$if BATCH_TILE > 4:
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/armsimd32.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__armsimd32_u${BATCH_TILE}(
assert(output != NULL);

const ${XINT16X2_T} vminus_input_zero_point = (${XINT16X2_T}) broadcast2x_uint16(-params->scalar.input_zero_point);
const int32_t vbias = ((int32_t) params->scalar.output_zero_point << 1) + INT32_C(1);
const int32_t vbias = (int32_t) ((uint32_t) (int32_t) params->scalar.output_zero_point << 1) + INT32_C(1);
const int32_t vmultiplier = (int32_t) params->scalar.multiplier << 9;
$if BATCH_TILE > 4:
for (; batch >= ${BATCH_TILE} * sizeof(${XINT8_T}); batch -= ${BATCH_TILE} * sizeof(${XINT8_T})) {
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/gen/qs8-vcvt-armsimd32-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs8_vcvt_ukernel__armsimd32_u4(
assert(output != NULL);

const int16x2_t vminus_input_zero_point = (int16x2_t) broadcast2x_uint16(-params->scalar.input_zero_point);
const int32_t vbias = ((int32_t) params->scalar.output_zero_point << 1) + INT32_C(1);
const int32_t vbias = (int32_t) ((uint32_t) (int32_t) params->scalar.output_zero_point << 1) + INT32_C(1);
const int32_t vmultiplier = (int32_t) params->scalar.multiplier << 9;
for (; batch >= 4 * sizeof(int8_t); batch -= 4 * sizeof(int8_t)) {
const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(input);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/gen/qs8-vcvt-armsimd32-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs8_vcvt_ukernel__armsimd32_u8(
assert(output != NULL);

const int16x2_t vminus_input_zero_point = (int16x2_t) broadcast2x_uint16(-params->scalar.input_zero_point);
const int32_t vbias = ((int32_t) params->scalar.output_zero_point << 1) + INT32_C(1);
const int32_t vbias = (int32_t) ((uint32_t) (int32_t) params->scalar.output_zero_point << 1) + INT32_C(1);
const int32_t vmultiplier = (int32_t) params->scalar.multiplier << 9;
for (; batch >= 8 * sizeof(int8_t); batch -= 8 * sizeof(int8_t)) {
const int8x4_t vx0123 = (int8x4_t) unaligned_indexed_load_u32(input, 0);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/gen/qs8-vcvt-scalar-u1.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void xnn_qs8_vcvt_ukernel__scalar_u1(
assert(output != NULL);

const int32_t vbias =
((int32_t) params->scalar.output_zero_point << 8) -
(int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) -
(int32_t) params->scalar.multiplier * (int32_t) params->scalar.input_zero_point +
INT32_C(0x80);
const int32_t vmultiplier = params->scalar.multiplier;
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/gen/qs8-vcvt-scalar-u2.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void xnn_qs8_vcvt_ukernel__scalar_u2(
assert(output != NULL);

const int32_t vbias =
((int32_t) params->scalar.output_zero_point << 8) -
(int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) -
(int32_t) params->scalar.multiplier * (int32_t) params->scalar.input_zero_point +
INT32_C(0x80);
const int32_t vmultiplier = params->scalar.multiplier;
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/gen/qs8-vcvt-scalar-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void xnn_qs8_vcvt_ukernel__scalar_u4(
assert(output != NULL);

const int32_t vbias =
((int32_t) params->scalar.output_zero_point << 8) -
(int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) -
(int32_t) params->scalar.multiplier * (int32_t) params->scalar.input_zero_point +
INT32_C(0x80);
const int32_t vmultiplier = params->scalar.multiplier;
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/gen/qs8-vcvt-sse2-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs8_vcvt_ukernel__sse2_u16(

const __m128i vmultiplier = _mm_set1_epi16(-params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi32(
((int32_t) params->scalar.output_zero_point << 8) -
(int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) -
(int32_t) params->scalar.multiplier * (int32_t) params->scalar.input_zero_point +
INT32_C(0x80));
XNN_FORCE_REALIZATION(vmultiplier);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/gen/qs8-vcvt-sse2-u32.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void xnn_qs8_vcvt_ukernel__sse2_u32(

const __m128i vmultiplier = _mm_set1_epi16(-params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi32(
((int32_t) params->scalar.output_zero_point << 8) -
(int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) -
(int32_t) params->scalar.multiplier * (int32_t) params->scalar.input_zero_point +
INT32_C(0x80));
XNN_FORCE_REALIZATION(vmultiplier);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/scalar.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__scalar_u${BATCH_TILE}(
assert(output != NULL);

const int32_t vbias =
((int32_t) params->scalar.output_zero_point << 8) -
(int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) -
(int32_t) params->scalar.multiplier * (int32_t) params->scalar.input_zero_point +
INT32_C(0x80);
const int32_t vmultiplier = params->scalar.multiplier;
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vcvt/sse2.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__sse2_u${BATCH_TILE}(
$else:
const __m128i vmultiplier = _mm_set1_epi16(-params->scalar.multiplier);
const __m128i vbias = _mm_set1_epi32(
((int32_t) params->scalar.output_zero_point << 8) -
(int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) -
(int32_t) params->scalar.multiplier * (int32_t) params->scalar.input_zero_point +
INT32_C(0x80));
XNN_FORCE_REALIZATION(vmultiplier);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vlrelu/armsimd32.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__armsimd32_u${BATCH_TILE}(
const ${XINT16X2_T} vinput_zero_point = (${XINT16X2_T}) broadcast2x_uint16(params->scalar.input_zero_point);
const int16x2_t vpositive_multiplier = (int16x2_t) broadcast2x_uint16(-params->scalar.positive_multiplier);
const int16x2_t vnegative_multiplier = (int16x2_t) broadcast2x_uint16(-params->scalar.negative_multiplier);
const int32_t vbias = (params->scalar.output_zero_point << 8) + 0x80;
const int32_t vbias = (int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) + 0x80;
$if BATCH_TILE > 4:
for (; batch >= ${BATCH_TILE} * sizeof(${XINT8_T}); batch -= ${BATCH_TILE} * sizeof(${XINT8_T})) {
$for N in range(SIMD_TILE):
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vlrelu/gen/qs8-vlrelu-armsimd32-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void xnn_qs8_vlrelu_ukernel__armsimd32_u4(
const int16x2_t vinput_zero_point = (int16x2_t) broadcast2x_uint16(params->scalar.input_zero_point);
const int16x2_t vpositive_multiplier = (int16x2_t) broadcast2x_uint16(-params->scalar.positive_multiplier);
const int16x2_t vnegative_multiplier = (int16x2_t) broadcast2x_uint16(-params->scalar.negative_multiplier);
const int32_t vbias = (params->scalar.output_zero_point << 8) + 0x80;
const int32_t vbias = (int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) + 0x80;
for (; batch >= 4 * sizeof(int8_t); batch -= 4 * sizeof(int8_t)) {
const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(input);
input += 4;
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vlrelu/gen/qs8-vlrelu-armsimd32-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void xnn_qs8_vlrelu_ukernel__armsimd32_u8(
const int16x2_t vinput_zero_point = (int16x2_t) broadcast2x_uint16(params->scalar.input_zero_point);
const int16x2_t vpositive_multiplier = (int16x2_t) broadcast2x_uint16(-params->scalar.positive_multiplier);
const int16x2_t vnegative_multiplier = (int16x2_t) broadcast2x_uint16(-params->scalar.negative_multiplier);
const int32_t vbias = (params->scalar.output_zero_point << 8) + 0x80;
const int32_t vbias = (int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) + 0x80;
for (; batch >= 8 * sizeof(int8_t); batch -= 8 * sizeof(int8_t)) {
const int8x4_t vx0123 = (int8x4_t) unaligned_indexed_load_u32(input, 0);
const int8x4_t vx4567 = (int8x4_t) unaligned_indexed_load_u32(input, 1);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-vlrelu/gen/qs8-vlrelu-rvv-u1v.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void xnn_qs8_vlrelu_ukernel__rvv_u1v(
const int32_t input_zero_point = params->scalar.input_zero_point;
const int32_t multiplier_diff = params->scalar.negative_multiplier ^ params->scalar.positive_multiplier;
const int32_t multiplier_base = params->scalar.positive_multiplier;
const int32_t bias = (params->scalar.output_zero_point << 8) + 128;
const int32_t bias = (int32_t) (((uint32_t) (int32_t) params->scalar.output_zero_point) << 8) + 128;
int32_t n = __riscv_vsetvl_e8m1(batch);
vint32m4_t bias_i32v = __riscv_vmv_v_x_i32m4(bias, n);

Expand Down
Loading

0 comments on commit 2468fc2

Please sign in to comment.