Skip to content

Commit 9462b0a

Browse files
jinboson authored and mr-c committed
x86 avx for loongarch: use vfcmp_clt to save one instruction in _mm_cmp_{sd,ss} and _mm256_cmp_pd
1 parent 9e08f70 commit 9462b0a

File tree

1 file changed

+3
-6
lines changed

1 file changed

+3
-6
lines changed

simde/x86/avx.h

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2764,8 +2764,7 @@ simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8)
27642764
case SIMDE_CMP_GT_OQ:
27652765
case SIMDE_CMP_GT_OS:
27662766
#if defined(SIMDE_LOONGARCH_LASX_NATIVE)
2767-
t_ = __lsx_vfcmp_cle_d(a_.lsx_f64, b_.lsx_f64);
2768-
a_.lsx_i64 = __lsx_vextrins_d(a_.lsx_i64, __lsx_vnor_v(t_, t_), 0x00);
2767+
a_.lsx_i64 = __lsx_vextrins_d(a_.lsx_i64, __lsx_vfcmp_clt_d(b_.lsx_f64, a_.lsx_f64), 0x00);
27692768
#else
27702769
a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);
27712770
#endif
@@ -2934,8 +2933,7 @@ simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8)
29342933
case SIMDE_CMP_GT_OQ:
29352934
case SIMDE_CMP_GT_OS:
29362935
#if defined(SIMDE_LOONGARCH_LASX_NATIVE)
2937-
t_ = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32);
2938-
a_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, __lsx_vnor_v(t_, t_), 0x00);
2936+
a_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32), 0x00);
29392937
#else
29402938
a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);
29412939
#endif
@@ -3175,8 +3173,7 @@ simde_mm256_cmp_pd
31753173
case SIMDE_CMP_GT_OQ:
31763174
case SIMDE_CMP_GT_OS:
31773175
#if defined(SIMDE_LOONGARCH_LASX_NATIVE)
3178-
t_ = __lasx_xvfcmp_cle_d(a_.d256, b_.d256);
3179-
r_.i256 = __lasx_xvnor_v(t_, t_);
3176+
r_.i256 = __lasx_xvfcmp_clt_d(b_.d256, a_.d256);
31803177
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
31813178
r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));
31823179
#else

0 commit comments

Comments
 (0)