Skip to content

Commit 0096220

Browse files
committed
add n-r for rvv target
Signed-off-by: Magnus Lundmark <[email protected]>
1 parent 630b436 commit 0096220

File tree

1 file changed

+5
-7
lines changed

1 file changed

+5
-7
lines changed

kernels/volk/volk_32f_invsqrt_32f.h

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -310,18 +310,16 @@ volk_32f_invsqrt_32f_rvv(float* cVector, const float* aVector, unsigned int num_
310310
for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) {
311311
vl = __riscv_vsetvl_e32m8(n);
312312
vfloat32m8_t a = __riscv_vle32_v_f32m8(aVector, vl);
313+
vfloat32m8_t half = __riscv_vfmv_v_f_f32m8(0.5f, vl);
314+
vfloat32m8_t three_halfs = __riscv_vfmv_v_f_f32m8(1.5f, vl);
313315
// Initial estimate (~7-bit precision)
314316
vfloat32m8_t x = __riscv_vfrsqrt7(a, vl);
315317
// Two Newton-Raphson iterations: x = x * (1.5 - 0.5 * a * x * x)
316-
vfloat32m8_t ax = __riscv_vfmul(a, x, vl);
317-
vfloat32m8_t half_ax = __riscv_vfmul_vf(ax, 0.5f, vl);
318+
vfloat32m8_t half_a = __riscv_vfmul(half, a, vl);
318319
x = __riscv_vfmul(
319-
x, __riscv_vfnmsac_vf(__riscv_vfmv_v_f_f32m8(1.5f, vl), half_ax, x, vl), vl);
320-
// Second iteration
321-
ax = __riscv_vfmul(a, x, vl);
322-
half_ax = __riscv_vfmul_vf(ax, 0.5f, vl);
320+
x, __riscv_vfnmsac(three_halfs, half_a, __riscv_vfmul(x, x, vl), vl), vl);
323321
x = __riscv_vfmul(
324-
x, __riscv_vfnmsac_vf(__riscv_vfmv_v_f_f32m8(1.5f, vl), half_ax, x, vl), vl);
322+
x, __riscv_vfnmsac(three_halfs, half_a, __riscv_vfmul(x, x, vl), vl), vl);
325323
__riscv_vse32(cVector, x, vl);
326324
}
327325
}

0 commit comments

Comments
 (0)