gnuradio
diff --git a/‎.github/workflows/run-tests-rvv.yml‎
Lines changed: 55 additions & 0 deletions b/‎.github/workflows/run-tests-rvv.yml‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎cmake/Checks/check-rvv-intrinsics.c‎
Lines changed: 5 additions & 0 deletions b/‎cmake/Checks/check-rvv-intrinsics.c‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎cmake/Toolchains/rv64gcv-linux-gnu.cmake‎
Lines changed: 34 additions & 0 deletions b/‎cmake/Toolchains/rv64gcv-linux-gnu.cmake‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎gen/archs.xml‎
Lines changed: 44 additions & 0 deletions b/‎gen/archs.xml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎gen/machines.xml‎
Lines changed: 12 additions & 0 deletions b/‎gen/machines.xml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎include/volk/volk_rvv_intrinsics.h‎
Lines changed: 77 additions & 0 deletions b/‎include/volk/volk_rvv_intrinsics.h‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎kernels/volk/volk_16i_32fc_dot_prod_32fc.h‎
Lines changed: 61 additions & 0 deletions b/‎kernels/volk/volk_16i_32fc_dot_prod_32fc.h‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎kernels/volk/volk_16i_branch_4_state_8.h‎
Lines changed: 4 additions & 0 deletions b/‎kernels/volk/volk_16i_branch_4_state_8.h‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎kernels/volk/volk_16i_convert_8i.h‎
Lines changed: 15 additions & 0 deletions b/‎kernels/volk/volk_16i_convert_8i.h‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎kernels/volk/volk_16i_max_star_16i.h‎
Lines changed: 4 additions & 0 deletions b/‎kernels/volk/volk_16i_max_star_16i.h‎
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,55 @@
+#
+# Copyright 2020 - 2022 Free Software Foundation, Inc.
+#
+# This file is part of VOLK
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+name: Run VOLK tests on different RVV configurations
+
+on: [push, pull_request]
+# Single job: cross-build VOLK for RISC-V with several compiler / VLEN pairs and run `make test` for each.
+jobs:
+  Tests:
+    runs-on: ubuntu-24.04
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: "recursive"
+    - name: Install packages  # cross gcc-14, clang-18, qemu-user-static, cmake, python3-mako; also creates build/
+      run: |
+        sudo apt-get update -q -y
+        sudo apt-get install -y python3-mako cmake qemu-user-static g++-14-riscv64-linux-gnu clang-18
+        mkdir build
+        cd build
+    - name: Test gcc-14 VLEN=128  # VLEN is read from the environment by the toolchain file; no CMAKE_BUILD_TYPE is passed
+      run: |
+        cd build; rm -rf *
+        CXX=riscv64-linux-gnu-g++-14 CC=riscv64-linux-gnu-gcc-14 VLEN=128 \
+        cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake ..
+        make -j$(nproc)
+        ARGS=-V make test
+    - name: Test gcc-14 VLEN=256  # same compiler, wider vectors, Release build type
+      run: |
+        cd build; rm -rf *
+        CXX=riscv64-linux-gnu-g++-14 CC=riscv64-linux-gnu-gcc-14 VLEN=256 \
+        cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake .. -DCMAKE_BUILD_TYPE=Release
+        make -j$(nproc)
+        ARGS=-V make test
+    - name: Test clang-18 VLEN=512  # clang gets its target via CFLAGS, which the toolchain file copies into the C and C++ flags
+      run: |
+        cd build; rm -rf *
+        CXX=clang++-18 CC=clang-18 CFLAGS=--target=riscv64-linux-gnu VLEN=512 \
+        cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake ..
+        make -j$(nproc)
+        ARGS=-V make test
+    - name: Test clang-18 VLEN=1024  # clang again, Release build type
+      run: |
+        cd build; rm -rf *
+        CXX=clang++-18 CC=clang-18 CFLAGS=--target=riscv64-linux-gnu VLEN=1024 \
+        cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake .. -DCMAKE_BUILD_TYPE=Release
+        make -j$(nproc)
+        ARGS=-V make test
+
+
@@ -0,0 +1,5 @@
+/*
+ * Compile probe for RVV intrinsics support. Compilation succeeds only when
+ * at least one of these holds: __riscv_v_intrinsic >= 1000000,
+ * __clang_major__ >= 18, or __GNUC__ >= 14. Otherwise the #error below
+ * aborts the compile.
+ */
+#if !(__riscv_v_intrinsic >= 1000000 || __clang_major__ >= 18 || __GNUC__ >= 14)
+#error "rvv intrinsics aren't supported"
+#endif
+
+int main() { return 0; }
@@ -0,0 +1,34 @@
+#
+# Copyright 2024 Free Software Foundation, Inc.
+#
+# This file is part of VOLK
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+# Cross-compilation toolchain description: the target is Linux on riscv64, compiled with -march=rv64gcv.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR riscv64)
+# Compilers are taken from the CC / CXX environment variables at configure time; the assembler reuses the C compiler.
+set(CMAKE_C_COMPILER $ENV{CC})
+set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER})
+set(CMAKE_CXX_COMPILER $ENV{CXX})
+# Flags: CFLAGS from the environment plus -march=rv64gcv, force-written to the cache. C++ reuses the C flags; ASM adds -g.
+set(CMAKE_C_FLAGS "$ENV{CFLAGS} -march=rv64gcv" CACHE STRING "" FORCE)
+set(CMAKE_CXX_FLAGS ${CMAKE_C_FLAGS} CACHE STRING "" FORCE)
+set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -g" CACHE STRING "" FORCE)
+# NOTE(review): RISCV64_TOOLCHAIN_DIR and TOOLCHAIN_PREFIX are not set anywhere in this file; presumably supplied by the caller -- confirm.
+set(CMAKE_OBJCOPY
+    ${RISCV64_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}objcopy
+    CACHE INTERNAL "objcopy tool")
+set(CMAKE_SIZE_UTIL
+    ${RISCV64_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}size
+    CACHE INTERNAL "size tool")
+# NOTE(review): BINUTILS_PATH is not set in this file either; confirm where it is expected to come from.
+set(CMAKE_FIND_ROOT_PATH ${BINUTILS_PATH})
+# Vector length handed to QEMU: read from the VLEN environment variable, defaulting to 128 when it is unset, empty, or a CMake false value.
+set(QEMU_VLEN $ENV{VLEN})
+if(NOT QEMU_VLEN)
+    set(QEMU_VLEN "128")
+endif()
+# Emulator command kept as ONE space-separated string (not a ;-list): static qemu-riscv64, riscv64 sysroot, rv64 CPU with zba, zbb and V at the chosen vlen.
+set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-riscv64-static -L /usr/riscv64-linux-gnu/ -cpu rv64,zba=true,zbb=true,v=on,vlen=${QEMU_VLEN},rvv_ta_all_1s=on,rvv_ma_all_1s=on")
@@ -181,4 +181,48 @@ at the top, as a last resort.
 <arch name="riscv64">
 </arch>
 
+<!-->
+    tmpl/ currently assumes that every arch.name starting with "rv" requires
+    RVV intrinsics
+</-->
+<!-->
+    There is currently no mechanism in RISC-V to append extensions,
+    so each arch needs to specify all of them, and the order in the
+    machine definition needs to be from the fewest to the most extensions.
+    Fortunately, this maps quite well to the profiles concept.
+</-->
+<arch name="rvv">
+    <check name="V"></check>
+    <flag compiler="gnu">-march=rv64gcv</flag>
+    <flag compiler="clang">-march=rv64gcv</flag>
+</arch>
+
+<arch name="rvvseg">
+    <check name="V"></check>
+    <flag compiler="gnu">-march=rv64gcv</flag>
+    <flag compiler="clang">-march=rv64gcv</flag>
+    <!-->
+        It's unclear how performance portable segmented load/stores are, so the
+        default rvv implementations avoid using them.
+        This is a pseudo arch for separate segmented load/store implementations,
+        and is expected to never be used standalone without "rvv".
+    </-->
+</arch>
+
+<!-->
+    google/cpu_features currently doesn't support these extensions and profiles.
+</-->
+<!--arch name="rva22v">
+    <check name="V"></check>
+    <check name="B"></check>
+    <flag compiler="gnu">-march=rv64gcv_zba_zbb_zbs</flag>
+    <flag compiler="clang">-march=rv64gcv_zba_zbb_zbs</flag>
+</arch-->
+
+<!--arch name="rva23">
+    <check name="rva23"></check>
+    <flag compiler="gnu">-march=rva23u64</flag>
+    <flag compiler="clang">-march=rva23u64</flag>
+</arch-->
+
 </grammar>
@@ -33,6 +33,18 @@
 <archs>generic riscv64 orc|</archs>
 </machine>
 
+<machine name="rv64gcv">
+<archs>generic riscv64 rvv rvvseg orc|</archs>
+</machine>
+
+<!--machine name="rva22v">
+<archs>generic riscv64 rvv rvvseg rva22v orc|</archs>
+</machine-->
+
+<!--machine name="rva23">
+<archs>generic riscv64 rvv rvvseg rva22v rva23 orc|</archs>
+</machine-->
+
 <machine name="sse4_a">
 <archs>generic 32|64| mmx| sse sse2 sse3 sse4_a popcount orc|</archs>
 </machine>
 
@@ -0,0 +1,77 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2024 Free Software Foundation, Inc.
+ *
+ * This file is part of VOLK
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ */
+
+/*
+ * This file is intended to hold helper macros that are built from RVV intrinsics.
+ * They should be used in VOLK kernels to avoid copy-paste.
+ */
+
+#ifndef INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_
+#define INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_
+#include <riscv_vector.h>
+/*
+ * RISCV_SHRINK2(op, T, S, v): combine the two m1 parts of v (indices 0 and 1)
+ * with __riscv_<op>, using the maximum vl for e<S>m1.
+ *   op - intrinsic name without the __riscv_ prefix
+ *   T  - type letter pasted into __riscv_vget_<T><S>m1
+ *   S  - element width in bits, pasted into the vget and vsetvlmax names
+ *   v  - source vector group; expanded twice, so it should be free of
+ *        side effects
+ */
+#define RISCV_SHRINK2(op, T, S, v)              \
+    __riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
+                 __riscv_vget_##T##S##m1(v, 1), \
+                 __riscv_vsetvlmax_e##S##m1())
+/*
+ * RISCV_SHRINK4(op, T, S, v): fold the four m1 parts of v into one m1 value
+ * as (p0 op p1) op (p2 op p3), each step using the maximum vl for e<S>m1.
+ * Arguments have the same meaning as in the two-part variant: op is the
+ * intrinsic name without __riscv_, T the type letter, S the element width.
+ * v is expanded four times, so it should be free of side effects.
+ */
+#define RISCV_SHRINK4(op, T, S, v)                           \
+    __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
+                              __riscv_vget_##T##S##m1(v, 1), \
+                              __riscv_vsetvlmax_e##S##m1()), \
+                 __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \
+                              __riscv_vget_##T##S##m1(v, 3), \
+                              __riscv_vsetvlmax_e##S##m1()), \
+                 __riscv_vsetvlmax_e##S##m1())
+/*
+ * RISCV_SHRINK8(op, T, S, v): fold the eight m1 parts of v into one m1 value
+ * as a balanced tree:
+ *   ((p0 op p1) op (p2 op p3)) op ((p4 op p5) op (p6 op p7))
+ * Every step uses the maximum vl for e<S>m1. op is the intrinsic name without
+ * __riscv_, T the type letter, S the element width. v is expanded eight
+ * times, so it should be free of side effects.
+ */
+#define RISCV_SHRINK8(op, T, S, v)                                        \
+    __riscv_##op(__riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
+                                           __riscv_vget_##T##S##m1(v, 1), \
+                                           __riscv_vsetvlmax_e##S##m1()), \
+                              __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \
+                                           __riscv_vget_##T##S##m1(v, 3), \
+                                           __riscv_vsetvlmax_e##S##m1()), \
+                              __riscv_vsetvlmax_e##S##m1()),              \
+                 __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 4), \
+                                           __riscv_vget_##T##S##m1(v, 5), \
+                                           __riscv_vsetvlmax_e##S##m1()), \
+                              __riscv_##op(__riscv_vget_##T##S##m1(v, 6), \
+                                           __riscv_vget_##T##S##m1(v, 7), \
+                                           __riscv_vsetvlmax_e##S##m1()), \
+                              __riscv_vsetvlmax_e##S##m1()),              \
+                 __riscv_vsetvlmax_e##S##m1())
+/*
+ * RISCV_PERM4(f, v, vidx): split the u8m4 group v into its four u8m1 parts,
+ * call f(part, vidx, vlmax_e8m1) on each, and reassemble the four results
+ * into a u8m4 group. The same vidx is passed to every call; v and vidx are
+ * each expanded four times.
+ */
+#define RISCV_PERM4(f, v, vidx)                                     \
+    __riscv_vcreate_v_u8m1_u8m4(                                    \
+        f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1()))
+/*
+ * RISCV_LUT4(f, vtbl, v): split the u8m4 group v into its four u8m1 parts,
+ * call f(vtbl, part, vlmax_e8m1) on each, and reassemble the four results
+ * into a u8m4 group. Here the shared operand vtbl comes first and the part
+ * of v second; vtbl and v are each expanded four times.
+ */
+#define RISCV_LUT4(f, vtbl, v)                                      \
+    __riscv_vcreate_v_u8m1_u8m4(                                    \
+        f(vtbl, __riscv_vget_u8m1(v, 0), __riscv_vsetvlmax_e8m1()), \
+        f(vtbl, __riscv_vget_u8m1(v, 1), __riscv_vsetvlmax_e8m1()), \
+        f(vtbl, __riscv_vget_u8m1(v, 2), __riscv_vsetvlmax_e8m1()), \
+        f(vtbl, __riscv_vget_u8m1(v, 3), __riscv_vsetvlmax_e8m1()))
+/*
+ * RISCV_PERM8(f, v, vidx): split the u8m8 group v into its eight u8m1 parts,
+ * call f(part, vidx, vlmax_e8m1) on each, and reassemble the eight results
+ * into a u8m8 group. The same vidx is passed to every call; v and vidx are
+ * each expanded eight times.
+ */
+#define RISCV_PERM8(f, v, vidx)                                     \
+    __riscv_vcreate_v_u8m1_u8m8(                                    \
+        f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 4), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 5), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 6), vidx, __riscv_vsetvlmax_e8m1()), \
+        f(__riscv_vget_u8m1(v, 7), vidx, __riscv_vsetvlmax_e8m1()))
+/*
+ * RISCV_VMFLTZ(T, v, vl): reinterpret v as the signed integer vector type
+ * i<T> and return the mask of elements that compare less than 0, i.e. whose
+ * top bit is set. T is the suffix pasted after __riscv_vreinterpret_i.
+ * NOTE(review): the name suggests a "float < 0" test done on the sign bit;
+ * a pure sign-bit test would also flag -0.0 -- confirm callers accept that.
+ */
+#define RISCV_VMFLTZ(T, v, vl) __riscv_vmslt(__riscv_vreinterpret_i##T(v), 0, vl)
+
+#endif /* INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_ */
@@ -668,5 +668,66 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_avx2_fma(lv_32fc_t* result,
 
 #endif /*LV_HAVE_AVX2 && LV_HAVE_FMA*/
 
+#ifdef LV_HAVE_RVV
+#include <riscv_vector.h>
+#include <volk/volk_rvv_intrinsics.h>
+
+/*
+ * RVV implementation: dot product of 16-bit integer samples with complex
+ * float taps, without segmented loads.
+ *
+ *   result     - receives the complex sum; 0 + 0i when num_points is 0
+ *   input      - num_points 16-bit samples
+ *   taps       - num_points complex taps
+ *   num_points - number of sample/tap pairs
+ *
+ * Each tap is loaded as a single 64-bit word and split into its low and high
+ * 32-bit halves with narrowing shifts; the low half feeds the first component
+ * passed to lv_cmake and the high half the second.
+ */
+static inline void volk_16i_32fc_dot_prod_32fc_rvv(lv_32fc_t* result,
+                                                   const short* input,
+                                                   const lv_32fc_t* taps,
+                                                   unsigned int num_points)
+{
+    const size_t vlmax_m4 = __riscv_vsetvlmax_e32m4();
+    vfloat32m4_t acc_lo = __riscv_vfmv_v_f_f32m4(0, vlmax_m4);
+    vfloat32m4_t acc_hi = acc_lo;
+
+    size_t remaining = num_points;
+    while (remaining > 0) {
+        const size_t vl = __riscv_vsetvl_e32m4(remaining);
+
+        /* One 64-bit load per tap, then split into the two 32-bit halves. */
+        vuint64m8_t packed = __riscv_vle64_v_u64m8((const uint64_t*)taps, vl);
+        vuint32m4_t lo_bits = __riscv_vnsrl(packed, 0, vl);
+        vuint32m4_t hi_bits = __riscv_vnsrl(packed, 32, vl);
+        vfloat32m4_t tap_lo = __riscv_vreinterpret_f32m4(lo_bits);
+        vfloat32m4_t tap_hi = __riscv_vreinterpret_f32m4(hi_bits);
+
+        /* Widen the 16-bit samples to float. */
+        vint16m2_t raw = __riscv_vle16_v_i16m2((const int16_t*)input, vl);
+        vfloat32m4_t samples = __riscv_vfwcvt_f(raw, vl);
+
+        /* Tail-undisturbed: lanes past vl keep their earlier partial sums. */
+        acc_lo = __riscv_vfmacc_tu(acc_lo, tap_lo, samples, vl);
+        acc_hi = __riscv_vfmacc_tu(acc_hi, tap_hi, samples, vl);
+
+        remaining -= vl;
+        input += vl;
+        taps += vl;
+    }
+
+    /* Fold the m4 accumulators to m1, then reduce each to a scalar. */
+    const size_t vlmax_m1 = __riscv_vsetvlmax_e32m1();
+    vfloat32m1_t zero = __riscv_vfmv_s_f_f32m1(0, vlmax_m1);
+    vfloat32m1_t part_lo = RISCV_SHRINK4(vfadd, f, 32, acc_lo);
+    vfloat32m1_t part_hi = RISCV_SHRINK4(vfadd, f, 32, acc_hi);
+    float sum_lo = __riscv_vfmv_f(__riscv_vfredusum(part_lo, zero, vlmax_m1));
+    float sum_hi = __riscv_vfmv_f(__riscv_vfredusum(part_hi, zero, vlmax_m1));
+    *result = lv_cmake(sum_lo, sum_hi);
+}
+#endif /*LV_HAVE_RVV*/
+
+#ifdef LV_HAVE_RVVSEG
+#include <riscv_vector.h>
+#include <volk/volk_rvv_intrinsics.h>
+
+/*
+ * RVV implementation using a segmented load: dot product of 16-bit integer
+ * samples with complex float taps.
+ *
+ *   result     - receives the complex sum; 0 + 0i when num_points is 0
+ *   input      - num_points 16-bit samples
+ *   taps       - num_points complex taps, read as interleaved float pairs
+ *   num_points - number of sample/tap pairs
+ *
+ * The two-field segment load de-interleaves the taps: field 0 feeds the first
+ * component passed to lv_cmake and field 1 the second.
+ */
+static inline void volk_16i_32fc_dot_prod_32fc_rvvseg(lv_32fc_t* result,
+                                                      const short* input,
+                                                      const lv_32fc_t* taps,
+                                                      unsigned int num_points)
+{
+    const size_t vlmax_m4 = __riscv_vsetvlmax_e32m4();
+    vfloat32m4_t acc0 = __riscv_vfmv_v_f_f32m4(0, vlmax_m4);
+    vfloat32m4_t acc1 = acc0;
+
+    size_t remaining = num_points;
+    while (remaining > 0) {
+        const size_t vl = __riscv_vsetvl_e32m4(remaining);
+
+        /* De-interleave the taps into their two float fields. */
+        vfloat32m4x2_t pair = __riscv_vlseg2e32_v_f32m4x2((const float*)taps, vl);
+        vfloat32m4_t tap0 = __riscv_vget_f32m4(pair, 0);
+        vfloat32m4_t tap1 = __riscv_vget_f32m4(pair, 1);
+
+        /* Widen the 16-bit samples to float. */
+        vint16m2_t raw = __riscv_vle16_v_i16m2((const int16_t*)input, vl);
+        vfloat32m4_t samples = __riscv_vfwcvt_f(raw, vl);
+
+        /* Tail-undisturbed: lanes past vl keep their earlier partial sums. */
+        acc0 = __riscv_vfmacc_tu(acc0, tap0, samples, vl);
+        acc1 = __riscv_vfmacc_tu(acc1, tap1, samples, vl);
+
+        remaining -= vl;
+        input += vl;
+        taps += vl;
+    }
+
+    /* Fold the m4 accumulators to m1, then reduce each to a scalar. */
+    const size_t vlmax_m1 = __riscv_vsetvlmax_e32m1();
+    vfloat32m1_t zero = __riscv_vfmv_s_f_f32m1(0, vlmax_m1);
+    vfloat32m1_t part0 = RISCV_SHRINK4(vfadd, f, 32, acc0);
+    vfloat32m1_t part1 = RISCV_SHRINK4(vfadd, f, 32, acc1);
+    float sum0 = __riscv_vfmv_f(__riscv_vfredusum(part0, zero, vlmax_m1));
+    float sum1 = __riscv_vfmv_f(__riscv_vfredusum(part1, zero, vlmax_m1));
+    *result = lv_cmake(sum0, sum1);
+}
+#endif /*LV_HAVE_RVVSEG*/
 
 #endif /*INCLUDED_volk_16i_32fc_dot_prod_32fc_H*/
@@ -10,6 +10,10 @@
 /*!
  * \page volk_16i_branch_4_state_8
  *
+ * \b Deprecation
+ *
+ * This kernel is deprecated.
+ *
  * \b Overview
  *
  * <FIXME>
 
@@ -275,5 +275,20 @@ static inline void volk_16i_convert_8i_neon(int8_t* outputVector,
 }
 #endif /* LV_HAVE_NEON */
 
+#ifdef LV_HAVE_RVV
+#include <riscv_vector.h>
+
+/*
+ * RVV implementation: convert 16-bit integers to 8-bit integers by an
+ * arithmetic right shift of 8, so each output is the high byte of its input.
+ *
+ *   outputVector - receives num_points 8-bit results
+ *   inputVector  - num_points 16-bit inputs
+ *   num_points   - number of elements to convert
+ */
+static inline void volk_16i_convert_8i_rvv(int8_t* outputVector,
+                                           const int16_t* inputVector,
+                                           unsigned int num_points)
+{
+    size_t remaining = num_points;
+    while (remaining > 0) {
+        const size_t vl = __riscv_vsetvl_e16m8(remaining);
+        vint16m8_t wide = __riscv_vle16_v_i16m8(inputVector, vl);
+        /* The narrowing shift halves both element width and register group. */
+        vint8m4_t narrow = __riscv_vnsra(wide, 8, vl);
+        __riscv_vse8(outputVector, narrow, vl);
+
+        remaining -= vl;
+        inputVector += vl;
+        outputVector += vl;
+    }
+}
+#endif /*LV_HAVE_RVV*/
 
 #endif /* INCLUDED_volk_16i_convert_8i_a_H */
@@ -10,6 +10,10 @@
 /*!
  * \page volk_16i_max_star_16i
  *
+ * \b Deprecation
+ *
+ * This kernel is deprecated.
+ *
  * \b Overview
  *
  * <FIXME>
Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,10 @@`
`10`	`10`	`/*!`
`11`	`11`	`* \page volk_16i_branch_4_state_8`
`12`	`12`	`*`
	`13`	`+ * \b Deprecation`
	`14`	`+ *`
	`15`	`+ * This kernel is deprecated.`
	`16`	`+ *`
`13`	`17`	`* \b Overview`
`14`	`18`	`*`
`15`	`19`	`* <FIXME>`