| 
 | 1 | +/* -*- c++ -*- */  | 
 | 2 | +/*  | 
 | 3 | + * Copyright 2024 Free Software Foundation, Inc.  | 
 | 4 | + *  | 
 | 5 | + * This file is part of VOLK  | 
 | 6 | + *  | 
 | 7 | + * SPDX-License-Identifier: LGPL-3.0-or-later  | 
 | 8 | + */  | 
 | 9 | + | 
 /*
  * This file is intended to hold helper macros composed of RVV intrinsics.
  * They should be used in VOLK kernels to avoid copy-paste.
  */
 | 14 | + | 
 | 15 | +#ifndef INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_  | 
 | 16 | +#define INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_  | 
 | 17 | +#include <riscv_vector.h>  | 
 | 18 | + | 
 /*
  * RISCV_SHRINK2(op, T, S, v)
  *
  * Expands to one call of __riscv_<op> combining two parts of `v`:
  *
  *     op(p0, p1)        with p<i> = __riscv_vget_<T><S>m1(v, i)
  *
  * The call receives __riscv_vsetvlmax_e<S>m1() as its vector length.
  *
  *   op - name suffix pasted after "__riscv_"; must accept (a, b, vl)
  *   T  - type letter pasted into the vget name
  *   S  - element width pasted into the vget and vsetvlmax names
  *   v  - source vector expression; it appears twice in the expansion,
  *        so it must not have side effects
  *
  * NOTE(review): the indices 0..1 suggest `v` is an LMUL=2 group - confirm
  * against the callers.
  */
 #define RISCV_SHRINK2(op, T, S, v)              \
     __riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
                  __riscv_vget_##T##S##m1(v, 1), \
                  __riscv_vsetvlmax_e##S##m1())
 | 23 | + | 
 /*
  * RISCV_SHRINK4(op, T, S, v)
  *
  * Expands to a two-level tree of three __riscv_<op> calls over four parts
  * of `v`:
  *
  *     op(op(p0, p1), op(p2, p3))    with p<i> = __riscv_vget_<T><S>m1(v, i)
  *
  * Every call receives __riscv_vsetvlmax_e<S>m1() as its vector length.
  *
  *   op - name suffix pasted after "__riscv_"; must accept (a, b, vl)
  *   T  - type letter pasted into the vget name
  *   S  - element width pasted into the vget and vsetvlmax names
  *   v  - source vector expression; it appears four times in the
  *        expansion, so it must not have side effects
  *
  * NOTE(review): the indices 0..3 suggest `v` is an LMUL=4 group - confirm
  * against the callers.
  */
 #define RISCV_SHRINK4(op, T, S, v)                           \
     __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
                               __riscv_vget_##T##S##m1(v, 1), \
                               __riscv_vsetvlmax_e##S##m1()), \
                  __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \
                               __riscv_vget_##T##S##m1(v, 3), \
                               __riscv_vsetvlmax_e##S##m1()), \
                  __riscv_vsetvlmax_e##S##m1())
 | 32 | + | 
 /*
  * RISCV_SHRINK8(op, T, S, v)
  *
  * Expands to a three-level tree of seven __riscv_<op> calls over eight
  * parts of `v`:
  *
  *     op(op(op(p0, p1), op(p2, p3)),
  *        op(op(p4, p5), op(p6, p7)))    with p<i> = __riscv_vget_<T><S>m1(v, i)
  *
  * Every call receives __riscv_vsetvlmax_e<S>m1() as its vector length.
  *
  *   op - name suffix pasted after "__riscv_"; must accept (a, b, vl)
  *   T  - type letter pasted into the vget name
  *   S  - element width pasted into the vget and vsetvlmax names
  *   v  - source vector expression; it appears eight times in the
  *        expansion, so it must not have side effects
  *
  * NOTE(review): the indices 0..7 suggest `v` is an LMUL=8 group - confirm
  * against the callers.
  */
 #define RISCV_SHRINK8(op, T, S, v)                                        \
     __riscv_##op(__riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
                                            __riscv_vget_##T##S##m1(v, 1), \
                                            __riscv_vsetvlmax_e##S##m1()), \
                               __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \
                                            __riscv_vget_##T##S##m1(v, 3), \
                                            __riscv_vsetvlmax_e##S##m1()), \
                               __riscv_vsetvlmax_e##S##m1()),              \
                  __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 4), \
                                            __riscv_vget_##T##S##m1(v, 5), \
                                            __riscv_vsetvlmax_e##S##m1()), \
                               __riscv_##op(__riscv_vget_##T##S##m1(v, 6), \
                                            __riscv_vget_##T##S##m1(v, 7), \
                                            __riscv_vsetvlmax_e##S##m1()), \
                               __riscv_vsetvlmax_e##S##m1()),              \
                  __riscv_vsetvlmax_e##S##m1())
 | 49 | + | 
 /*
  * RISCV_PERM4(f, v, vidx)
  *
  * Applies `f` separately to the four parts __riscv_vget_u8m1(v, i),
  * i = 0..3, and reassembles the four results with
  * __riscv_vcreate_v_u8m1_u8m4. Each call has the form
  *
  *     f(part_i, vidx, __riscv_vsetvlmax_e8m1())
  *
  * so the part of `v` is the FIRST argument and the same `vidx` is reused
  * for every part.
  *
  *   f    - function or function-like macro taking (data, index, vl)
  *   v    - source vector expression; expanded four times
  *   vidx - index operand; expanded four times
  *
  * Neither `v` nor `vidx` may have side effects.
  */
 #define RISCV_PERM4(f, v, vidx)                                     \
     __riscv_vcreate_v_u8m1_u8m4(                                    \
         f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1()))
 | 56 | + | 
 /*
  * RISCV_LUT4(f, vtbl, v)
  *
  * Applies `f` separately to the four parts __riscv_vget_u8m1(v, i),
  * i = 0..3, and reassembles the four results with
  * __riscv_vcreate_v_u8m1_u8m4. Each call has the form
  *
  *     f(vtbl, part_i, __riscv_vsetvlmax_e8m1())
  *
  * so the same `vtbl` is the FIRST argument of every call and the part of
  * `v` is the second one (the reverse of RISCV_PERM4's argument order).
  *
  *   f    - function or function-like macro taking (table, index, vl)
  *   vtbl - table operand; expanded four times
  *   v    - source vector expression; expanded four times
  *
  * Neither `vtbl` nor `v` may have side effects.
  */
 #define RISCV_LUT4(f, vtbl, v)                                      \
     __riscv_vcreate_v_u8m1_u8m4(                                    \
         f(vtbl, __riscv_vget_u8m1(v, 0), __riscv_vsetvlmax_e8m1()), \
         f(vtbl, __riscv_vget_u8m1(v, 1), __riscv_vsetvlmax_e8m1()), \
         f(vtbl, __riscv_vget_u8m1(v, 2), __riscv_vsetvlmax_e8m1()), \
         f(vtbl, __riscv_vget_u8m1(v, 3), __riscv_vsetvlmax_e8m1()))
 | 63 | + | 
 /*
  * RISCV_PERM8(f, v, vidx)
  *
  * Eight-part variant of the per-part permutation helper: applies `f`
  * separately to __riscv_vget_u8m1(v, i), i = 0..7, and reassembles the
  * eight results with __riscv_vcreate_v_u8m1_u8m8. Each call has the form
  *
  *     f(part_i, vidx, __riscv_vsetvlmax_e8m1())
  *
  *   f    - function or function-like macro taking (data, index, vl)
  *   v    - source vector expression; expanded eight times
  *   vidx - index operand; expanded eight times
  *
  * Neither `v` nor `vidx` may have side effects.
  */
 #define RISCV_PERM8(f, v, vidx)                                     \
     __riscv_vcreate_v_u8m1_u8m8(                                    \
         f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 4), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 5), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 6), vidx, __riscv_vsetvlmax_e8m1()), \
         f(__riscv_vget_u8m1(v, 7), vidx, __riscv_vsetvlmax_e8m1()))
 | 74 | + | 
 /*
  * RISCV_VMFLTZ(T, v, vl)
  *
  * Expands to __riscv_vmslt(__riscv_vreinterpret_i<T>(v), 0, vl): `v` is
  * reinterpreted as the signed integer vector type i<T> and compared
  * "less than 0" over `vl` elements.
  *
  *   T  - type suffix pasted after "__riscv_vreinterpret_i" (width + LMUL)
  *   v  - vector to test; expanded once
  *   vl - vector length forwarded to the comparison; expanded once
  *
  * NOTE(review): the name suggests this is meant for float vectors. Since
  * the comparison runs on the reinterpreted integer bits, any lane whose
  * top bit is set is selected - for floats that would include -0.0 and
  * negative NaNs. Confirm this is what the callers expect.
  */
 #define RISCV_VMFLTZ(T, v, vl) __riscv_vmslt(__riscv_vreinterpret_i##T(v), 0, vl)
 | 76 | + | 
 | 77 | +#endif /* INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_ */  | 
0 commit comments