Skip to content

Commit 7de15ab

Browse files
yhng3010 authored and NonerKao committed
Add RVV f32-dwconv2d-chw kernel
1 parent f7e7eba commit 7de15ab

27 files changed

+8971
-18
lines changed

bench/f32-dwconv2d-chw.cc

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,6 +2837,156 @@ BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__wasmsimd_x86_splat_2x4_acc3)
28372837
BENCHMARK_DWCONV(dwconv2d_chw_5x5s2p2__wasmsimd_x86_splat_3x4_acc2)
28382838
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28392839

2840+
#if XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV
2841+
// Benchmark entry for the 3x3p1 RVV 5x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_5x1v(benchmark::State& state,
2842+
const char* net) {
2843+
f32_dwconv2d_chw(state,
2844+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_5x1v,
2845+
xnn_init_f32_minmax_scalar_params,
2846+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2847+
/*stride=*/1);
2848+
}
2849+
// Benchmark entry for the 3x3p1 RVV 6x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_6x1v(benchmark::State& state,
2850+
const char* net) {
2851+
f32_dwconv2d_chw(state,
2852+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_6x1v,
2853+
xnn_init_f32_minmax_scalar_params,
2854+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2855+
/*stride=*/1);
2856+
}
2857+
// Benchmark entry for the 3x3p1 RVV 7x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_7x1v(benchmark::State& state,
2858+
const char* net) {
2859+
f32_dwconv2d_chw(state,
2860+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_7x1v,
2861+
xnn_init_f32_minmax_scalar_params,
2862+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2863+
/*stride=*/1);
2864+
}
2865+
// Benchmark entry for the 3x3p1 RVV 8x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_8x1v(benchmark::State& state,
2866+
const char* net) {
2867+
f32_dwconv2d_chw(state,
2868+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_8x1v,
2869+
xnn_init_f32_minmax_scalar_params,
2870+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2871+
/*stride=*/1);
2872+
}
2873+
// Benchmark entry for the 3x3p1 RVV 1x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_1x2v(benchmark::State& state,
2874+
const char* net) {
2875+
f32_dwconv2d_chw(state,
2876+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_1x2v,
2877+
xnn_init_f32_minmax_scalar_params,
2878+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2879+
/*stride=*/1);
2880+
}
2881+
// Benchmark entry for the 3x3p1 RVV 2x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_2x2v(benchmark::State& state,
2882+
const char* net) {
2883+
f32_dwconv2d_chw(state,
2884+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_2x2v,
2885+
xnn_init_f32_minmax_scalar_params,
2886+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2887+
/*stride=*/1);
2888+
}
2889+
// Benchmark entry for the 3x3p1 RVV 3x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_3x2v(benchmark::State& state,
2890+
const char* net) {
2891+
f32_dwconv2d_chw(state,
2892+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_3x2v,
2893+
xnn_init_f32_minmax_scalar_params,
2894+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2895+
/*stride=*/1);
2896+
}
2897+
// Benchmark entry for the 3x3p1 RVV 4x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 1. `net` is not used in the body.
static void dwconv2d_chw_3x3p1__rvv_4x2v(benchmark::State& state,
2898+
const char* net) {
2899+
f32_dwconv2d_chw(state,
2900+
xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_4x2v,
2901+
xnn_init_f32_minmax_scalar_params,
2902+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2903+
/*stride=*/1);
2904+
}
2905+
2906+
// Benchmark entry for the 3x3s2p1 RVV 5x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_5x1v(benchmark::State& state,
2907+
const char* net) {
2908+
f32_dwconv2d_chw(state,
2909+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_5x1v,
2910+
xnn_init_f32_minmax_scalar_params,
2911+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2912+
/*stride=*/2);
2913+
}
2914+
// Benchmark entry for the 3x3s2p1 RVV 6x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_6x1v(benchmark::State& state,
2915+
const char* net) {
2916+
f32_dwconv2d_chw(state,
2917+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_6x1v,
2918+
xnn_init_f32_minmax_scalar_params,
2919+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2920+
/*stride=*/2);
2921+
}
2922+
// Benchmark entry for the 3x3s2p1 RVV 7x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_7x1v(benchmark::State& state,
2923+
const char* net) {
2924+
f32_dwconv2d_chw(state,
2925+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_7x1v,
2926+
xnn_init_f32_minmax_scalar_params,
2927+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2928+
/*stride=*/2);
2929+
}
2930+
// Benchmark entry for the 3x3s2p1 RVV 8x1v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_8x1v(benchmark::State& state,
2931+
const char* net) {
2932+
f32_dwconv2d_chw(state,
2933+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_8x1v,
2934+
xnn_init_f32_minmax_scalar_params,
2935+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2936+
/*stride=*/2);
2937+
}
2938+
// Benchmark entry for the 3x3s2p1 RVV 1x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_1x2v(benchmark::State& state,
2939+
const char* net) {
2940+
f32_dwconv2d_chw(state,
2941+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_1x2v,
2942+
xnn_init_f32_minmax_scalar_params,
2943+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2944+
/*stride=*/2);
2945+
}
2946+
// Benchmark entry for the 3x3s2p1 RVV 2x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_2x2v(benchmark::State& state,
2947+
const char* net) {
2948+
f32_dwconv2d_chw(state,
2949+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_2x2v,
2950+
xnn_init_f32_minmax_scalar_params,
2951+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2952+
/*stride=*/2);
2953+
}
2954+
// Benchmark entry for the 3x3s2p1 RVV 3x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_3x2v(benchmark::State& state,
2955+
const char* net) {
2956+
f32_dwconv2d_chw(state,
2957+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_3x2v,
2958+
xnn_init_f32_minmax_scalar_params,
2959+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2960+
/*stride=*/2);
2961+
}
2962+
// Benchmark entry for the 3x3s2p1 RVV 4x2v microkernel: forwards `state` to
// f32_dwconv2d_chw together with xnn_init_f32_minmax_scalar_params, a 3x3
// kernel, padding width 1 and stride 2. `net` is not used in the body.
static void dwconv2d_chw_3x3s2p1__rvv_4x2v(benchmark::State& state,
2963+
const char* net) {
2964+
f32_dwconv2d_chw(state,
2965+
xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_4x2v,
2966+
xnn_init_f32_minmax_scalar_params,
2967+
/*kernel_height=*/3, /*kernel_width=*/3, /*padding_width=*/1,
2968+
/*stride=*/2);
2969+
}
2970+
2971+
// Pass each RVV wrapper to BENCHMARK_DWCONV: first the eight 3x3p1 variants,
// then the eight 3x3s2p1 variants.
// NOTE(review): BENCHMARK_DWCONV is defined outside this excerpt; presumably it
// registers the function with the benchmark harness — confirm against its definition.
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_5x1v)
2972+
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_6x1v)
2973+
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_7x1v)
2974+
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_8x1v)
2975+
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_1x2v)
2976+
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_2x2v)
2977+
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_3x2v)
2978+
BENCHMARK_DWCONV(dwconv2d_chw_3x3p1__rvv_4x2v)
2979+
2980+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_5x1v)
2981+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_6x1v)
2982+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_7x1v)
2983+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_8x1v)
2984+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_1x2v)
2985+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_2x2v)
2986+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_3x2v)
2987+
BENCHMARK_DWCONV(dwconv2d_chw_3x3s2p1__rvv_4x2v)
2988+
#endif // XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV
2989+
28402990
static void dwconv2d_chw_3x3p1__scalar_1x1(benchmark::State& state,
28412991
const char* net) {
28422992
f32_dwconv2d_chw(state, xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1,

cmake/gen/rvv_microkernels.cmake

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ SET(PROD_RVV_MICROKERNEL_SRCS
2020
src/f32-dwconv/gen/f32-dwconv-9p8vc-rvv.c
2121
src/f32-dwconv/gen/f32-dwconv-25p8vc-minmax-rvv.c
2222
src/f32-dwconv/gen/f32-dwconv-25p8vc-rvv.c
23+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-7x1v.c
24+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c
2325
src/f32-gemm/gen/f32-gemm-1x4v-minmax-rvv.c
2426
src/f32-gemm/gen/f32-gemm-7x4v-minmax-rvv.c
2527
src/f32-igemm/gen/f32-igemm-1x4v-minmax-rvv.c
@@ -103,6 +105,20 @@ SET(PROD_RVV_MICROKERNEL_SRCS
103105
SET(NON_PROD_RVV_MICROKERNEL_SRCS
104106
src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x2v-rvv-1x1.c
105107
src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x2v-rvv-2x1.c
108+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-1x2v.c
109+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-2x2v.c
110+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-3x2v.c
111+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-4x2v.c
112+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-5x1v.c
113+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-6x1v.c
114+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-8x1v.c
115+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-1x2v.c
116+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-3x2v.c
117+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-4x2v.c
118+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-5x1v.c
119+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-6x1v.c
120+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-7x1v.c
121+
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c
106122
src/f32-gemm/gen/f32-gemm-1x4v-relu-rvv.c
107123
src/f32-gemm/gen/f32-gemm-1x4v-rvv.c
108124
src/f32-gemm/gen/f32-gemm-7x4v-relu-rvv.c

gen/rvv_microkernels.bzl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ PROD_RVV_MICROKERNEL_SRCS = [
1616
"src/f32-dwconv/gen/f32-dwconv-9p8vc-rvv.c",
1717
"src/f32-dwconv/gen/f32-dwconv-25p8vc-minmax-rvv.c",
1818
"src/f32-dwconv/gen/f32-dwconv-25p8vc-rvv.c",
19+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-7x1v.c",
20+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c",
1921
"src/f32-gemm/gen/f32-gemm-1x4v-minmax-rvv.c",
2022
"src/f32-gemm/gen/f32-gemm-7x4v-minmax-rvv.c",
2123
"src/f32-igemm/gen/f32-igemm-1x4v-minmax-rvv.c",
@@ -100,6 +102,20 @@ PROD_RVV_MICROKERNEL_SRCS = [
100102
NON_PROD_RVV_MICROKERNEL_SRCS = [
101103
"src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x2v-rvv-1x1.c",
102104
"src/f32-conv-hwc2chw/f32-conv-hwc2chw-3x3s2p1c3x2v-rvv-2x1.c",
105+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-1x2v.c",
106+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-2x2v.c",
107+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-3x2v.c",
108+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-4x2v.c",
109+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-5x1v.c",
110+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-6x1v.c",
111+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-8x1v.c",
112+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-1x2v.c",
113+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-3x2v.c",
114+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-4x2v.c",
115+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-5x1v.c",
116+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-6x1v.c",
117+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-7x1v.c",
118+
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c",
103119
"src/f32-gemm/gen/f32-gemm-1x4v-relu-rvv.c",
104120
"src/f32-gemm/gen/f32-gemm-1x4v-rvv.c",
105121
"src/f32-gemm/gen/f32-gemm-7x4v-relu-rvv.c",

scripts/generate-f32-dwconv2d-chw.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,4 +407,23 @@ tools/xngen src/f32-dwconv2d-chw/5x5s2p2-wasmsimd-splat.c.in -D ROW_TILE=2 -D AC
407407
tools/xngen src/f32-dwconv2d-chw/5x5s2p2-wasmsimd-splat.c.in -D ROW_TILE=2 -D ACCUMULATORS=3 -D X86=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-2x4-acc3.c &
408408
tools/xngen src/f32-dwconv2d-chw/5x5s2p2-wasmsimd-splat.c.in -D ROW_TILE=3 -D ACCUMULATORS=2 -D X86=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-3x4-acc2.c &
409409

410+
################################## RISC-V RVV #################################
411+
# Each command runs tools/xngen on a template (3x3p1-rvv.c.in for the first
# eight, 3x3s2p1-rvv.c.in for the last eight) and writes one file under
# src/f32-dwconv2d-chw/gen/. ROW_TILE and COL_TILE (m1 or m2) match the
# "<rows>x<1|2>v" suffix of the output name; ACCUMULATORS is always 1 here.
# The trailing & starts each invocation as a background job.
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=5 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-5x1v.c &
412+
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=6 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-6x1v.c &
413+
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=7 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-7x1v.c &
414+
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=8 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-8x1v.c &
415+
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=1 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-1x2v.c &
416+
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=2 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-2x2v.c &
417+
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=3 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-3x2v.c &
418+
tools/xngen src/f32-dwconv2d-chw/3x3p1-rvv.c.in -D ROW_TILE=4 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3p1-minmax-rvv-4x2v.c &
419+
420+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=5 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-5x1v.c &
421+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=6 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-6x1v.c &
422+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=7 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-7x1v.c &
423+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=8 -D COL_TILE=m1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c &
424+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=1 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-1x2v.c &
425+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=2 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c &
426+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=3 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-3x2v.c &
427+
tools/xngen src/f32-dwconv2d-chw/3x3s2p1-rvv.c.in -D ROW_TILE=4 -D COL_TILE=m2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-4x2v.c &
428+
410429
wait

src/configs/dwconv2d-chw-config.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,23 @@ static void init_f32_dwconv2d_chw_config(void) {
189189
f32_dwconv2d_chw_config.dwconv2d_chw_5x5.init.f32 = xnn_init_f32_minmax_scalar_params;
190190
f32_dwconv2d_chw_config.dwconv2d_chw_5x5.output_width_tile = 1;
191191

192+
f32_dwconv2d_chw_config.dwconv2d_chw_5x5s2.ukernel = (xnn_dwconv2d_chw_ukernel_fn) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5;
193+
f32_dwconv2d_chw_config.dwconv2d_chw_5x5s2.init.f32 = xnn_init_f32_minmax_scalar_params;
194+
f32_dwconv2d_chw_config.dwconv2d_chw_5x5s2.output_width_tile = 1;
195+
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
196+
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
197+
f32_dwconv2d_chw_config.dwconv2d_chw_3x3.ukernel = (xnn_dwconv2d_chw_ukernel_fn) xnn_f32_dwconv2d_chw_ukernel_3x3p1__rvv_7x1v;
198+
f32_dwconv2d_chw_config.dwconv2d_chw_3x3.init.f32 = xnn_init_f32_minmax_scalar_params;
199+
f32_dwconv2d_chw_config.dwconv2d_chw_3x3.output_width_tile = 1 * hardware_config->vlenb / sizeof(float);
200+
201+
f32_dwconv2d_chw_config.dwconv2d_chw_3x3s2.ukernel = (xnn_dwconv2d_chw_ukernel_fn) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__rvv_2x2v;
202+
f32_dwconv2d_chw_config.dwconv2d_chw_3x3s2.init.f32 = xnn_init_f32_minmax_scalar_params;
203+
f32_dwconv2d_chw_config.dwconv2d_chw_3x3s2.output_width_tile = 2 * hardware_config->vlenb / sizeof(float);
204+
205+
f32_dwconv2d_chw_config.dwconv2d_chw_5x5.ukernel = (xnn_dwconv2d_chw_ukernel_fn) xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5;
206+
f32_dwconv2d_chw_config.dwconv2d_chw_5x5.init.f32 = xnn_init_f32_minmax_scalar_params;
207+
f32_dwconv2d_chw_config.dwconv2d_chw_5x5.output_width_tile = 1;
208+
192209
f32_dwconv2d_chw_config.dwconv2d_chw_5x5s2.ukernel = (xnn_dwconv2d_chw_ukernel_fn) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5;
193210
f32_dwconv2d_chw_config.dwconv2d_chw_5x5s2.init.f32 = xnn_init_f32_minmax_scalar_params;
194211
f32_dwconv2d_chw_config.dwconv2d_chw_5x5s2.output_width_tile = 1;

0 commit comments

Comments
 (0)