From 60ea4c8f8689a58c321fc0f1f8a18e83794bfc7f Mon Sep 17 00:00:00 2001 From: RahulSudarMCW Date: Fri, 25 Oct 2024 10:33:23 +0530 Subject: [PATCH] Replace vscaleextexp yaml with header table - Include table header and remove yaml file --- BUILD.bazel | 1 + scripts/generate-tests.sh | 2 +- src/f32-vscaleextexp/f32-vscaleextexp.h | 52 ++ test/f32-vscaleextexp.cc | 896 +----------------------- test/f32-vscaleextexp.yaml | 31 - test/vscaleextexp-microkernel-tester.h | 29 + tools/generate-vscaleextexp-test.py | 116 +-- 7 files changed, 125 insertions(+), 1002 deletions(-) create mode 100644 src/f32-vscaleextexp/f32-vscaleextexp.h delete mode 100644 test/f32-vscaleextexp.yaml diff --git a/BUILD.bazel b/BUILD.bazel index b8525a1e36c..fb28be8c0d0 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -151,6 +151,7 @@ MICROKERNEL_DEFS = [ "src/f32-vrnd/f32-vrndu.h", "src/f32-vrnd/f32-vrndz.h", "src/f32-vrsqrt/f32-vrsqrt.h", + "src/f32-vscaleextexp/f32-vscaleextexp.h", "src/f32-vsigmoid/f32-vsigmoid.h", "src/f32-vsqr/f32-vsqr.h", "src/f32-vsqrt/f32-vsqrt.h", diff --git a/scripts/generate-tests.sh b/scripts/generate-tests.sh index 75fc425003e..ae0ca1f6a93 100755 --- a/scripts/generate-tests.sh +++ b/scripts/generate-tests.sh @@ -273,7 +273,7 @@ tools/generate-raddstoreexpminusmax-test.py --spec test/f16-raddstoreexpminusmax tools/generate-raddstoreexpminusmax-test.py --spec test/f32-raddstoreexpminusmax.yaml --output test/f32-raddstoreexpminusmax.cc & ### Tests for VScaleExtExp micro-kernels -tools/generate-vscaleextexp-test.py --spec test/f32-vscaleextexp.yaml --output test/f32-vscaleextexp.cc & +tools/generate-vscaleextexp-test.py --tester VScaleExtExpMicrokernelTester --ukernel f32-vscaleextexp --output test/f32-vscaleextexp.cc & ### Tests for VScaleExpMinusMax micro-kernels tools/generate-vscaleexpminusmax-test.py --spec test/f32-vscaleexpminusmax.yaml --output test/f32-vscaleexpminusmax.cc & diff --git a/src/f32-vscaleextexp/f32-vscaleextexp.h 
b/src/f32-vscaleextexp/f32-vscaleextexp.h new file mode 100644 index 00000000000..eecada7b27b --- /dev/null +++ b/src/f32-vscaleextexp/f32-vscaleextexp.h @@ -0,0 +1,52 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u8, 8, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u16, 16, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u24, 24, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u32, 32, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u40, 40, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u48, 48, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u56, 56, float, struct 
xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u64, 64, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u72, 72, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u80, 80, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u88, 88, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_vscaleextexp_ukernel__avx2_p5_u96, 96, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u16, 16, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u32, 32, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u48, 48, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u64, 64, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u80, 80, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, 
xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u96, 96, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u112, 112, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u128, 128, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u144, 144, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u160, 160, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u176, 176, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u192, 192, float, struct xnn_f32_default_params, ((xnn_f32_vscaleextexp_ukernel_fn) NULL)) +#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/test/f32-vscaleextexp.cc b/test/f32-vscaleextexp.cc index a289a14f7ba..0f315d5610b 100644 --- a/test/f32-vscaleextexp.cc +++ b/test/f32-vscaleextexp.cc @@ -4,7 +4,7 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! 
-// Specification: test/f32-vscaleextexp.yaml +// Specification: f32-vscaleextexp // Generator: tools/generate-vscaleextexp-test.py @@ -14,890 +14,10 @@ #include "xnnpack/vscaleextexp.h" #include "vscaleextexp-microkernel-tester.h" - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U8, elements_eq_8) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(8) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u8); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U8, elements_div_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 16; elements < 80; elements += 8) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u8); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U8, elements_lt_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 8; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u8); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U8, elements_gt_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 9; elements < 16; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u8); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U16, elements_eq_16) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(16) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u16); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U16, elements_div_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 32; elements < 160; elements += 16) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u16); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U16, elements_lt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 16; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - 
.Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u16); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U16, elements_gt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 17; elements < 32; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u16); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U24, elements_eq_24) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(24) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u24); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U24, elements_div_24) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 48; elements < 240; elements += 24) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u24); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U24, elements_lt_24) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 24; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u24); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U24, elements_gt_24) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 25; elements < 48; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u24); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U32, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(32) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u32); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U32, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u32); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U32, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for 
(size_t elements = 1; elements < 32; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u32); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U32, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u32); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U40, elements_eq_40) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(40) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u40); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U40, elements_div_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 80; elements < 400; elements += 40) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u40); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U40, elements_lt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 40; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u40); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U40, elements_gt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 41; elements < 80; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u40); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U48, elements_eq_48) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(48) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u48); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U48, elements_div_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 96; elements < 480; elements += 48) { - VScaleExtExpMicrokernelTester() - .elements(elements) - 
.Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u48); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U48, elements_lt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 48; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u48); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U48, elements_gt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 49; elements < 96; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u48); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U56, elements_eq_56) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(56) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u56); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U56, elements_div_56) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 112; elements < 560; elements += 56) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u56); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U56, elements_lt_56) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 56; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u56); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U56, elements_gt_56) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 57; elements < 112; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u56); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U64, elements_eq_64) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(64) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u64); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U64, elements_div_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t 
elements = 128; elements < 640; elements += 64) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u64); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U64, elements_lt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 64; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u64); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U64, elements_gt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 65; elements < 128; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u64); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U72, elements_eq_72) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(72) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u72); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U72, elements_div_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 144; elements < 720; elements += 72) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u72); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U72, elements_lt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 72; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u72); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U72, elements_gt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 73; elements < 144; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u72); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U80, elements_eq_80) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(80) - 
.Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u80); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U80, elements_div_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 160; elements < 800; elements += 80) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u80); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U80, elements_lt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 80; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u80); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U80, elements_gt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 81; elements < 160; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u80); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VSCALEEXTEXP__AVX2_P5_U88, elements_eq_88) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(88) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u88); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U88, elements_div_88) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 176; elements < 880; elements += 88) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u88); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U88, elements_lt_88) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 88; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u88); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U88, elements_gt_88) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 89; elements < 176; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u88); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - 
TEST(F32_VSCALEEXTEXP__AVX2_P5_U96, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - VScaleExtExpMicrokernelTester() - .elements(96) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u96); - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U96, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u96); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U96, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u96); - } - } - - TEST(F32_VSCALEEXTEXP__AVX2_P5_U96, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx2_p5_u96); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U16, elements_eq_16) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(16) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u16); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U16, elements_div_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 32; elements < 160; elements += 16) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u16); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U16, elements_lt_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 16; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u16); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U16, elements_gt_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 17; elements < 32; 
elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u16); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U32, elements_eq_32) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(32) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u32); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U32, elements_div_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 64; elements < 320; elements += 32) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u32); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U32, elements_lt_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 32; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u32); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U32, elements_gt_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 33; elements < 64; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u32); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U48, elements_eq_48) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(48) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u48); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U48, elements_div_48) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 96; elements < 480; elements += 48) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u48); - } - } - - 
TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U48, elements_lt_48) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 48; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u48); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U48, elements_gt_48) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 49; elements < 96; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u48); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U64, elements_eq_64) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(64) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u64); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U64, elements_div_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 128; elements < 640; elements += 64) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u64); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U64, elements_lt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 64; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u64); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U64, elements_gt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 65; elements < 128; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u64); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U80, elements_eq_80) { - TEST_REQUIRES_X86_AVX512F; - 
VScaleExtExpMicrokernelTester() - .elements(80) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u80); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U80, elements_div_80) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 160; elements < 800; elements += 80) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u80); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U80, elements_lt_80) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 80; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u80); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U80, elements_gt_80) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 81; elements < 160; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u80); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U96, elements_eq_96) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(96) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u96); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U96, elements_div_96) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 192; elements < 960; elements += 96) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u96); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U96, elements_lt_96) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 96; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u96); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U96, elements_gt_96) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 
97; elements < 192; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u96); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U112, elements_eq_112) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(112) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u112); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U112, elements_div_112) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 224; elements < 1120; elements += 112) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u112); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U112, elements_lt_112) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 112; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u112); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U112, elements_gt_112) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 113; elements < 224; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u112); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U128, elements_eq_128) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(128) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u128); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U128, elements_div_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 256; elements < 1280; elements += 128) { - VScaleExtExpMicrokernelTester() - .elements(elements) - 
.Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u128); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U128, elements_lt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 128; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u128); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U128, elements_gt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 129; elements < 256; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u128); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U144, elements_eq_144) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(144) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u144); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U144, elements_div_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 288; elements < 1440; elements += 144) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u144); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U144, elements_lt_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 144; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u144); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U144, elements_gt_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 145; elements < 288; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u144); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - 
TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U160, elements_eq_160) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(160) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u160); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U160, elements_div_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 320; elements < 1600; elements += 160) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u160); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U160, elements_lt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 160; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u160); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U160, elements_gt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 161; elements < 320; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u160); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U176, elements_eq_176) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(176) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u176); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U176, elements_div_176) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 352; elements < 1760; elements += 176) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u176); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U176, elements_lt_176) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 176; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u176); - } - } 
- - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U176, elements_gt_176) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 177; elements < 352; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u176); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U192, elements_eq_192) { - TEST_REQUIRES_X86_AVX512F; - VScaleExtExpMicrokernelTester() - .elements(192) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u192); - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U192, elements_div_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 384; elements < 1920; elements += 192) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u192); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U192, elements_lt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 192; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u192); - } - } - - TEST(F32_VSCALEEXTEXP__AVX512F_P5_SCALEF_U192, elements_gt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 193; elements < 384; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u192); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) \ + XNN_TEST_VSCALEEXTEXP_ELEMENT_EQ(ukernel, arch_flags, element_tile, init_params); \ + XNN_TEST_VSCALEEXTEXP_ELEMENT_DIV(ukernel, arch_flags, element_tile, init_params); \ + XNN_TEST_VSCALEEXTEXP_ELEMENT_LT(ukernel, arch_flags, element_tile, init_params); \ + XNN_TEST_VSCALEEXTEXP_ELEMENT_GT(ukernel, arch_flags, element_tile, 
init_params); +#include "f32-vscaleextexp/f32-vscaleextexp.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f32-vscaleextexp.yaml b/test/f32-vscaleextexp.yaml deleted file mode 100644 index f3340407cb4..00000000000 --- a/test/f32-vscaleextexp.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2019 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# x86 AVX -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u8 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u16 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u24 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u32 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u40 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u48 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u56 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u64 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u72 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u80 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u88 -- name: xnn_f32_vscaleextexp_ukernel__avx2_p5_u96 -# x86 AVX512 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u16 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u32 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u48 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u64 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u80 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u96 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u112 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u128 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u144 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u160 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u176 -- name: xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_u192 diff --git a/test/vscaleextexp-microkernel-tester.h b/test/vscaleextexp-microkernel-tester.h index 2181a0a2166..864001bd5a1 100644 --- a/test/vscaleextexp-microkernel-tester.h 
+++ b/test/vscaleextexp-microkernel-tester.h @@ -86,3 +86,32 @@ class VScaleExtExpMicrokernelTester { size_t elements_{1}; size_t iterations_{15}; }; + + +#define XNN_TEST_VSCALEEXTEXP_ELEMENT_EQ(ukernel, arch_flags, element_tile, ...) \ + TEST(ukernel, element_eq) \ + { \ + VScaleExtExpMicrokernelTester().elements(element_tile).Test(ukernel); \ + } +#define XNN_TEST_VSCALEEXTEXP_ELEMENT_GT(ukernel, arch_flags, element_tile, ...) \ + TEST(ukernel, element_gt) \ + { \ + for (size_t element_size = element_tile + 1; element_size < ((element_tile == 1) ? 10 : element_tile * 2); \ + element_size++) { \ + VScaleExtExpMicrokernelTester().elements(element_size).Test(ukernel); \ + } \ + } +#define XNN_TEST_VSCALEEXTEXP_ELEMENT_LT(ukernel, arch_flags, element_tile, ...) \ + TEST(ukernel, element_lt) \ + { \ + for (size_t element_size = 1; element_size < element_tile; element_size++) { \ + VScaleExtExpMicrokernelTester().elements(element_size).Test(ukernel); \ + } \ + } +#define XNN_TEST_VSCALEEXTEXP_ELEMENT_DIV(ukernel, arch_flags, element_tile, ...) 
\ + TEST(ukernel, element_div) \ + { \ + for (size_t element_size = 2 * element_tile; element_size < 10 * element_tile; element_size += element_tile) { \ + VScaleExtExpMicrokernelTester().elements(element_size).Test(ukernel); \ + } \ + } diff --git a/tools/generate-vscaleextexp-test.py b/tools/generate-vscaleextexp-test.py index 9e3bfff92b1..53ea19d7a6f 100755 --- a/tools/generate-vscaleextexp-test.py +++ b/tools/generate-vscaleextexp-test.py @@ -19,8 +19,11 @@ parser = argparse.ArgumentParser( description='Vector ScaleExtExp microkernel test generator') -parser.add_argument("-s", "--spec", metavar="FILE", required=True, - help="Specification (YAML) file") +parser.add_argument("-t", "--tester", metavar="TESTER", required=True, + choices=["VScaleExtExpMicrokernelTester"], + help="Tester class to be used in the generated test") +parser.add_argument("-k", "--ukernel", metavar="FILE", required=True, + help="Microkernel type") parser.add_argument("-o", "--output", metavar="FILE", required=True, help='Output (C++ source) file') parser.set_defaults(defines=list()) @@ -36,81 +39,23 @@ def split_ukernel_name(name): return elements_tile, arch, isa -RADDEXTEXP_TEST_TEMPLATE = """\ -TEST(${TEST_NAME}, elements_eq_${ELEMENTS_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - VScaleExtExpMicrokernelTester() - .elements(${ELEMENTS_TILE}) - .Test(${TEST_FUNCTION}); -} - -$if ELEMENTS_TILE > 1: - TEST(${TEST_NAME}, elements_div_${ELEMENTS_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = ${ELEMENTS_TILE*2}; elements < ${ELEMENTS_TILE*10}; elements += ${ELEMENTS_TILE}) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}); - } - } - - TEST(${TEST_NAME}, elements_lt_${ELEMENTS_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = 1; elements < ${ELEMENTS_TILE}; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}); - } - } - -TEST(${TEST_NAME}, elements_gt_${ELEMENTS_TILE}) { - $if 
ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = ${ELEMENTS_TILE+1}; elements < ${10 if ELEMENTS_TILE == 1 else ELEMENTS_TILE*2}; elements++) { - VScaleExtExpMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}); - } -} +VSCALEEXTEXP_TEST_TEMPLATE = """\ +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) \ +XNN_TEST_VSCALEEXTEXP_ELEMENT_EQ(ukernel,arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_VSCALEEXTEXP_ELEMENT_DIV(ukernel,arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_VSCALEEXTEXP_ELEMENT_LT(ukernel,arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_VSCALEEXTEXP_ELEMENT_GT(ukernel,arch_flags, ${", ".join(TEST_ARGS)}); """ - -def generate_test_cases(ukernel, elements_tile, isa): - """Generates all tests cases for a Vector ScaleExtExp micro-kernel. - - Args: - ukernel: C name of the micro-kernel function. - elements_tile: Number of batch elements processed per one iteration of the - inner loop of the micro-kernel. - isa: instruction set required to run the micro-kernel. Generated unit test - will skip execution if the host processor doesn't support this ISA. - - Returns: - Code for the test case. 
- """ - _, test_name = ukernel.split("_", 1) - _, datatype, _ = ukernel.split("_", 2) - return xngen.preprocess(RADDEXTEXP_TEST_TEMPLATE, { - "TEST_FUNCTION": ukernel, - "TEST_NAME": test_name.upper().replace("UKERNEL_", ""), - "DATATYPE": datatype, - "ELEMENTS_TILE": elements_tile, - "ISA_CHECK": xnncommon.generate_isa_check_macro(isa), - }) - - def main(args): options = parser.parse_args(args) + tester = options.tester + tester_header = { + "VScaleExtExpMicrokernelTester": "vscaleextexp-microkernel-tester.h", + }[tester] + ukernel = options.ukernel - with codecs.open(options.spec, "r", encoding="utf-8") as spec_file: - spec_yaml = yaml.safe_load(spec_file) - if not isinstance(spec_yaml, list): - raise ValueError("expected a list of micro-kernels in the spec") - - tests = """\ + tests = """\ // Copyright 2019 Google LLC // // This source code is licensed under the BSD-style license found in the @@ -126,16 +71,23 @@ def main(args): #include "xnnpack/isa-checks.h" #include "xnnpack/vscaleextexp.h" #include "vscaleextexp-microkernel-tester.h" -""".format(specification=options.spec, generator=sys.argv[0]) - - for ukernel_spec in spec_yaml: - name = ukernel_spec["name"] - elements_tile, arch, isa = split_ukernel_name(name) - - test_case = generate_test_cases(name, elements_tile, isa) - tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa) - - xnncommon.overwrite_if_changed(options.output, tests) +""".format(specification=options.ukernel, generator=sys.argv[0]) + ukernel_parts = options.ukernel.split("-") + datatype = ukernel_parts[0] + op = ukernel_parts[1] + test_args = ["element_tile"] + test_args.append("init_params") + tests += xnncommon.make_multiline_macro(xngen.preprocess( + VSCALEEXTEXP_TEST_TEMPLATE, + { + "TEST_ARGS": test_args, + "TESTER": tester, + "DATATYPE": datatype, + }, + )) + folder = datatype + "-" + ("vscaleextexp" if datatype.startswith("f") else op) + tests += f'#include "{xnncommon.xnnpack_src()}{folder}/{options.ukernel}.h"\n' 
+ tests += "#undef XNN_UKERNEL_WITH_PARAMS\n" if __name__ == "__main__":