diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 67083ea87d447..0519736d8d59e 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include // TODO Decide whether to mark functions with this attribute. @@ -775,6 +776,95 @@ detail::enable_if_t::value, T> trunc(T x) __NOEXC { return __sycl_std::__invoke_trunc(x); } +// other marray math functions + +// TODO: can be optimized in the way marray math functions above are optimized +// (usage of vec) +#define __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, ARGPTR, \ + ...) \ + marray res; \ + for (int j = 0; j < N; j++) { \ + res[j] = \ + NAME(__VA_ARGS__, \ + address_space_cast>(&(*ARGPTR)[j])); \ + } \ + return res; + +#define __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t< \ + detail::is_svgenfloat::value && \ + detail::is_genfloatptr_marray::value, \ + marray> \ + NAME(marray ARG1, multi_ptr ARG2) \ + __NOEXC { \ + __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, ARG2, \ + __VA_ARGS__) \ + } + +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD(fract, x, iptr, + x[j]) +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD(modf, x, iptr, + x[j]) +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD(sincos, x, + cosval, x[j]) + +#undef __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_GENFLOATPTR_OVERLOAD + +#define __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENINTPTR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t< \ + detail::is_svgenfloat::value && \ + detail::is_genintptr_marray::value, \ + marray> \ + NAME(marray ARG1, multi_ptr ARG2) \ + __NOEXC { \ + __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, ARG2, \ + __VA_ARGS__) \ + } + +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENINTPTR_OVERLOAD(frexp, x, exp, + x[j]) +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENINTPTR_OVERLOAD(lgamma_r, x, signp, + x[j]) + +#undef __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_GENINTPTR_OVERLOAD + +#define __SYCL_MARRAY_MATH_FUNCTION_REMQUO_OVERLOAD(NAME, ...) \ + template \ + std::enable_if_t< \ + detail::is_svgenfloat::value && \ + detail::is_genintptr_marray::value, \ + marray> \ + NAME(marray x, marray y, \ + multi_ptr quo) __NOEXC { \ + __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, quo, \ + __VA_ARGS__) \ + } + +__SYCL_MARRAY_MATH_FUNCTION_REMQUO_OVERLOAD(remquo, x[j], y[j]) + +#undef __SYCL_MARRAY_MATH_FUNCTION_REMQUO_OVERLOAD + +#undef __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL + +template +std::enable_if_t::value, + marray, N>> +nan(marray nancode) __NOEXC { + marray, N> res; + for (int j = 0; j < N; j++) { + res[j] = nan(nancode[j]); + } + return res; +} + /* --------------- 4.13.5 Common functions. ---------------------------------*/ // svgenfloat clamp (svgenfloat x, svgenfloat minval, svgenfloat maxval) template diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index 62fe70e0491e9..a04ce83092c86 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -232,11 +232,30 @@ using is_genintptr = bool_constant< is_pointer::value && is_genint>::value && is_address_space_compliant::value>; +template +using is_genintptr_marray = bool_constant< + std::is_same, T::size()>>::value && + is_genint>>::value && + is_address_space_compliant, + gvl::nonconst_address_space_list>::value && + (IsDecorated == access::decorated::yes || + IsDecorated == access::decorated::no)>; + template using is_genfloatptr = bool_constant< is_pointer::value && is_genfloat>::value && is_address_space_compliant::value>; +template +using is_genfloatptr_marray = bool_constant< + is_mgenfloat::value && + is_address_space_compliant, + gvl::nonconst_address_space_list>::value && + (IsDecorated == access::decorated::yes || + IsDecorated == access::decorated::no)>; + template using is_genptr = bool_constant< is_pointer::value && is_gentype>::value && diff --git a/sycl/test-e2e/Basic/built-ins/marray_common.cpp b/sycl/test-e2e/Basic/built-ins/marray_common.cpp new file mode 100644 index 0000000000000..91fcc1ae9ba00 --- /dev/null +++ b/sycl/test-e2e/Basic/built-ins/marray_common.cpp @@ -0,0 +1,122 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#ifdef _WIN32 +#define _USE_MATH_DEFINES // To use math constants +#include +#endif + +#include + +#define TEST(FUNC, MARRAY_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) \ + assert(abs(result[i] - EXPECTED[i]) <= DELTA); \ + } \ + } + +#define EXPECTED(TYPE, ...) ((TYPE[]){__VA_ARGS__}) + +int main() { + sycl::queue deviceQueue; + sycl::device dev = deviceQueue.get_device(); + + sycl::marray ma1{1.0f, 2.0f}; + sycl::marray ma2{1.0f, 2.0f}; + sycl::marray ma3{3.0f, 2.0f}; + sycl::marray ma4{1.0, 2.0}; + sycl::marray ma5{M_PI, M_PI, M_PI}; + sycl::marray ma6{M_PI, M_PI, M_PI}; + sycl::marray ma7{M_PI, M_PI, M_PI}; + sycl::marray ma8{0.3f, 0.6f}; + sycl::marray ma9{5.0, 8.0}; + sycl::marray ma10{180, 180, 180}; + sycl::marray ma11{180, 180, 180}; + sycl::marray ma12{180, 180, 180}; + sycl::marray ma13{181, 179, 181}; + sycl::marray ma14{+0.0f, -0.6f}; + sycl::marray ma15{-0.0, 0.6f}; + + // sycl::clamp + TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma2, ma3); + TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, 1.0f, 3.0f); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::clamp, double, 2, EXPECTED(double, 1.0, 2.0), 0, ma4, 1.0, 3.0); + // sycl::degrees + TEST(sycl::degrees, float, 3, EXPECTED(float, 180, 180, 180), 0, ma5); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, ma6); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, + ma7); + // sycl::max + TEST(sycl::max, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, ma1, ma3); + TEST(sycl::max, float, 2, EXPECTED(float, 1.5f, 2.0f), 0, ma1, 1.5f); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::max, double, 2, EXPECTED(double, 1.5, 2.0), 0, ma4, 1.5); + // sycl::min + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma3); + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 1.5f), 0, ma1, 1.5f); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::min, double, 2, EXPECTED(double, 1.0, 1.5), 0, ma4, 1.5); + // sycl::mix + TEST(sycl::mix, float, 2, EXPECTED(float, 1.6f, 2.0f), 0, ma1, ma3, ma8); + TEST(sycl::mix, float, 2, EXPECTED(float, 1.4f, 2.0f), 0, ma1, ma3, 0.2); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::mix, double, 2, EXPECTED(double, 3.0, 5.0), 0, ma4, ma9, 0.5); + // sycl::radians + TEST(sycl::radians, float, 3, EXPECTED(float, M_PI, M_PI, M_PI), 0, ma10); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, ma11); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), + 0.002, ma12); + // sycl::step + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, ma1, ma3); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, ma4, ma9); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, + ma12, ma13); + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, 2.5f, ma3); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, ma9); + // sycl::smoothstep + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, ma8, ma1, + ma2); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 1.0, 1.0f), 0.00000001, + ma4, ma9, ma9); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), + 0, ma7, ma12, ma13); + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0553936f, 0.0f), 0.0000001, + 2.5f, 6.0f, ma3); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, + 8.0f, ma9); + // sign + TEST(sycl::sign, float, 2, EXPECTED(float, +0.0f, -1.0f), 0, ma14); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, ma15); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, + ma12); + + return 0; +} diff --git a/sycl/test-e2e/Basic/built-ins/marray_math.cpp b/sycl/test-e2e/Basic/built-ins/marray_math.cpp new file mode 100644 index 0000000000000..8f4a69822003f --- /dev/null +++ b/sycl/test-e2e/Basic/built-ins/marray_math.cpp @@ -0,0 +1,124 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#define TEST(FUNC, MARRAY_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) \ + assert(abs(result[i] - EXPECTED[i]) <= DELTA); \ + } \ + } + +#define TEST2(FUNC, MARRAY_ELEM_TYPE, PTR_TYPE, DIM, EXPECTED_1, EXPECTED_2, \ + DELTA, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + sycl::marray result_ptr; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + sycl::buffer, 1> b_ptr(&result_ptr, \ + sycl::range<1>(1)); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + sycl::accessor res_ptr_access{b_ptr, cgh}; \ + cgh.single_task([=]() { \ + sycl::multi_ptr, \ + sycl::access::address_space::global_space, \ + sycl::access::decorated::yes> \ + ptr(res_ptr_access); \ + sycl::marray res = FUNC(__VA_ARGS__, ptr); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) { \ + assert(std::abs(result[i] - EXPECTED_1[i]) <= DELTA); \ + assert(std::abs(result_ptr[i] - EXPECTED_2[i]) <= DELTA); \ + } \ + } \ + } + +#define TEST3(FUNC, MARRAY_ELEM_TYPE, DIM, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + } \ + } + +#define EXPECTED(TYPE, ...) ((TYPE[]){__VA_ARGS__}) + +int main() { + sycl::queue deviceQueue; + + sycl::marray ma1{1.0f, 2.0f}; + sycl::marray ma2{3.0f, 2.0f}; + sycl::marray ma3{180, 180, 180}; + sycl::marray ma4{1, 1, 1}; + sycl::marray ma5{180, -180, -180}; + sycl::marray ma6{1.4f, 4.2f, 5.3f}; + sycl::marray ma7{1, 2, 3}; + sycl::marray ma8{1, 2, 3}; + + TEST(sycl::fabs, float, 3, EXPECTED(float, 180, 180, 180), 0, ma5); + TEST(sycl::ilogb, int, 3, EXPECTED(int, 7, 7, 7), 0, ma3); + TEST(sycl::fmax, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, ma1, ma2); + TEST(sycl::fmax, float, 2, EXPECTED(float, 5.0f, 5.0f), 0, ma1, 5.0f); + TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma2); + TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, 5.0f); + TEST(sycl::ldexp, float, 3, EXPECTED(float, 360, 360, 360), 0, ma3, ma4); + TEST(sycl::ldexp, float, 3, EXPECTED(float, 5760, 5760, 5760), 0, ma3, 5); + TEST(sycl::pown, float, 3, EXPECTED(float, 180, 180, 180), 0.1, ma3, ma4); + TEST(sycl::pown, float, 3, EXPECTED(float, 180, 180, 180), 0.1, ma3, 1); + TEST(sycl::rootn, float, 3, EXPECTED(float, 180, 180, 180), 0.1, ma3, ma4); + TEST(sycl::rootn, float, 3, EXPECTED(float, 2.82523, 2.82523, 2.82523), + 0.00001, ma3, 5); + TEST2(sycl::fract, float, float, 3, EXPECTED(float, 0.4f, 0.2f, 0.3f), + EXPECTED(float, 1, 4, 5), 0.0001, ma6); + TEST2(sycl::modf, float, float, 3, EXPECTED(float, 0.4f, 0.2f, 0.3f), + EXPECTED(float, 1, 4, 5), 0.0001, ma6); + TEST2(sycl::sincos, float, float, 3, + EXPECTED(float, 0.98545f, -0.871576f, -0.832267f), + EXPECTED(float, 0.169967, -0.490261, 0.554375), 0.0001, ma6); + TEST2(sycl::frexp, float, int, 3, EXPECTED(float, 0.7f, 0.525f, 0.6625f), + EXPECTED(int, 1, 3, 3), 0.0001, ma6); + TEST2(sycl::lgamma_r, float, int, 3, + EXPECTED(float, -0.119613f, 2.04856f, 3.63964f), EXPECTED(int, 1, 1, 1), + 0.0001, ma6); + TEST2(sycl::remquo, float, int, 3, EXPECTED(float, 1.4f, 4.2f, 5.3f), + EXPECTED(int, 0, 0, 0), 0.0001, ma6, ma3); + TEST3(sycl::nan, float, 3, ma7); + TEST3(sycl::nan, double, 3, ma8); + TEST(sycl::half_precision::exp10, float, 2, EXPECTED(float, 10, 100), 0.1, + ma1); + + return 0; +}