diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 67083ea87d447..7f8f85e733a22 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -25,6 +25,18 @@ namespace detail { template vec to_vec2(marray x, size_t start) { return {x[start], x[start + 1]}; } +template vec to_vec(marray x) { + vec vec; + for (size_t i = 0; i < N; i++) + vec[i] = x[i]; + return vec; +} +template marray to_marray(vec x) { + marray marray; + for (size_t i = 0; i < N; i++) + marray[i] = x[i]; + return marray; +} } // namespace detail #ifdef __SYCL_DEVICE_ONLY__ @@ -1715,6 +1727,70 @@ fast_normalize(T p) __NOEXC { return __sycl_std::__invoke_fast_normalize(p); } +// marray geometric functions + +#define __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(NAME, ...) \ + vec, T::size()> result_v; \ + result_v = NAME(__VA_ARGS__); \ + return detail::to_marray(result_v); + +template +std::enable_if_t::value, T> cross(T p0, + T p1) __NOEXC { + __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(cross, detail::to_vec(p0), + detail::to_vec(p1)) +} + +template +std::enable_if_t::value, T> normalize(T p) __NOEXC { + __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(normalize, detail::to_vec(p)) +} + +template +std::enable_if_t::value, T> +fast_normalize(T p) __NOEXC { + __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(fast_normalize, + detail::to_vec(p)) +} + +#undef __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL + +#define __SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD(NAME) \ + template \ + std::enable_if_t::value, \ + detail::marray_element_t> \ + NAME(T p0, T p1) __NOEXC { \ + return NAME(detail::to_vec(p0), detail::to_vec(p1)); \ + } + +// clang-format off +__SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD(dot) +__SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD(distance) +// clang-format on + +#undef __SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD + +template +std::enable_if_t::value, detail::marray_element_t> +length(T p) __NOEXC { + return __sycl_std::__invoke_length>( + detail::to_vec(p)); +} + +template +std::enable_if_t::value, + detail::marray_element_t> +fast_distance(T p0, T p1) __NOEXC { + return fast_distance(detail::to_vec(p0), detail::to_vec(p1)); +} + +template +std::enable_if_t::value, + detail::marray_element_t> +fast_length(T p) __NOEXC { + return fast_length(detail::to_vec(p)); +} + /* SYCL 1.2.1 ---- 4.13.7 Relational functions. -----------------------------*/ /* SYCL 2020 ---- 4.17.9 Relational functions. -----------------------------*/ diff --git a/sycl/include/sycl/detail/generic_type_lists.hpp b/sycl/include/sycl/detail/generic_type_lists.hpp index fa75c28aff2aa..d9fb70e5c86a9 100644 --- a/sycl/include/sycl/detail/generic_type_lists.hpp +++ b/sycl/include/sycl/detail/generic_type_lists.hpp @@ -108,6 +108,12 @@ using vector_geo_float_list = using vector_geo_double_list = type_list, vec, vec, vec>; +using marray_geo_float_list = + type_list, marray, marray>; + +using marray_geo_double_list = + type_list, marray, marray>; + using geo_half_list = type_list; using geo_float_list = type_list; @@ -121,6 +127,9 @@ using scalar_geo_list = type_list; +using marray_geo_list = + type_list; + using geo_list = type_list; // cross floating point types @@ -133,6 +142,9 @@ using cross_double_list = type_list, vec>; using cross_floating_list = type_list; +using cross_marray_list = type_list, marray, + marray, marray>; + using scalar_default_char_list = type_list; using vector_default_char_list = diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index 62fe70e0491e9..355899c4702c2 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -70,6 +70,12 @@ using is_gengeofloat = is_contained; template using is_gengeodouble = is_contained; +template +using is_gengeomarrayfloat = is_contained; + +template +using is_gengeomarray = is_contained; + template using is_gengeohalf = is_contained; template @@ -97,6 +103,9 @@ using is_gencrosshalf = is_contained; template using is_gencross = is_contained; +template +using is_gencrossmarray = is_contained; + template using is_charn = is_contained; diff --git a/sycl/test-e2e/Basic/built-ins/marray_geometric.cpp b/sycl/test-e2e/Basic/built-ins/marray_geometric.cpp new file mode 100644 index 0000000000000..133363d96ee83 --- /dev/null +++ b/sycl/test-e2e/Basic/built-ins/marray_geometric.cpp @@ -0,0 +1,137 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#define TEST(FUNC, MARRAY_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + Queue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) { \ + assert(abs(result[i] - EXPECTED[i]) <= DELTA); \ + } \ + } \ + } + +#define TEST2(FUNC, TYPE, EXPECTED, DELTA, ...) \ + { \ + { \ + TYPE result; \ + { \ + sycl::buffer b(&result, 1); \ + Queue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { res_access[0] = FUNC(__VA_ARGS__); }); \ + }); \ + } \ + assert(abs(result - EXPECTED) <= DELTA); \ + } \ + } + +#define EXPECTED(TYPE, ...) ((TYPE[]){__VA_ARGS__}) + +int main() { + sycl::device Dev; + sycl::queue Queue(Dev); + // clang-format off + sycl::marray MFloatD2 = {1.f, 2.f}; + sycl::marray MFloatD2_2 = {3.f, 5.f}; + sycl::marray MFloatD3 = {1.f, 2.f, 3.f}; + sycl::marray MFloatD3_2 = {1.f, 5.f, 7.f}; + sycl::marray MFloatD4 = {1.f, 2.f, 3.f, 4.f}; + sycl::marray MFloatD4_2 = {1.f, 5.f, 7.f, 4.f}; + + sycl::marray MDoubleD2 = {1.0, 2.0}; + sycl::marray MDoubleD2_2 = {3.0, 5.0}; + sycl::marray MDoubleD3 = {1.0, 2.0, 3.0}; + sycl::marray MDoubleD3_2 = {1.0, 5.0, 7.0}; + sycl::marray MDoubleD4 = {1.0, 2.0, 3.0, 4.0}; + sycl::marray MDoubleD4_2 = {1.0, 5.0, 7.0, 4.0}; + // clang-format on + + TEST(sycl::cross, float, 3, EXPECTED(float, -1.f, -4.f, 3.f), 0, MFloatD3, + MFloatD3_2); + TEST(sycl::cross, float, 4, EXPECTED(float, -1.f, -4.f, 3.f, 0.f), 0, + MFloatD4, MFloatD4_2); + if (Dev.has(sycl::aspect::fp64)) { + TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, + MDoubleD3, MDoubleD3_2); + TEST(sycl::cross, double, 4, EXPECTED(double, -1.f, -4.f, 3.f, 0.f), 0, + MDoubleD4, MDoubleD4_2); + } + + TEST2(sycl::dot, float, 13.f, 0, MFloatD2, MFloatD2_2); + TEST2(sycl::dot, float, 32.f, 0, MFloatD3, MFloatD3_2); + TEST2(sycl::dot, float, 48.f, 0, MFloatD4, MFloatD4_2); + if (Dev.has(sycl::aspect::fp64)) { + TEST2(sycl::dot, double, 13, 0, MDoubleD2, MDoubleD2_2); + TEST2(sycl::dot, double, 32, 0, MDoubleD3, MDoubleD3_2); + TEST2(sycl::dot, double, 48, 0, MDoubleD4, MDoubleD4_2); + } + + TEST2(sycl::length, float, 2.236068f, 1e-6, MFloatD2); + TEST2(sycl::length, float, 3.741657f, 1e-6, MFloatD3); + TEST2(sycl::length, float, 5.477225f, 1e-6, MFloatD4); + if (Dev.has(sycl::aspect::fp64)) { + TEST2(sycl::length, double, 2.236068, 1e-6, MDoubleD2); + TEST2(sycl::length, double, 3.741657, 1e-6, MDoubleD3); + TEST2(sycl::length, double, 5.477225, 1e-6, MDoubleD4); + } + + TEST2(sycl::distance, float, 3.605551f, 1e-6, MFloatD2, MFloatD2_2); + TEST2(sycl::distance, float, 5.f, 0, MFloatD3, MFloatD3_2); + TEST2(sycl::distance, float, 5.f, 0, MFloatD4, MFloatD4_2); + if (Dev.has(sycl::aspect::fp64)) { + TEST2(sycl::distance, double, 3.605551, 1e-6, MDoubleD2, MDoubleD2_2); + TEST2(sycl::distance, double, 5.0, 0, MDoubleD3, MDoubleD3_2); + TEST2(sycl::distance, double, 5.0, 0, MDoubleD4, MDoubleD4_2); + } + + TEST(sycl::normalize, float, 2, EXPECTED(float, 0.447213f, 0.894427f), 1e-6, + MFloatD2); + TEST(sycl::normalize, float, 3, + EXPECTED(float, 0.267261f, 0.534522f, 0.801784f), 1e-6, MFloatD3); + TEST(sycl::normalize, float, 4, + EXPECTED(float, 0.182574f, 0.365148f, 0.547723f, 0.730297f), 1e-6, + MFloatD4); + if (Dev.has(sycl::aspect::fp64)) { + TEST(sycl::normalize, double, 2, EXPECTED(double, 0.447213, 0.894427), 1e-6, + MDoubleD2); + TEST(sycl::normalize, double, 3, + EXPECTED(double, 0.267261, 0.534522, 0.801784), 1e-6, MDoubleD3); + TEST(sycl::normalize, double, 4, + EXPECTED(double, 0.182574, 0.365148, 0.547723, 0.730297), 1e-6, + MDoubleD4); + } + + TEST2(sycl::fast_distance, float, 3.605551f, 1e-6, MFloatD2, MFloatD2_2); + TEST2(sycl::fast_distance, float, 5.f, 0, MFloatD3, MFloatD3_2); + TEST2(sycl::fast_distance, float, 5.f, 0, MFloatD4, MFloatD4_2); + + TEST2(sycl::fast_length, float, 2.236068f, 1e-6, MFloatD2); + TEST2(sycl::fast_length, float, 3.741657f, 1e-6, MFloatD3); + TEST2(sycl::fast_length, float, 5.477225f, 1e-6, MFloatD4); + + TEST(sycl::fast_normalize, float, 2, EXPECTED(float, 0.447213f, 0.894427f), + 1e-3, MFloatD2); + TEST(sycl::fast_normalize, float, 3, + EXPECTED(float, 0.267261f, 0.534522f, 0.801784f), 1e-3, MFloatD3); + TEST(sycl::fast_normalize, float, 4, + EXPECTED(float, 0.182574f, 0.365148f, 0.547723f, 0.730297f), 1e-3, + MFloatD4); + + return 0; +}