Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 16 additions & 19 deletions include/oneapi/dpl/pstl/algorithm_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,14 +436,15 @@ template <class _RandomAccessIterator, class _OutputIterator, class _UnaryPredic
_OutputIterator __brick_copy_if(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryPredicate,
/*vector=*/::std::true_type) noexcept;

// Serial mask brick for a unary predicate.
// The visible definition writes __pred(*it) into the mask for every element of [first, last) and
// returns {number of true entries, number of false entries}. It also static_asserts that the
// iterator is a random access iterator, despite the _ForwardIterator parameter name.
template <class _DifferenceType, class _ForwardIterator, class _UnaryPredicate>
::std::pair<_DifferenceType, _DifferenceType>
__brick_calc_mask_1(_ForwardIterator, _ForwardIterator, bool* __restrict, _UnaryPredicate,
/*vector=*/::std::false_type) noexcept;
// Vectorized counterpart of the serial __brick_calc_mask_1 overload.
// The visible definition forwards to __unseq_backend::__simd_calc_mask_1 and returns
// {count reported by that call, range length minus that count}.
template <class _DifferenceType, class _RandomAccessIterator, class _UnaryPredicate>
::std::pair<_DifferenceType, _DifferenceType>
__brick_calc_mask_1(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _UnaryPredicate,
/*vector=*/::std::true_type) noexcept;
// Serial mask brick driven by an index-based predicate.
// Parameters, in order: base iterator, element count, predicate, output mask.
// The visible definition stores __pred(__first, __i) into __mask[__i] for each __i in [0, __len)
// and returns {number of true entries, __len minus that number}.
// The predicate receives the base iterator and an index, not a dereferenced element.
template <class _RandomAccessIterator, class _DifferenceType, class _IterPredicate>
std::pair<_DifferenceType, _DifferenceType>
__brick_compute_mask(_RandomAccessIterator, _DifferenceType, _IterPredicate, bool*,
/*vector=*/std::false_type) noexcept;

// Vectorized counterpart of the serial __brick_compute_mask overload.
// The visible definition forwards to __unseq_backend::__simd_compute_mask(first, len, pred, mask)
// and returns {count reported by that call, __len minus that count}.
template <class _RandomAccessIterator, class _DifferenceType, class _IterPredicate>
std::pair<_DifferenceType, _DifferenceType>
__brick_compute_mask(_RandomAccessIterator, _DifferenceType, _IterPredicate, bool*,
/*vector=*/std::true_type) noexcept;

template <class _ForwardIterator, class _OutputIterator>
void
Expand All @@ -452,7 +453,7 @@ __brick_copy_by_mask(_ForwardIterator, _ForwardIterator, _OutputIterator, bool*,

template <class _RandomAccessIterator, class _OutputIterator>
void
__brick_copy_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, bool* __restrict,
__brick_copy_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, bool*,
/*vector=*/::std::true_type) noexcept;

template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2>
Expand All @@ -465,6 +466,12 @@ void
__brick_partition_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, _OutputIterator2, bool*,
/*vector=*/::std::true_type) noexcept;

// Parallel mask-then-copy helper; the visible callers are the parallel __pattern_copy_if and
// __pattern_unique_copy overloads.
// Parameters, in order: dispatch tag, execution policy, input begin, number of input elements,
// output begin, predicate invoked as pred(iterator, index).
// Returns the output iterator advanced by the number of mask entries the predicate set to true.
template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _DifferenceType,
class _RandomAccessIterator2, class _IterPredicate>
_RandomAccessIterator2
__parallel_selective_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _DifferenceType,
_RandomAccessIterator2, _IterPredicate);

template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
_OutputIterator
__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator,
Expand Down Expand Up @@ -537,16 +544,6 @@ _OutputIterator
__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator,
_BinaryPredicate) noexcept;

// Serial mask brick for a binary predicate over adjacent elements.
// The visible definition writes !pred(*it, *(it - 1)) into the mask for each element of
// [first, last) and returns the number of true entries.
// NOTE(review): this declaration has a leading _ExecutionPolicy template parameter that the
// visible definition's template parameter list does not have, so the two do not declare the
// same template - confirm whether the extra parameter is intentional.
template <class _ExecutionPolicy, class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
_DifferenceType
__brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate,
/*vector=*/::std::false_type) noexcept;

// Vectorized counterpart of the serial __brick_calc_mask_2 overload.
// The visible definition forwards to __unseq_backend::__simd_calc_mask_2 with the range length
// and returns that call's result.
template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
_DifferenceType
__brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate,
/*vector=*/::std::true_type) noexcept;

template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
class _BinaryPredicate>
_RandomAccessIterator2
Expand Down
173 changes: 62 additions & 111 deletions include/oneapi/dpl/pstl/algorithm_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1215,36 +1215,27 @@ __brick_copy_if(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _
#endif
}

// TODO: Try to use transform_reduce for combining __brick_copy_if_phase1 on IsVector.
template <class _DifferenceType, class _ForwardIterator, class _UnaryPredicate>
::std::pair<_DifferenceType, _DifferenceType>
__brick_calc_mask_1(_ForwardIterator __first, _ForwardIterator __last, bool* __restrict __mask, _UnaryPredicate __pred,
/*vector=*/::std::false_type) noexcept
template <class _RandomAccessIterator, class _DifferenceType, class _IterPredicate>
std::pair<_DifferenceType, _DifferenceType>
__brick_compute_mask(_RandomAccessIterator __first, _DifferenceType __len, _IterPredicate __pred, bool* __mask,
/*vector=*/std::false_type) noexcept
{
auto __count_true = _DifferenceType(0);
auto __size = __last - __first;

static_assert(__is_random_access_iterator_v<_ForwardIterator>,
"Pattern-brick error. Should be a random access iterator.");

for (; __first != __last; ++__first, (void)++__mask)
_DifferenceType __count_true = 0;
for (_DifferenceType __i = 0; __i < __len; ++__i)
{
*__mask = __pred(*__first);
if (*__mask)
{
++__count_true;
}
__mask[__i] = __pred(__first, __i);
__count_true += __mask[__i];
}
return ::std::make_pair(__count_true, __size - __count_true);
return std::make_pair(__count_true, __len - __count_true);
}

template <class _DifferenceType, class _RandomAccessIterator, class _UnaryPredicate>
::std::pair<_DifferenceType, _DifferenceType>
__brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __mask, _UnaryPredicate __pred,
/*vector=*/::std::true_type) noexcept
template <class _RandomAccessIterator, class _DifferenceType, class _IterPredicate>
std::pair<_DifferenceType, _DifferenceType>
__brick_compute_mask(_RandomAccessIterator __first, _DifferenceType __len, _IterPredicate __pred, bool* __mask,
/*vector=*/std::true_type) noexcept
{
auto __result = __unseq_backend::__simd_calc_mask_1(__first, __last - __first, __mask, __pred);
return ::std::make_pair(__result, (__last - __first) - __result);
auto __count_true = __unseq_backend::__simd_compute_mask(__first, __len, __pred, __mask);
return std::make_pair(__count_true, __len - __count_true);
}

template <class _ForwardIterator, class _OutputIterator, class _Assigner>
Expand All @@ -1265,7 +1256,7 @@ __brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputI
template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Assigner>
void
__brick_copy_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
bool* __restrict __mask, _Assigner __assigner, /*vector=*/::std::true_type) noexcept
bool* __mask, _Assigner __assigner, /*vector=*/::std::true_type) noexcept
{
#if (_PSTL_MONOTONIC_PRESENT || _ONEDPL_MONOTONIC_PRESENT)
__unseq_backend::__simd_copy_by_mask(__first, __last - __first, __result, __mask, __assigner);
Expand Down Expand Up @@ -1307,6 +1298,34 @@ __brick_partition_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1
#endif
}

// Copies a predicate-selected subset of the __n elements starting at __first into __result.
//
// A temporary bool mask with one entry per input element is filled during the reduce phase of a
// strict scan (via __brick_compute_mask, which calls __pred(iterator, index)); the scan phase then
// hands each chunk, its mask slice and its output offset to __brick_copy_by_mask.
//
// Returns __result advanced by the total number of true mask entries reported by the scan.
template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _DifferenceType,
          class _RandomAccessIterator2, class _IterPredicate>
_RandomAccessIterator2
__parallel_selective_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
                          _DifferenceType __n, _RandomAccessIterator2 __result, _IterPredicate __pred)
{
    using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;

    // The buffer is created before entering the exception handler and outlives the whole scan.
    __par_backend::__buffer<bool> __flag_storage(__n);
    bool* __flags = __flag_storage.get();

    return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __flags]() {
        // Receives the overall number of selected elements from the apex callback.
        _DifferenceType __selected{};

        __par_backend::__parallel_strict_scan(
            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
            // Reduce: fill the mask slice for one chunk and report how many entries are true.
            [=](_DifferenceType __offset, _DifferenceType __chunk) {
                return __internal::__brick_compute_mask(__first + __offset, __chunk, __pred, __flags + __offset,
                                                        _IsVector{})
                    .first;
            },
            // Combine: per-chunk counts are summed.
            std::plus<_DifferenceType>(),
            // Scan: copy one chunk by its mask slice, starting at the chunk's output offset.
            [=](_DifferenceType __offset, _DifferenceType __chunk, _DifferenceType __out_offset) {
                auto __assign = [](_RandomAccessIterator1 __src, _RandomAccessIterator2 __dst) { *__dst = *__src; };
                __internal::__brick_copy_by_mask(__first + __offset, __first + (__offset + __chunk),
                                                 __result + __out_offset, __flags + __offset, __assign, _IsVector{});
            },
            // Apex: remember the grand total so the end of the output can be computed.
            [&__selected](_DifferenceType __total) { __selected = __total; });

        return __result + __selected;
    });
}

template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
_OutputIterator
__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
Expand All @@ -1320,35 +1339,16 @@ __pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIt
template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
class _UnaryPredicate>
_RandomAccessIterator2
__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
__pattern_copy_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
_RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred)
{
using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;

using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
const _DifferenceType __n = __last - __first;
if (_DifferenceType(1) < __n)
{
__par_backend::__buffer<bool> __mask_buf(__n);
return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() {
bool* __mask = __mask_buf.get();
_DifferenceType __m{};
__par_backend::__parallel_strict_scan(
__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
[=](_DifferenceType __i, _DifferenceType __len) { // Reduce
return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len),
__mask + __i, __pred, _IsVector{})
.first;
},
::std::plus<_DifferenceType>(), // Combine
[=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
__internal::__brick_copy_by_mask(
__first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
[](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{});
},
[&__m](_DifferenceType __total) { __m = __total; });
return __result + __m;
});
return __parallel_selective_copy(
__tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
[&__pred](_RandomAccessIterator1 __it, _DifferenceType __idx) { return __pred(__it[__idx]); });
}
// trivial sequence - use serial algorithm
return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{});
Expand Down Expand Up @@ -1574,74 +1574,23 @@ __pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forwa
return __internal::__brick_unique_copy(__first, __last, __result, __pred, typename _Tag::__is_vector{});
}

// Serial mask brick for a binary predicate over adjacent elements.
//
// For every position in [__first, __last) the mask entry is set to true when __pred reports that
// the element and its predecessor are NOT equivalent. The element before __first is read, so the
// caller must pass a range whose first element has a valid predecessor.
//
// Returns the number of mask entries set to true.
template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
_DifferenceType
__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
                    _BinaryPredicate __pred, /*vector=*/::std::false_type) noexcept
{
    _DifferenceType __distinct = 0;
    while (__first != __last)
    {
        const bool __differs_from_prev = !__pred(*__first, *(__first - 1));
        *__mask = __differs_from_prev;
        __distinct += __differs_from_prev;
        ++__first;
        ++__mask;
    }
    return __distinct;
}

// Vectorized mask brick for a binary predicate: converts the iterator pair into a length and
// delegates to __unseq_backend::__simd_calc_mask_2, returning that call's result.
template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
_DifferenceType
__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
                    _BinaryPredicate __pred, /*vector=*/::std::true_type) noexcept
{
    const auto __length = __last - __first;
    return __unseq_backend::__simd_calc_mask_2(__first, __length, __mask, __pred);
}

template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
class _BinaryPredicate>
_RandomAccessIterator2
__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
__pattern_unique_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
_RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred)
{
using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;

using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
const _DifferenceType __n = __last - __first;
_DifferenceType __n = __last - __first;
if (_DifferenceType(2) < __n)
{
__par_backend::__buffer<bool> __mask_buf(__n);
if (_DifferenceType(2) < __n)
{
return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() {
bool* __mask = __mask_buf.get();
_DifferenceType __m{};
__par_backend::__parallel_strict_scan(
__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
[=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce
_DifferenceType __extra = 0;
if (__i == 0)
{
// Special boundary case
__mask[__i] = true;
if (--__len == 0)
return 1;
++__i;
++__extra;
}
return __internal::__brick_calc_mask_2<_DifferenceType>(__first + __i, __first + (__i + __len),
__mask + __i, __pred, _IsVector{}) +
__extra;
},
::std::plus<_DifferenceType>(), // Combine
[=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
// Phase 2 is same as for __pattern_copy_if
__internal::__brick_copy_by_mask(
__first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
[](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{});
},
[&__m](_DifferenceType __total) { __m = __total; });
return __result + __m;
*__result++ = *__first++; // Always copy the first element
--__n;
return __parallel_selective_copy(
__tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
[&__pred](_RandomAccessIterator1 __it, _DifferenceType __idx) {
return !__pred(__it[__idx], __it[__idx - 1]);
});
}
}
// trivial sequence - use serial algorithm
return __internal::__brick_unique_copy(__first, __last, __result, __pred, _IsVector{});
Expand Down Expand Up @@ -2370,15 +2319,17 @@ __pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _
if (_DifferenceType(1) < __n)
{
__par_backend::__buffer<bool> __mask_buf(__n);
return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __pred, &__mask_buf]() {
return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, &__pred, &__mask_buf]() {
bool* __mask = __mask_buf.get();
_ReturnType __m{};
__par_backend::__parallel_strict_scan(
__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n,
std::make_pair(_DifferenceType(0), _DifferenceType(0)),
[=](_DifferenceType __i, _DifferenceType __len) { // Reduce
return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len),
__mask + __i, __pred, _IsVector{});
[=, &__pred](_DifferenceType __i, _DifferenceType __len) { // Reduce
return __internal::__brick_compute_mask(
__first + __i, __len,
[&__pred](_RandomAccessIterator1 __it, _DifferenceType __idx) { return __pred(__it[__idx]); },
__mask + __i, _IsVector{});
},
[](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType {
return ::std::make_pair(__x.first + __y.first, __x.second + __y.second);
Expand Down
21 changes: 3 additions & 18 deletions include/oneapi/dpl/pstl/unseq_backend_simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,31 +282,16 @@ __simd_copy_if(_InputIterator __first, _DifferenceType __n, _OutputIterator __re
return __result + __cnt;
}

template <class _InputIterator, class _DifferenceType, class _BinaryPredicate>
template <typename _Iterator, typename _DifferenceType, typename _IterPredicate>
_DifferenceType
__simd_calc_mask_2(_InputIterator __first, _DifferenceType __n, bool* __mask, _BinaryPredicate __pred) noexcept
__simd_compute_mask(_Iterator __first, _DifferenceType __n, _IterPredicate __pred, bool* __mask) noexcept
{
_DifferenceType __count = 0;

_ONEDPL_PRAGMA_SIMD_REDUCTION(+ : __count)
for (_DifferenceType __i = 0; __i < __n; ++__i)
{
__mask[__i] = !__pred(__first[__i], __first[__i - 1]);
__count += __mask[__i];
}
return __count;
}

template <class _InputIterator, class _DifferenceType, class _UnaryPredicate>
_DifferenceType
__simd_calc_mask_1(_InputIterator __first, _DifferenceType __n, bool* __mask, _UnaryPredicate __pred) noexcept
{
_DifferenceType __count = 0;

_ONEDPL_PRAGMA_SIMD_REDUCTION(+ : __count)
for (_DifferenceType __i = 0; __i < __n; ++__i)
{
__mask[__i] = __pred(__first[__i]);
__mask[__i] = __pred(__first, __i);
__count += __mask[__i];
}
return __count;
Expand Down
Loading