Skip to content

[libc++] Move the implementation of CPU-based basis operations to namespace __pstl #95267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 1 commit merged on Jun 12, 2024 (source and target branch names were lost in extraction)
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions libcxx/include/__pstl/cpu_algos/any_of.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <class _Backend, class _Index, class _Brick>
_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
std::atomic<bool> __found(false);
- auto __ret = __pstl::__cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
+ auto __ret = __cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
__found.store(true, std::memory_order_relaxed);
- __pstl::__cpu_traits<_Backend>::__cancel_execution();
+ __cpu_traits<_Backend>::__cancel_execution();
}
});
if (!__ret)
Expand Down Expand Up @@ -76,7 +77,7 @@ struct __cpu_parallel_any_of {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_or<_Backend>(
+ return __pstl::__parallel_or<_Backend>(
__first, __last, [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _AnyOfUnseq = __pstl::__any_of<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
auto __res = _AnyOfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
Expand All @@ -85,13 +86,14 @@ struct __cpu_parallel_any_of {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__simd_or(__first, __last - __first, __pred);
+ return __pstl::__simd_or(__first, __last - __first, __pred);
} else {
return std::any_of(__first, __last, __pred);
}
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

_LIBCPP_POP_MACROS
Expand Down
6 changes: 4 additions & 2 deletions libcxx/include/__pstl/cpu_algos/fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <class _Index, class _DifferenceType, class _Tp>
_LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
Expand All @@ -43,7 +44,7 @@ struct __cpu_parallel_fill {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__for_each(
+ return __cpu_traits<_Backend>::__for_each(
__first, __last, [&__policy, &__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _FillUnseq = __pstl::__fill<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
[[maybe_unused]] auto __res =
Expand All @@ -52,7 +53,7 @@ struct __cpu_parallel_fill {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- std::__simd_fill_n(__first, __last - __first, __value);
+ __pstl::__simd_fill_n(__first, __last - __first, __value);
return __empty{};
} else {
std::fill(__first, __last, __value);
Expand All @@ -61,6 +62,7 @@ struct __cpu_parallel_fill {
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
Expand Down
14 changes: 8 additions & 6 deletions libcxx/include/__pstl/cpu_algos/find_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <class _Backend, class _Index, class _Brick, class _Compare>
_LIBCPP_HIDE_FROM_ABI optional<_Index>
Expand All @@ -43,8 +44,8 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
_DifferenceType __initial_dist = __b_first ? __n : -1;
std::atomic<_DifferenceType> __extremum(__initial_dist);
// TODO: find out what is better here: parallel_for or parallel_reduce
- auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
- __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
+ auto __res =
+ __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
// See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
// why using a shared variable scales fairly well in this situation.
if (__comp(__i - __first, __extremum)) {
Expand All @@ -67,8 +68,8 @@ template <class _Backend, class _Index, class _DifferenceType, class _Compare>
_LIBCPP_HIDE_FROM_ABI _Index
__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
// Experiments show good block sizes like this
- const _DifferenceType __block_size = 8;
- alignas(__pstl::__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
+ const _DifferenceType __block_size = 8;
+ alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
while (__end - __begin >= __block_size) {
_DifferenceType __found = 0;
_PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
Expand Down Expand Up @@ -106,7 +107,7 @@ struct __cpu_parallel_find_if {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_find<_Backend>(
+ return __pstl::__parallel_find<_Backend>(
__first,
__last,
[&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
Expand All @@ -120,7 +121,7 @@ struct __cpu_parallel_find_if {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
using __diff_t = __iter_diff_t<_ForwardIterator>;
- return std::__simd_first<_Backend>(
+ return __pstl::__simd_first<_Backend>(
__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
return __pred(__iter[__i]);
});
Expand All @@ -130,6 +131,7 @@ struct __cpu_parallel_find_if {
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

_LIBCPP_POP_MACROS
Expand Down
6 changes: 4 additions & 2 deletions libcxx/include/__pstl/cpu_algos/for_each.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <class _Iterator, class _DifferenceType, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator __simd_for_each(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
Expand All @@ -43,7 +44,7 @@ struct __cpu_parallel_for_each {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Function __func) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__for_each(
+ return __cpu_traits<_Backend>::__for_each(
__first, __last, [&__policy, __func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _ForEachUnseq = __pstl::__for_each<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
[[maybe_unused]] auto __res =
Expand All @@ -52,7 +53,7 @@ struct __cpu_parallel_for_each {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- std::__simd_for_each(__first, __last - __first, __func);
+ __pstl::__simd_for_each(__first, __last - __first, __func);
return __empty{};
} else {
std::for_each(__first, __last, __func);
Expand All @@ -61,6 +62,7 @@ struct __cpu_parallel_for_each {
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
Expand Down
4 changes: 3 additions & 1 deletion libcxx/include/__pstl/cpu_algos/merge.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_merge {
Expand All @@ -45,7 +46,7 @@ struct __cpu_parallel_merge {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- auto __res = __pstl::__cpu_traits<_Backend>::__merge(
+ auto __res = __cpu_traits<_Backend>::__merge(
__first1,
__last1,
__first2,
Expand Down Expand Up @@ -78,6 +79,7 @@ struct __cpu_parallel_merge {
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

_LIBCPP_POP_MACROS
Expand Down
4 changes: 3 additions & 1 deletion libcxx/include/__pstl/cpu_algos/stable_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_stable_sort {
template <class _Policy, class _RandomAccessIterator, class _Comp>
_LIBCPP_HIDE_FROM_ABI optional<__empty>
operator()(_Policy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy>) {
- return __pstl::__cpu_traits<_Backend>::__stable_sort(
+ return __cpu_traits<_Backend>::__stable_sort(
__first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) {
std::stable_sort(__g_first, __g_last, __g_comp);
});
Expand All @@ -42,6 +43,7 @@ struct __cpu_parallel_stable_sort {
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
Expand Down
10 changes: 6 additions & 4 deletions libcxx/include/__pstl/cpu_algos/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator2
Expand Down Expand Up @@ -61,7 +62,7 @@ struct __cpu_parallel_transform {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- __pstl::__cpu_traits<_Backend>::__for_each(
+ __cpu_traits<_Backend>::__for_each(
__first,
__last,
[&__policy, __op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
Expand All @@ -79,7 +80,7 @@ struct __cpu_parallel_transform {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- return std::__simd_transform(
+ return __pstl::__simd_transform(
__first,
__last - __first,
__result,
Expand Down Expand Up @@ -110,7 +111,7 @@ struct __cpu_parallel_transform_binary {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
+ auto __res = __cpu_traits<_Backend>::__for_each(
__first1,
__last1,
[&__policy, __op, __first1, __first2, __result](
Expand All @@ -132,7 +133,7 @@ struct __cpu_parallel_transform_binary {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- return std::__simd_transform(
+ return __pstl::__simd_transform(
__first1,
__last1 - __first1,
__first2,
Expand All @@ -146,6 +147,7 @@ struct __cpu_parallel_transform_binary {
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

_LIBCPP_POP_MACROS
Expand Down
12 changes: 7 additions & 5 deletions libcxx/include/__pstl/cpu_algos/transform_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

_LIBCPP_BEGIN_NAMESPACE_STD
+ namespace __pstl {

template <typename _Backend,
typename _DifferenceType,
Expand Down Expand Up @@ -63,7 +64,7 @@ template <typename _Backend,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
- constexpr size_t __lane_size = __pstl::__cpu_traits<_Backend>::__lane_size;
+ constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size;
const _Size __block_size = __lane_size / sizeof(_Tp);
if (__n > 2 * __block_size && __block_size > 1) {
alignas(__lane_size) char __lane_buffer[__lane_size];
Expand Down Expand Up @@ -124,7 +125,7 @@ struct __cpu_parallel_transform_reduce_binary {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
- return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+ return __cpu_traits<_Backend>::__transform_reduce(
__first1,
std::move(__last1),
[__first1, __first2, __transform](_ForwardIterator1 __iter) {
Expand All @@ -148,7 +149,7 @@ struct __cpu_parallel_transform_reduce_binary {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
- return std::__simd_transform_reduce<_Backend>(
+ return __pstl::__simd_transform_reduce<_Backend>(
__last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
return __transform(__first1[__i], __first2[__i]);
});
Expand Down Expand Up @@ -176,7 +177,7 @@ struct __cpu_parallel_transform_reduce {
_UnaryOperation __transform) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+ return __cpu_traits<_Backend>::__transform_reduce(
std::move(__first),
std::move(__last),
[__transform](_ForwardIterator __iter) { return __transform(*__iter); },
Expand All @@ -197,7 +198,7 @@ struct __cpu_parallel_transform_reduce {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__simd_transform_reduce<_Backend>(
+ return __pstl::__simd_transform_reduce<_Backend>(
__last - __first,
std::move(__init),
std::move(__reduce),
Expand All @@ -209,6 +210,7 @@ struct __cpu_parallel_transform_reduce {
}
};

+ } // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
Expand Down
Loading