-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc++] Move the implementation of CPU-based basis operations to namespace __pstl #95267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
…espace __pstl. They were always intended to be in that namespace but I was trying to keep changes orthogonal.
@llvm/pr-subscribers-libcxx Author: Louis Dionne (ldionne). Changes: They were always intended to be in that namespace but I was trying to keep changes orthogonal. Full diff: https://github.com/llvm/llvm-project/pull/95267.diff (8 files affected):
diff --git a/libcxx/include/__pstl/cpu_algos/any_of.h b/libcxx/include/__pstl/cpu_algos/any_of.h
index 3173eade7585b..cfc70c735dec6 100644
--- a/libcxx/include/__pstl/cpu_algos/any_of.h
+++ b/libcxx/include/__pstl/cpu_algos/any_of.h
@@ -29,14 +29,15 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _Index, class _Brick>
_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
std::atomic<bool> __found(false);
- auto __ret = __pstl::__cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
+ auto __ret = __cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
__found.store(true, std::memory_order_relaxed);
- __pstl::__cpu_traits<_Backend>::__cancel_execution();
+ __cpu_traits<_Backend>::__cancel_execution();
}
});
if (!__ret)
@@ -76,7 +77,7 @@ struct __cpu_parallel_any_of {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_or<_Backend>(
+ return __pstl::__parallel_or<_Backend>(
__first, __last, [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _AnyOfUnseq = __pstl::__any_of<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
auto __res = _AnyOfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
@@ -85,13 +86,14 @@ struct __cpu_parallel_any_of {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__simd_or(__first, __last - __first, __pred);
+ return __pstl::__simd_or(__first, __last - __first, __pred);
} else {
return std::any_of(__first, __last, __pred);
}
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/fill.h b/libcxx/include/__pstl/cpu_algos/fill.h
index b99a9d3c660d8..6037435df2748 100644
--- a/libcxx/include/__pstl/cpu_algos/fill.h
+++ b/libcxx/include/__pstl/cpu_algos/fill.h
@@ -26,6 +26,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Index, class _DifferenceType, class _Tp>
_LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_fill {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__for_each(
+ return __cpu_traits<_Backend>::__for_each(
__first, __last, [&__policy, &__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _FillUnseq = __pstl::__fill<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
[[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_fill {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- std::__simd_fill_n(__first, __last - __first, __value);
+ __pstl::__simd_fill_n(__first, __last - __first, __value);
return __empty{};
} else {
std::fill(__first, __last, __value);
@@ -61,6 +62,7 @@ struct __cpu_parallel_fill {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/find_if.h b/libcxx/include/__pstl/cpu_algos/find_if.h
index 3ddbee44890f6..46b684bc37925 100644
--- a/libcxx/include/__pstl/cpu_algos/find_if.h
+++ b/libcxx/include/__pstl/cpu_algos/find_if.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _Index, class _Brick, class _Compare>
_LIBCPP_HIDE_FROM_ABI optional<_Index>
@@ -43,8 +44,8 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
_DifferenceType __initial_dist = __b_first ? __n : -1;
std::atomic<_DifferenceType> __extremum(__initial_dist);
// TODO: find out what is better here: parallel_for or parallel_reduce
- auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
- __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
+ auto __res =
+ __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
// See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
// why using a shared variable scales fairly well in this situation.
if (__comp(__i - __first, __extremum)) {
@@ -67,8 +68,8 @@ template <class _Backend, class _Index, class _DifferenceType, class _Compare>
_LIBCPP_HIDE_FROM_ABI _Index
__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
// Experiments show good block sizes like this
- const _DifferenceType __block_size = 8;
- alignas(__pstl::__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
+ const _DifferenceType __block_size = 8;
+ alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
while (__end - __begin >= __block_size) {
_DifferenceType __found = 0;
_PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
@@ -106,7 +107,7 @@ struct __cpu_parallel_find_if {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_find<_Backend>(
+ return __pstl::__parallel_find<_Backend>(
__first,
__last,
[&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -120,7 +121,7 @@ struct __cpu_parallel_find_if {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
using __diff_t = __iter_diff_t<_ForwardIterator>;
- return std::__simd_first<_Backend>(
+ return __pstl::__simd_first<_Backend>(
__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
return __pred(__iter[__i]);
});
@@ -130,6 +131,7 @@ struct __cpu_parallel_find_if {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/for_each.h b/libcxx/include/__pstl/cpu_algos/for_each.h
index 1e5677d998994..0a1209cc25403 100644
--- a/libcxx/include/__pstl/cpu_algos/for_each.h
+++ b/libcxx/include/__pstl/cpu_algos/for_each.h
@@ -26,6 +26,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Iterator, class _DifferenceType, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator __simd_for_each(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_for_each {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Function __func) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__for_each(
+ return __cpu_traits<_Backend>::__for_each(
__first, __last, [&__policy, __func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _ForEachUnseq = __pstl::__for_each<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
[[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_for_each {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- std::__simd_for_each(__first, __last - __first, __func);
+ __pstl::__simd_for_each(__first, __last - __first, __func);
return __empty{};
} else {
std::for_each(__first, __last, __func);
@@ -61,6 +62,7 @@ struct __cpu_parallel_for_each {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/merge.h b/libcxx/include/__pstl/cpu_algos/merge.h
index 4f4192cccb3e8..cfd006971bfaa 100644
--- a/libcxx/include/__pstl/cpu_algos/merge.h
+++ b/libcxx/include/__pstl/cpu_algos/merge.h
@@ -29,6 +29,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_merge {
@@ -45,7 +46,7 @@ struct __cpu_parallel_merge {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- auto __res = __pstl::__cpu_traits<_Backend>::__merge(
+ auto __res = __cpu_traits<_Backend>::__merge(
__first1,
__last1,
__first2,
@@ -78,6 +79,7 @@ struct __cpu_parallel_merge {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/stable_sort.h b/libcxx/include/__pstl/cpu_algos/stable_sort.h
index 8ea5e8a01d2ce..90774c6e88fa6 100644
--- a/libcxx/include/__pstl/cpu_algos/stable_sort.h
+++ b/libcxx/include/__pstl/cpu_algos/stable_sort.h
@@ -24,6 +24,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_stable_sort {
@@ -31,7 +32,7 @@ struct __cpu_parallel_stable_sort {
_LIBCPP_HIDE_FROM_ABI optional<__empty>
operator()(_Policy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy>) {
- return __pstl::__cpu_traits<_Backend>::__stable_sort(
+ return __cpu_traits<_Backend>::__stable_sort(
__first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) {
std::stable_sort(__g_first, __g_last, __g_comp);
});
@@ -42,6 +43,7 @@ struct __cpu_parallel_stable_sort {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/transform.h b/libcxx/include/__pstl/cpu_algos/transform.h
index 440368d97f182..85bd990199195 100644
--- a/libcxx/include/__pstl/cpu_algos/transform.h
+++ b/libcxx/include/__pstl/cpu_algos/transform.h
@@ -30,6 +30,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator2
@@ -61,7 +62,7 @@ struct __cpu_parallel_transform {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- __pstl::__cpu_traits<_Backend>::__for_each(
+ __cpu_traits<_Backend>::__for_each(
__first,
__last,
[&__policy, __op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -79,7 +80,7 @@ struct __cpu_parallel_transform {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- return std::__simd_transform(
+ return __pstl::__simd_transform(
__first,
__last - __first,
__result,
@@ -110,7 +111,7 @@ struct __cpu_parallel_transform_binary {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
+ auto __res = __cpu_traits<_Backend>::__for_each(
__first1,
__last1,
[&__policy, __op, __first1, __first2, __result](
@@ -132,7 +133,7 @@ struct __cpu_parallel_transform_binary {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- return std::__simd_transform(
+ return __pstl::__simd_transform(
__first1,
__last1 - __first1,
__first2,
@@ -146,6 +147,7 @@ struct __cpu_parallel_transform_binary {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/transform_reduce.h b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
index 914c46dcd6dcf..4dde257ca9518 100644
--- a/libcxx/include/__pstl/cpu_algos/transform_reduce.h
+++ b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <typename _Backend,
typename _DifferenceType,
@@ -63,7 +64,7 @@ template <typename _Backend,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
- constexpr size_t __lane_size = __pstl::__cpu_traits<_Backend>::__lane_size;
+ constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size;
const _Size __block_size = __lane_size / sizeof(_Tp);
if (__n > 2 * __block_size && __block_size > 1) {
alignas(__lane_size) char __lane_buffer[__lane_size];
@@ -124,7 +125,7 @@ struct __cpu_parallel_transform_reduce_binary {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
- return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+ return __cpu_traits<_Backend>::__transform_reduce(
__first1,
std::move(__last1),
[__first1, __first2, __transform](_ForwardIterator1 __iter) {
@@ -148,7 +149,7 @@ struct __cpu_parallel_transform_reduce_binary {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
- return std::__simd_transform_reduce<_Backend>(
+ return __pstl::__simd_transform_reduce<_Backend>(
__last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
return __transform(__first1[__i], __first2[__i]);
});
@@ -176,7 +177,7 @@ struct __cpu_parallel_transform_reduce {
_UnaryOperation __transform) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+ return __cpu_traits<_Backend>::__transform_reduce(
std::move(__first),
std::move(__last),
[__transform](_ForwardIterator __iter) { return __transform(*__iter); },
@@ -197,7 +198,7 @@ struct __cpu_parallel_transform_reduce {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__simd_transform_reduce<_Backend>(
+ return __pstl::__simd_transform_reduce<_Backend>(
__last - __first,
std::move(__init),
std::move(__reduce),
@@ -209,6 +210,7 @@ struct __cpu_parallel_transform_reduce {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
They were always intended to be in that namespace but I was trying to keep changes orthogonal.