Skip to content

[libc++] Move the implementation of CPU-based basis operations to namespace __pstl #95267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from review/pstl-cpu-algos
Jun 12, 2024

Conversation

ldionne
Copy link
Member

@ldionne ldionne commented Jun 12, 2024

They were always intended to be in that namespace but I was trying to keep changes orthogonal.

[libc++] Move the implementation of CPU-based basis operations to namespace __pstl

They were always intended to be in that namespace but I was trying to
keep changes orthogonal.
@ldionne ldionne requested a review from a team as a code owner June 12, 2024 16:39
@llvmbot llvmbot added the "libc++" label (libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.) Jun 12, 2024
@llvmbot
Copy link
Member

llvmbot commented Jun 12, 2024

@llvm/pr-subscribers-libcxx

Author: Louis Dionne (ldionne)

Changes

They were always intended to be in that namespace but I was trying to keep changes orthogonal.


Full diff: https://github.com/llvm/llvm-project/pull/95267.diff

8 Files Affected:

  • (modified) libcxx/include/__pstl/cpu_algos/any_of.h (+6-4)
  • (modified) libcxx/include/__pstl/cpu_algos/fill.h (+4-2)
  • (modified) libcxx/include/__pstl/cpu_algos/find_if.h (+8-6)
  • (modified) libcxx/include/__pstl/cpu_algos/for_each.h (+4-2)
  • (modified) libcxx/include/__pstl/cpu_algos/merge.h (+3-1)
  • (modified) libcxx/include/__pstl/cpu_algos/stable_sort.h (+3-1)
  • (modified) libcxx/include/__pstl/cpu_algos/transform.h (+6-4)
  • (modified) libcxx/include/__pstl/cpu_algos/transform_reduce.h (+7-5)
diff --git a/libcxx/include/__pstl/cpu_algos/any_of.h b/libcxx/include/__pstl/cpu_algos/any_of.h
index 3173eade7585b..cfc70c735dec6 100644
--- a/libcxx/include/__pstl/cpu_algos/any_of.h
+++ b/libcxx/include/__pstl/cpu_algos/any_of.h
@@ -29,14 +29,15 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _Index, class _Brick>
 _LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
   std::atomic<bool> __found(false);
-  auto __ret = __pstl::__cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
+  auto __ret = __cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
     if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
       __found.store(true, std::memory_order_relaxed);
-      __pstl::__cpu_traits<_Backend>::__cancel_execution();
+      __cpu_traits<_Backend>::__cancel_execution();
     }
   });
   if (!__ret)
@@ -76,7 +77,7 @@ struct __cpu_parallel_any_of {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__parallel_or<_Backend>(
+      return __pstl::__parallel_or<_Backend>(
           __first, __last, [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
             using _AnyOfUnseq = __pstl::__any_of<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
             auto __res = _AnyOfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
@@ -85,13 +86,14 @@ struct __cpu_parallel_any_of {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__simd_or(__first, __last - __first, __pred);
+      return __pstl::__simd_or(__first, __last - __first, __pred);
     } else {
       return std::any_of(__first, __last, __pred);
     }
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/fill.h b/libcxx/include/__pstl/cpu_algos/fill.h
index b99a9d3c660d8..6037435df2748 100644
--- a/libcxx/include/__pstl/cpu_algos/fill.h
+++ b/libcxx/include/__pstl/cpu_algos/fill.h
@@ -26,6 +26,7 @@
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Index, class _DifferenceType, class _Tp>
 _LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_fill {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return __pstl::__cpu_traits<_Backend>::__for_each(
+      return __cpu_traits<_Backend>::__for_each(
           __first, __last, [&__policy, &__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
             using _FillUnseq = __pstl::__fill<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
             [[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_fill {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      std::__simd_fill_n(__first, __last - __first, __value);
+      __pstl::__simd_fill_n(__first, __last - __first, __value);
       return __empty{};
     } else {
       std::fill(__first, __last, __value);
@@ -61,6 +62,7 @@ struct __cpu_parallel_fill {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/find_if.h b/libcxx/include/__pstl/cpu_algos/find_if.h
index 3ddbee44890f6..46b684bc37925 100644
--- a/libcxx/include/__pstl/cpu_algos/find_if.h
+++ b/libcxx/include/__pstl/cpu_algos/find_if.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _Index, class _Brick, class _Compare>
 _LIBCPP_HIDE_FROM_ABI optional<_Index>
@@ -43,8 +44,8 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
   _DifferenceType __initial_dist = __b_first ? __n : -1;
   std::atomic<_DifferenceType> __extremum(__initial_dist);
   // TODO: find out what is better here: parallel_for or parallel_reduce
-  auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
-      __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
+  auto __res =
+      __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
         // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
         // why using a shared variable scales fairly well in this situation.
         if (__comp(__i - __first, __extremum)) {
@@ -67,8 +68,8 @@ template <class _Backend, class _Index, class _DifferenceType, class _Compare>
 _LIBCPP_HIDE_FROM_ABI _Index
 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
   // Experiments show good block sizes like this
-  const _DifferenceType __block_size                                                        = 8;
-  alignas(__pstl::__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
+  const _DifferenceType __block_size                                                = 8;
+  alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
   while (__end - __begin >= __block_size) {
     _DifferenceType __found = 0;
     _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
@@ -106,7 +107,7 @@ struct __cpu_parallel_find_if {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__parallel_find<_Backend>(
+      return __pstl::__parallel_find<_Backend>(
           __first,
           __last,
           [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -120,7 +121,7 @@ struct __cpu_parallel_find_if {
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
       using __diff_t = __iter_diff_t<_ForwardIterator>;
-      return std::__simd_first<_Backend>(
+      return __pstl::__simd_first<_Backend>(
           __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
             return __pred(__iter[__i]);
           });
@@ -130,6 +131,7 @@ struct __cpu_parallel_find_if {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/for_each.h b/libcxx/include/__pstl/cpu_algos/for_each.h
index 1e5677d998994..0a1209cc25403 100644
--- a/libcxx/include/__pstl/cpu_algos/for_each.h
+++ b/libcxx/include/__pstl/cpu_algos/for_each.h
@@ -26,6 +26,7 @@
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Iterator, class _DifferenceType, class _Function>
 _LIBCPP_HIDE_FROM_ABI _Iterator __simd_for_each(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_for_each {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Function __func) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return __pstl::__cpu_traits<_Backend>::__for_each(
+      return __cpu_traits<_Backend>::__for_each(
           __first, __last, [&__policy, __func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
             using _ForEachUnseq = __pstl::__for_each<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
             [[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_for_each {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      std::__simd_for_each(__first, __last - __first, __func);
+      __pstl::__simd_for_each(__first, __last - __first, __func);
       return __empty{};
     } else {
       std::for_each(__first, __last, __func);
@@ -61,6 +62,7 @@ struct __cpu_parallel_for_each {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/merge.h b/libcxx/include/__pstl/cpu_algos/merge.h
index 4f4192cccb3e8..cfd006971bfaa 100644
--- a/libcxx/include/__pstl/cpu_algos/merge.h
+++ b/libcxx/include/__pstl/cpu_algos/merge.h
@@ -29,6 +29,7 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _RawExecutionPolicy>
 struct __cpu_parallel_merge {
@@ -45,7 +46,7 @@ struct __cpu_parallel_merge {
                   __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      auto __res = __pstl::__cpu_traits<_Backend>::__merge(
+      auto __res = __cpu_traits<_Backend>::__merge(
           __first1,
           __last1,
           __first2,
@@ -78,6 +79,7 @@ struct __cpu_parallel_merge {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/stable_sort.h b/libcxx/include/__pstl/cpu_algos/stable_sort.h
index 8ea5e8a01d2ce..90774c6e88fa6 100644
--- a/libcxx/include/__pstl/cpu_algos/stable_sort.h
+++ b/libcxx/include/__pstl/cpu_algos/stable_sort.h
@@ -24,6 +24,7 @@
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _RawExecutionPolicy>
 struct __cpu_parallel_stable_sort {
@@ -31,7 +32,7 @@ struct __cpu_parallel_stable_sort {
   _LIBCPP_HIDE_FROM_ABI optional<__empty>
   operator()(_Policy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy>) {
-      return __pstl::__cpu_traits<_Backend>::__stable_sort(
+      return __cpu_traits<_Backend>::__stable_sort(
           __first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) {
             std::stable_sort(__g_first, __g_last, __g_comp);
           });
@@ -42,6 +43,7 @@ struct __cpu_parallel_stable_sort {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/transform.h b/libcxx/include/__pstl/cpu_algos/transform.h
index 440368d97f182..85bd990199195 100644
--- a/libcxx/include/__pstl/cpu_algos/transform.h
+++ b/libcxx/include/__pstl/cpu_algos/transform.h
@@ -30,6 +30,7 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
 _LIBCPP_HIDE_FROM_ABI _Iterator2
@@ -61,7 +62,7 @@ struct __cpu_parallel_transform {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      __pstl::__cpu_traits<_Backend>::__for_each(
+      __cpu_traits<_Backend>::__for_each(
           __first,
           __last,
           [&__policy, __op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -79,7 +80,7 @@ struct __cpu_parallel_transform {
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      return std::__simd_transform(
+      return __pstl::__simd_transform(
           __first,
           __last - __first,
           __result,
@@ -110,7 +111,7 @@ struct __cpu_parallel_transform_binary {
                   __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
+      auto __res = __cpu_traits<_Backend>::__for_each(
           __first1,
           __last1,
           [&__policy, __op, __first1, __first2, __result](
@@ -132,7 +133,7 @@ struct __cpu_parallel_transform_binary {
                          __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      return std::__simd_transform(
+      return __pstl::__simd_transform(
           __first1,
           __last1 - __first1,
           __first2,
@@ -146,6 +147,7 @@ struct __cpu_parallel_transform_binary {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/transform_reduce.h b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
index 914c46dcd6dcf..4dde257ca9518 100644
--- a/libcxx/include/__pstl/cpu_algos/transform_reduce.h
+++ b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <typename _Backend,
           typename _DifferenceType,
@@ -63,7 +64,7 @@ template <typename _Backend,
                         int>    = 0>
 _LIBCPP_HIDE_FROM_ABI _Tp
 __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
-  constexpr size_t __lane_size = __pstl::__cpu_traits<_Backend>::__lane_size;
+  constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size;
   const _Size __block_size     = __lane_size / sizeof(_Tp);
   if (__n > 2 * __block_size && __block_size > 1) {
     alignas(__lane_size) char __lane_buffer[__lane_size];
@@ -124,7 +125,7 @@ struct __cpu_parallel_transform_reduce_binary {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
-      return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+      return __cpu_traits<_Backend>::__transform_reduce(
           __first1,
           std::move(__last1),
           [__first1, __first2, __transform](_ForwardIterator1 __iter) {
@@ -148,7 +149,7 @@ struct __cpu_parallel_transform_reduce_binary {
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
-      return std::__simd_transform_reduce<_Backend>(
+      return __pstl::__simd_transform_reduce<_Backend>(
           __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
             return __transform(__first1[__i], __first2[__i]);
           });
@@ -176,7 +177,7 @@ struct __cpu_parallel_transform_reduce {
              _UnaryOperation __transform) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+      return __cpu_traits<_Backend>::__transform_reduce(
           std::move(__first),
           std::move(__last),
           [__transform](_ForwardIterator __iter) { return __transform(*__iter); },
@@ -197,7 +198,7 @@ struct __cpu_parallel_transform_reduce {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__simd_transform_reduce<_Backend>(
+      return __pstl::__simd_transform_reduce<_Backend>(
           __last - __first,
           std::move(__init),
           std::move(__reduce),
@@ -209,6 +210,7 @@ struct __cpu_parallel_transform_reduce {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

@ldionne ldionne merged commit 7b80384 into llvm:main Jun 12, 2024
51 of 59 checks passed
@ldionne ldionne deleted the review/pstl-cpu-algos branch June 12, 2024 19:23
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants