-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc++] Optimize ranges::swap_ranges for vector<bool>::iterator #121150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
55c9ee7
to
4267694
Compare
b82d52e
to
0bfcf34
Compare
@llvm/pr-subscribers-libcxx Author: Peng Liu (winner245) Changes: This PR optimizes the performance of `std::ranges::swap_ranges` for `vector<bool>::iterator`.
Patch is 35.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121150.diff 6 Files Affected:
diff --git a/libcxx/include/__algorithm/swap_ranges.h b/libcxx/include/__algorithm/swap_ranges.h
index 54b453b72360e0..2731d4feaa63d4 100644
--- a/libcxx/include/__algorithm/swap_ranges.h
+++ b/libcxx/include/__algorithm/swap_ranges.h
@@ -10,9 +10,12 @@
#define _LIBCPP___ALGORITHM_SWAP_RANGES_H
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/min.h>
#include <__config>
+#include <__fwd/bit_reference.h>
#include <__utility/move.h>
#include <__utility/pair.h>
+#include <__utility/swap.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -23,6 +26,165 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
+template <class _Cl, class _Cr> // swaps the bits of [__first, __last) with the equally long run of bits that starts at __result
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_aligned(
+ __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
+ using _I1 = __bit_iterator<_Cl, false>;
+ using difference_type = typename _I1::difference_type;
+ using __storage_type = typename _I1::__storage_type;
+
+ const int __bits_per_word = _I1::__bits_per_word;
+ difference_type __n = __last - __first; // number of bits that still have to be swapped
+ if (__n > 0) {
+ // do first word: __first is not on a word boundary, so exchange only the bits from __first.__ctz_ up to the end of this word (or __n bits, if fewer)
+ if (__first.__ctz_ != 0) {
+ unsigned __clz = __bits_per_word - __first.__ctz_; // bits between __first.__ctz_ and the end of the word
+ difference_type __dn = std::min(static_cast<difference_type>(__clz), __n); // bits handled in this partial word
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); // mask of the __dn bits starting at __first.__ctz_
+ __storage_type __b1 = *__first.__seg_ & __m; // masked bits of the first range; the same mask is reused on __result below, which presumes both iterators share the same __ctz_
+ *__first.__seg_ &= ~__m;
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1;
+ *__first.__seg_ |= __b2;
+ __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; // advance __result by __dn bits
+ __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+ ++__first.__seg_;
+ // __first.__ctz_ = 0;
+ }
+ // __first.__ctz_ == 0;
+ // do middle words: every remaining full word is exchanged as a whole
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
+ swap(*__first.__seg_, *__result.__seg_);
+ // do last word: exchange only the low __n bits of the final, partially covered word
+ if (__n > 0) {
+ __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); // mask of the low __n bits
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1;
+ *__first.__seg_ |= __b2;
+ __result.__ctz_ = static_cast<unsigned>(__n); // __result now sits __n bits into its current word
+ }
+ }
+ return __result; // __result as advanced above, i.e. past the last bit written in the second range
+}
+
+template <class _Cl, class _Cr> // swaps the bits of [__first, __last) with the equally long run of bits that starts at __result, shifting bits between the two word layouts
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_unaligned(
+ __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
+ using _I1 = __bit_iterator<_Cl, false>;
+ using difference_type = typename _I1::difference_type;
+ using __storage_type = typename _I1::__storage_type;
+
+ const int __bits_per_word = _I1::__bits_per_word;
+ difference_type __n = __last - __first; // number of bits that still have to be swapped
+ if (__n > 0) {
+ // do first word: bring __first up to a word boundary by swapping the bits from __first.__ctz_ to the end of its word (or __n bits, if fewer)
+ if (__first.__ctz_ != 0) {
+ unsigned __clz_f = __bits_per_word - __first.__ctz_; // bits between __first.__ctz_ and the end of its word
+ difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n); // bits taken from the first range in this step
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); // mask of those __dn bits in __first's word
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ unsigned __clz_r = __bits_per_word - __result.__ctz_; // bits between __result.__ctz_ and the end of its word
+ __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); // how many of the __dn bits fit into __result's current word
+ __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); // mask of those __ddn bits in __result's word
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ if (__result.__ctz_ > __first.__ctz_) { // shift by the difference of the two bit offsets so each chunk lands at the other side's offset
+ unsigned __s = __result.__ctz_ - __first.__ctz_;
+ *__result.__seg_ |= __b1 << __s;
+ *__first.__seg_ |= __b2 >> __s;
+ } else {
+ unsigned __s = __first.__ctz_ - __result.__ctz_;
+ *__result.__seg_ |= __b1 >> __s;
+ *__first.__seg_ |= __b2 << __s;
+ }
+ __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; // advance __result by __ddn bits
+ __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
+ __dn -= __ddn; // bits of this step that did not fit into the previous __result word
+ if (__dn > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __dn); // mask of the low __dn bits of the next __result word
+ __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ unsigned __s = __first.__ctz_ + __ddn; // position inside __first's word of the first bit not yet exchanged
+ *__result.__seg_ |= __b1 >> __s;
+ *__first.__seg_ |= __b2 << __s;
+ __result.__ctz_ = static_cast<unsigned>(__dn);
+ }
+ ++__first.__seg_;
+ // __first.__ctz_ = 0;
+ }
+ // __first.__ctz_ == 0;
+ // do middle words: each full word of the first range is exchanged with bits spread over two consecutive __result words
+ __storage_type __m = ~__storage_type(0) << __result.__ctz_; // selects the bits at and above __result.__ctz_ in a __result word
+ unsigned __clz_r = __bits_per_word - __result.__ctz_; // NOTE(review): the shifts by __clz_r below look like they rely on __result.__ctz_ != 0 here - confirm against the caller's dispatch
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
+ __storage_type __b1 = *__first.__seg_;
+ __storage_type __b2 = *__result.__seg_ & __m; // upper part of the current __result word
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 << __result.__ctz_;
+ *__first.__seg_ = __b2 >> __result.__ctz_; // plain assignment: the whole word of the first range is rebuilt from the two __result pieces
+ ++__result.__seg_;
+ __b2 = *__result.__seg_ & ~__m; // lower part (below __result.__ctz_) of the next __result word
+ *__result.__seg_ &= __m;
+ *__result.__seg_ |= __b1 >> __clz_r;
+ *__first.__seg_ |= __b2 << __clz_r;
+ }
+ // do last word: exchange the low __n bits of the final word of the first range, possibly spilling into one more __result word
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n); // mask of the low __n bits
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ __storage_type __dn = std::min<__storage_type>(__n, __clz_r); // how many of the __n bits fit into __result's current word
+ __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); // mask of those __dn bits in __result's word
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 << __result.__ctz_;
+ *__first.__seg_ |= __b2 >> __result.__ctz_;
+ __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; // advance __result by __dn bits
+ __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+ __n -= __dn; // bits that still have to go into the next __result word
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n); // mask of the low __n bits of the next __result word
+ __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 >> __dn;
+ *__first.__seg_ |= __b2 << __dn;
+ __result.__ctz_ = static_cast<unsigned>(__n);
+ }
+ }
+ }
+ return __result; // __result as advanced above, i.e. past the last bit written in the second range
+}
+
+// 2+1 iterators: size2 >= size1; used by std::swap_ranges. Picks the aligned or the unaligned bit-swapping routine and returns {__last1, iterator returned by that routine}.
+template <class, class _Cl, class _Cr> // the unnamed first parameter is not used in the body; the 4-iterator overload passes its _AlgPolicy there
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+__swap_ranges(__bit_iterator<_Cl, false> __first1,
+ __bit_iterator<_Cl, false> __last1,
+ __bit_iterator<_Cr, false> __first2) {
+ if (__first1.__ctz_ == __first2.__ctz_) // both ranges start at the same bit offset within their storage words
+ return std::make_pair(__last1, std::__swap_ranges_aligned(__first1, __last1, __first2));
+ return std::make_pair(__last1, std::__swap_ranges_unaligned(__first1, __last1, __first2)); // different bit offsets
+}
+
+// 2+2 iterators: used by std::ranges::swap_ranges. Forwards to the 3-iterator overload with the shorter range as the fully traversed one; the returned pair is always ordered (position in range 1, position in range 2).
+template <class _AlgPolicy, class _Cl, class _Cr>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+__swap_ranges(__bit_iterator<_Cl, false> __first1,
+ __bit_iterator<_Cl, false> __last1,
+ __bit_iterator<_Cr, false> __first2,
+ __bit_iterator<_Cr, false> __last2) {
+ if (__last1 - __first1 < __last2 - __first2) // range 1 is strictly shorter: swap all of it and report where range 2 stopped
+ return std::make_pair(__last1, std::__swap_ranges<_AlgPolicy>(__first1, __last1, __first2).second);
+ return std::make_pair(std::__swap_ranges<_AlgPolicy>(__first2, __last2, __first1).second, __last2); // otherwise swap all of range 2 (roles exchanged) and report where range 1 stopped
+}
+
// 2+2 iterators: the shorter size will be used.
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _Sentinel2>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator1, _ForwardIterator2>
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 67abb023122edf..26dfb330384a36 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -12,6 +12,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/min.h>
+#include <__algorithm/swap_ranges.h>
#include <__bit/countr.h>
#include <__compare/ordering.h>
#include <__config>
@@ -24,6 +25,7 @@
#include <__type_traits/conditional.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/void_t.h>
+#include <__utility/pair.h>
#include <__utility/swap.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -451,152 +453,6 @@ inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> move_backward(
return std::copy_backward(__first, __last, __result);
}
-// swap_ranges
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
- swap(*__first.__seg_, *__result.__seg_);
- // do last word
- if (__n > 0) {
- __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz_f = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- if (__result.__ctz_ > __first.__ctz_) {
- unsigned __s = __result.__ctz_ - __first.__ctz_;
- *__result.__seg_ |= __b1 << __s;
- *__first.__seg_ |= __b2 >> __s;
- } else {
- unsigned __s = __first.__ctz_ - __result.__ctz_;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- }
- __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
- __dn -= __ddn;
- if (__dn > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __dn);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- unsigned __s = __first.__ctz_ + __ddn;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- __result.__ctz_ = static_cast<unsigned>(__dn);
- }
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- __storage_type __m = ~__storage_type(0) << __result.__ctz_;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
- __storage_type __b1 = *__first.__seg_;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ = __b2 >> __result.__ctz_;
- ++__result.__seg_;
- __b2 = *__result.__seg_ & ~__m;
- *__result.__seg_ &= __m;
- *__result.__seg_ |= __b1 >> __clz_r;
- *__first.__seg_ |= __b2 << __clz_r;
- }
- // do last word
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __dn = std::min<__storage_type>(__n, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ |= __b2 >> __result.__ctz_;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- __n -= __dn;
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 >> __dn;
- *__first.__seg_ |= __b2 << __dn;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges(
- __bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) {
- if (__first1.__ctz_ == __first2.__ctz_)
- return std::__swap_ranges_aligned(__first1, __last1, __first2);
- return std::__swap_ranges_unaligned(__first1, __last1, __first2);
-}
-
// rotate
template <class _Cp>
@@ -1001,14 +857,14 @@ private:
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
- template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
- swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
+ template <class, class _Cl, class _Cr>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+ __swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
diff --git a/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp b/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
new file mode 100644
index 00000000000000..66629799ac5ee0
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <vector>
+
+static void bm_ranges_swap_ranges_vb_aligned(benchmark::State& state) { // times std::ranges::swap_ranges on two whole vector<bool>s of equal size
+ auto n = state.range(); // number of bits in each vector
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(n, false);
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::ranges::swap_ranges(vec1, vec2)); // both ranges start at begin(); presumably the same bit offset in both, hence "aligned"
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+static void bm_ranges_swap_ranges_vb_unaligned(benchmark::State& state) { // times std::ranges::swap_ranges when the second range starts 4 bits into its vector
+ auto n = state.range(); // number of bits swapped per iteration
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(n + 8, true); // 8 extra bits so that 4 can be skipped at each end
+ auto beg1 = std::ranges::begin(vec1);
+ auto end1 = std::ranges::end(vec1);
+ auto beg2 = std::ranges::begin(vec2) + 4; // [beg2, end2) also spans n bits, but begins 4 bits after vec2's start
+ auto end2 = std::ranges::end(vec2) - 4;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::ranges::swap_ranges(beg1, end1, beg2, end2));
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+// Test std::ranges::swap_ranges for vector<bool>::iterator; the benchmark argument (read via state.range()) is the vector size in bits
+BENCHMARK(bm_ranges_swap_ranges_vb_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_ranges_swap_ranges_vb_unaligned)->Range(8, 1 << 20);
+
+static void bm_swap_ranges_vb(benchmark::State& state, bool aligned) { // shared body for the std::swap_ranges benchmarks; `aligned` selects where the second range starts
+ auto n = state.range(); // number of bits swapped per iteration
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(aligned ? n : n + 8, true); // the unaligned variant gets 8 extra bits so the shifted range still fits
+ auto beg1 = vec1.begin();
+ auto end1 = vec1.end();
+ auto beg2 = aligned ? vec2.begin() : vec2.begin() + 4; // unaligned: start 4 bits into vec2
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::swap_ranges(beg1, end1, beg2));
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+static void bm_swap_ranges_vb_aligned(benchmark::State& state) { bm_swap_ranges_vb(state, true); } // second range starts at vec2.begin()
+static void bm_swap_ranges_vb_unaligned(benchmark::State& state) { bm_swap_ranges_vb(state, false); } // second range starts at vec2.begin() + 4
+
+// Test std::swap_ranges for vector<bool>::iterator; the benchmark argument (read via state.range()) is the number of bits swapped
+BENCHMARK(bm_swap_ranges_vb_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_swap_ranges_vb_unaligned)->Range(8, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
index b3a9f5fc259ef7..0394a48a0bb9a2 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
@@ -19,25 +19,23 @@
#include "test_macros.h"
#if TEST_STD_VER > 17
-constexpr bool test_swap_constexpr()
-{
- int i = 1;
- int j = 2;
- std::iter_swap(&i, &j);
- return i == 2 && j == 1;
+constexpr bool test_swap_constexpr() {
+ int i = 1;
+ int j = 2;
+ std::iter_swap(&i, &j);
+ return i == 2 && j == 1;
}
#endif // TEST_STD_VER > 17
-int main(int, char**)
-{
- int i = 1;
- int j = 2;
- std::iter_swap(&i, &j);
- assert(i == 2);
- assert(j == 1);
+int main(int, char**) {
+ int i = 1;
+...
[truncated]
|
423af4f
to
61aaca6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
Outdated
Show resolved
Hide resolved
61aaca6
to
facac24
Compare
facac24
to
ca03497
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for fixing this! LGTM modulo green CI.
ca03497
to
da8cb0a
Compare
…m#121150) This PR optimizes the performance of `std::ranges::swap_ranges` for `vector<bool>::iterator`, addressing a subtask outlined in issue llvm#64038. The optimizations yield performance improvements of up to **611x** for aligned range swap and **78x** for unaligned range swap comparison. Additionally, comprehensive tests covering up to 4 storage words (256 bytes) with odd and even bit sizes are provided, which validate the proposed optimizations in this patch.
This PR optimizes the performance of `std::ranges::swap_ranges` for `vector<bool>::iterator`, addressing a subtask outlined in issue #64038. The optimizations yield performance improvements of up to 611x for aligned range swap and 78x for unaligned range swap comparison. Additionally, comprehensive tests covering up to 4 storage words (256 bytes) with odd and even bit sizes are provided, which validate the proposed optimizations in this patch.