Skip to content

Commit c81bfc6

Browse files
committed
[libc++] Optimize for_each for segmented iterators
```
---------------------------------------------------
Benchmark                       old             new
---------------------------------------------------
bm_for_each/1               3.00 ns         2.98 ns
bm_for_each/2               4.53 ns         4.57 ns
bm_for_each/3               5.82 ns         5.82 ns
bm_for_each/4               6.94 ns         6.91 ns
bm_for_each/5               7.55 ns         7.75 ns
bm_for_each/6               7.06 ns         7.45 ns
bm_for_each/7               6.69 ns         7.14 ns
bm_for_each/8               6.86 ns         4.06 ns
bm_for_each/16              11.5 ns         5.73 ns
bm_for_each/64              43.7 ns         4.06 ns
bm_for_each/512              356 ns         7.98 ns
bm_for_each/4096            2787 ns         53.6 ns
bm_for_each/32768          20836 ns          438 ns
bm_for_each/262144        195362 ns         4945 ns
bm_for_each/1048576       685482 ns        19822 ns
```

Reviewed By: ldionne, Mordante, #libc

Spies: bgraur, sberg, arichardson, libcxx-commits

Differential Revision: https://reviews.llvm.org/D151274
1 parent 6726c99 commit c81bfc6

File tree

7 files changed

+155
-61
lines changed

7 files changed

+155
-61
lines changed

libcxx/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ set(BENCHMARK_TESTS
176176
algorithms/count.bench.cpp
177177
algorithms/equal.bench.cpp
178178
algorithms/find.bench.cpp
179+
algorithms/for_each.bench.cpp
179180
algorithms/lower_bound.bench.cpp
180181
algorithms/make_heap.bench.cpp
181182
algorithms/make_heap_then_sort_heap.bench.cpp
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <algorithm>
10+
#include <benchmark/benchmark.h>
11+
#include <deque>
12+
13+
// Benchmarks std::for_each over a std::deque<char> holding state.range() elements.
// Each iteration clamps every element into [10, 100] in place; both the container and
// the functor returned by std::for_each are passed to DoNotOptimize so the work is
// not optimized away.
static void bm_deque_for_each(benchmark::State& state) {
  std::deque<char> container(state.range(), '1');
  for (auto _ : state) {
    benchmark::DoNotOptimize(container);
    benchmark::DoNotOptimize(std::for_each(container.begin(), container.end(), [](char& v) {
      v = std::clamp(v, static_cast<char>(10), static_cast<char>(100));
    }));
  }
}
21+
BENCHMARK(bm_deque_for_each)->DenseRange(1, 8)->Range(16, 1 << 20);
22+
23+
BENCHMARK_MAIN();

libcxx/docs/ReleaseNotes/18.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ Improvements and New Features
5656
- ``std::ranges::count`` is now optimized for ``vector<bool>::iterator``, which
5757
can lead to up to 350x performance improvements.
5858

59+
- ``std::for_each`` has been optimized for segmented iterators like ``std::deque::iterator`` in C++23 and
60+
later, which can lead to up to 40x performance improvements.
61+
5962
- The library now provides several hardening modes under which common cases of library undefined behavior will be turned
6063
into a reliable program termination. The ``fast`` hardening mode enables a set of security-critical checks with
6164
minimal runtime overhead; the ``extensive`` hardening mode additionally enables relatively cheap checks that catch

libcxx/include/__algorithm/for_each.h

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,48 @@
1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_H
1212

13+
#include <__algorithm/for_each_segment.h>
1314
#include <__config>
15+
#include <__iterator/segmented_iterator.h>
16+
#include <__ranges/movable_box.h>
17+
#include <__type_traits/enable_if.h>
18+
#include <__utility/in_place.h>
19+
#include <__utility/move.h>
1420

1521
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1622
# pragma GCC system_header
1723
#endif
1824

25+
_LIBCPP_PUSH_MACROS
26+
#include <__undef_macros>
27+
1928
_LIBCPP_BEGIN_NAMESPACE_STD
2029

2130
// Generic overload: invokes __f on the result of dereferencing each iterator in
// [__first, __last), front to back, and returns __f by value.
// NOTE(review): the lines following an "NN-" marker appear to be the pre-commit signature and
// the lines following an "NN+" marker its replacement -- confirm against the rendered diff.
template <class _InputIterator, class _Function>
22-
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function for_each(_InputIterator __first,
23-
_InputIterator __last,
24-
_Function __f) {
31+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
32+
for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
2533
for (; __first != __last; ++__first)
2634
__f(*__first);
2735
return __f;
2836
}
2937

38+
// __movable_box is available in C++20, but is actually a copyable-box, so optimization is only correct in C++23
39+
#if _LIBCPP_STD_VER >= 23
40+
template <class _SegmentedIterator, class _Function>
41+
requires __is_segmented_iterator<_SegmentedIterator>::value
42+
_LIBCPP_HIDE_FROM_ABI constexpr _Function
43+
for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function __func) {
44+
ranges::__movable_box<_Function> __wrapped_func(in_place, std::move(__func));
45+
std::__for_each_segment(__first, __last, [&](auto __lfirst, auto __llast) {
46+
__wrapped_func =
47+
ranges::__movable_box<_Function>(in_place, std::for_each(__lfirst, __llast, std::move(*__wrapped_func)));
48+
});
49+
return std::move(*__wrapped_func);
50+
}
51+
#endif // _LIBCPP_STD_VER >= 23
52+
3053
_LIBCPP_END_NAMESPACE_STD
3154

55+
_LIBCPP_POP_MACROS
56+
3257
#endif // _LIBCPP___ALGORITHM_FOR_EACH_H
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// <algorithm>
10+
11+
// template<InputIterator Iter, Callable<auto, Iter::reference> Function>
12+
// constexpr Function // constexpr since C++20
13+
// for_each(Iter first, Iter last, Function f);
14+
15+
#include <algorithm>
16+
#include <cassert>
17+
#include <deque>
18+
#if __has_include(<ranges>)
19+
# include <ranges>
20+
#endif
21+
#include <vector>
22+
23+
#include "test_macros.h"
24+
#include "test_iterators.h"
25+
26+
struct for_each_test {
27+
TEST_CONSTEXPR for_each_test(int c) : count(c) {}
28+
29+
// for_each functors only have to be move constructible
30+
for_each_test(const for_each_test&) = delete;
31+
for_each_test(for_each_test&&) = default;
32+
for_each_test& operator=(const for_each_test&) = delete;
33+
for_each_test& operator=(for_each_test&&) = delete;
34+
35+
int count;
36+
TEST_CONSTEXPR_CXX14 void operator()(int& i) {
37+
++i;
38+
++count;
39+
}
40+
};
41+
42+
struct Test {
43+
template <class Iter>
44+
TEST_CONSTEXPR_CXX20 void operator()() {
45+
int sizes[] = {0, 1, 6};
46+
for (const int size : sizes) {
47+
int ia[] = {0, 1, 2, 3, 4, 5};
48+
for_each_test f = std::for_each(Iter(ia), Iter(ia + size), for_each_test(0));
49+
assert(f.count == size);
50+
for (int i = 0; i < size; ++i)
51+
assert(ia[i] == static_cast<int>(i + 1));
52+
}
53+
}
54+
};
55+
56+
// Runs the std::for_each checks that are usable in constant expressions and returns true,
// so main() can call it at run time and also inside a static_assert.
TEST_CONSTEXPR_CXX20 bool test() {
  types::for_each(types::cpp17_input_iterator_list<int*>(), Test());

  // TODO: Remove the `_LIBCPP_ENABLE_EXPERIMENTAL` check once we have the FTM guarded or views::join isn't
  // experimental anymore
#if TEST_STD_VER >= 20 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
  { // Make sure that the segmented iterator optimization works during constant evaluation
    std::vector<std::vector<int>> nested = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
    auto joined                          = std::views::join(nested);
    // The counter lives inside the functor, so the check only passes if the functor's state
    // is carried from one inner vector to the next.
    std::for_each(joined.begin(), joined.end(), [expected = 0](int& value) mutable { assert(value == ++expected); });
  }
#endif

  return true;
}
71+
72+
// Functor that checks std::for_each visits the elements of a deque in index order.
// It holds pointers to the deque and to an external counter; each call asserts that the
// element it was handed is the deque element at the counter's current value, then advances
// the counter. Because the counter is external, the caller can read it after the algorithm
// returns.
struct deque_test {
  std::deque<int>* d_;
  int* i_;

  deque_test(std::deque<int>& d, int& i) : d_(&d), i_(&i) {}

  void operator()(int& v) {
    int& cursor = *i_;
    assert(&(*d_)[cursor] == &v);
    ++cursor;
  }
};
83+
84+
int main(int, char**) {
85+
test();
86+
#if TEST_STD_VER >= 20
87+
static_assert(test());
88+
#endif
89+
90+
// check that segmented iterators work properly
91+
int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
92+
for (const int size : sizes) {
93+
std::deque<int> d(size);
94+
int index = 0;
95+
96+
std::for_each(d.begin(), d.end(), deque_test(d, index));
97+
}
98+
99+
return 0;
100+
}

libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp

Lines changed: 0 additions & 56 deletions
This file was deleted.

libcxx/utils/data/ignore_format.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ libcxx/include/__algorithm/fill.h
1010
libcxx/include/__algorithm/fill_n.h
1111
libcxx/include/__algorithm/find_end.h
1212
libcxx/include/__algorithm/find_first_of.h
13-
libcxx/include/__algorithm/for_each.h
1413
libcxx/include/__algorithm/for_each_n.h
1514
libcxx/include/__algorithm/generate.h
1615
libcxx/include/__algorithm/generate_n.h
@@ -1139,7 +1138,6 @@ libcxx/test/std/algorithms/alg.nonmodifying/alg.find/ranges.find.pass.cpp
11391138
libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
11401139
libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
11411140
libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
1142-
libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp
11431141
libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/is_permutation.pass.cpp
11441142
libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/is_permutation_pred.pass.cpp
11451143
libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/ranges.is_permutation.pass.cpp

0 commit comments

Comments
 (0)