Skip to content

Commit 5e5882b

Browse files
committed
Apply optimization for join_view segmented iterators
1 parent 1ad983c commit 5e5882b

File tree

5 files changed

+165
-5
lines changed

5 files changed

+165
-5
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,17 @@ Improvements and New Features
6464

6565
- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
6666

67+

6768
- The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance
6869
up to 10x, depending on type of sorted elements and the initial state of the sorted array.
6970

7071
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
7172
in C++23 and later.
73+

74+
- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
75+
resulting in performance improvements of up to 21.2x for ``std::deque::iterator`` segmented inputs and 17.9x for
76+
``join_view`` of ``vector<vector<T>>``.
77+

7278

7379
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
7480
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.

libcxx/include/__algorithm/ranges_for_each_n.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
1010
#define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
1111

12-
#include <__algorithm/for_each.h>
12+
#include <__algorithm/for_each_n.h>
1313
#include <__algorithm/in_fun_result.h>
1414
#include <__config>
1515
#include <__functional/identity.h>
@@ -43,9 +43,8 @@ struct __for_each_n {
4343
_LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func>
4444
operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
4545
if constexpr (forward_iterator<_Iter>) {
46-
auto __last = std::ranges::next(__first, __count);
4746
auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
48-
std::__for_each(__first, __last, __f);
47+
auto __last = std::for_each_n(__first, __count, __f);
4948
return {std::move(__last), std::move(__func)};
5049
} else {
5150
while (__count-- > 0) {

libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ int main(int argc, char** argv) {
2323
// {std,ranges}::for_each
2424
{
2525
auto bm = []<class Container>(std::string name, auto for_each) {
26+
using ElemType = typename Container::value_type;
2627
benchmark::RegisterBenchmark(
2728
name,
2829
[for_each](auto& st) {
@@ -33,16 +34,34 @@ int main(int argc, char** argv) {
3334

3435
for ([[maybe_unused]] auto _ : st) {
3536
benchmark::DoNotOptimize(c);
36-
auto result = for_each(first, last, [](int& x) { x = std::clamp(x, 10, 100); });
37+
auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
3738
benchmark::DoNotOptimize(result);
3839
}
3940
})
4041
->Arg(8)
4142
->Arg(32)
4243
->Arg(50) // non power-of-two
44+
->Arg(1024)
45+
->Arg(4096)
4346
->Arg(8192)
44-
->Arg(1 << 20);
47+
->Arg(1 << 14)
48+
->Arg(1 << 16)
49+
->Arg(1 << 18);
4550
};
51+
bm.operator()<std::vector<char>>("std::for_each(vector<char>)", std_for_each);
52+
bm.operator()<std::deque<char>>("std::for_each(deque<char>)", std_for_each);
53+
bm.operator()<std::list<char>>("std::for_each(list<char>)", std_for_each);
54+
bm.operator()<std::vector<char>>("rng::for_each(vector<char>)", std::ranges::for_each);
55+
bm.operator()<std::deque<char>>("rng::for_each(deque<char>)", std::ranges::for_each);
56+
bm.operator()<std::list<char>>("rng::for_each(list<char>)", std::ranges::for_each);
57+
58+
bm.operator()<std::vector<short>>("std::for_each(vector<short>)", std_for_each);
59+
bm.operator()<std::deque<short>>("std::for_each(deque<short>)", std_for_each);
60+
bm.operator()<std::list<short>>("std::for_each(list<short>)", std_for_each);
61+
bm.operator()<std::vector<short>>("rng::for_each(vector<short>)", std::ranges::for_each);
62+
bm.operator()<std::deque<short>>("rng::for_each(deque<short>)", std::ranges::for_each);
63+
bm.operator()<std::list<short>>("rng::for_each(list<short>)", std::ranges::for_each);
64+
4665
bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
4766
bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
4867
bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <list>
15+
#include <ranges>
16+
#include <string>
17+
#include <vector>
18+
19+
#include <benchmark/benchmark.h>
20+
21+
int main(int argc, char** argv) {
22+
auto std_for_each = [](auto first, auto last, auto f) { return std::for_each(first, last, f); };
23+
auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
24+
25+
// {std,ranges}::for_each
26+
{
27+
auto bm = []<class Container>(std::string name, auto for_each) {
28+
using C1 = typename Container::value_type;
29+
using ElemType = typename C1::value_type;
30+
31+
benchmark::RegisterBenchmark(
32+
name,
33+
[for_each](auto& st) {
34+
std::size_t const size = st.range(0);
35+
std::size_t const seg_size = 256;
36+
std::size_t const segments = (size + seg_size - 1) / seg_size;
37+
Container c(segments);
38+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
39+
c[i].resize(std::min(seg_size, n), ElemType(1));
40+
}
41+
42+
auto view = c | std::views::join;
43+
auto first = view.begin();
44+
auto last = view.end();
45+
46+
for ([[maybe_unused]] auto _ : st) {
47+
benchmark::DoNotOptimize(c);
48+
auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
49+
benchmark::DoNotOptimize(result);
50+
}
51+
})
52+
->Arg(8)
53+
->Arg(32)
54+
->Arg(50) // non power-of-two
55+
->Arg(1024)
56+
->Arg(4096)
57+
->Arg(8192)
58+
->Arg(1 << 14)
59+
->Arg(1 << 16)
60+
->Arg(1 << 18);
61+
};
62+
bm.operator()<std::vector<std::vector<char>>>("std::for_each(join_view(vector<vector<char>>))", std_for_each);
63+
bm.operator()<std::vector<std::vector<short>>>("std::for_each(join_view(vector<vector<short>>))", std_for_each);
64+
bm.operator()<std::vector<std::vector<int>>>("std::for_each(join_view(vector<vector<int>>))", std_for_each);
65+
bm.operator()<std::vector<std::vector<char>>>(
66+
"rng::for_each(join_view(vector<vector<char>>)", std::ranges::for_each);
67+
bm.operator()<std::vector<std::vector<short>>>(
68+
"rng::for_each(join_view(vector<vector<short>>)", std::ranges::for_each);
69+
bm.operator()<std::vector<std::vector<int>>>("rng::for_each(join_view(vector<vector<int>>)", std::ranges::for_each);
70+
}
71+
72+
// {std,ranges}::for_each_n
73+
{
74+
auto bm = []<class Container>(std::string name, auto for_each_n) {
75+
using C1 = typename Container::value_type;
76+
using ElemType = typename C1::value_type;
77+
benchmark::RegisterBenchmark(
78+
name,
79+
[for_each_n](auto& st) {
80+
std::size_t const size = st.range(0);
81+
std::size_t const seg_size = 256;
82+
std::size_t const segments = (size + seg_size - 1) / seg_size;
83+
Container c(segments);
84+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
85+
c[i].resize(std::min(seg_size, n), ElemType(1));
86+
}
87+
88+
auto view = c | std::views::join;
89+
auto first = view.begin();
90+
91+
for ([[maybe_unused]] auto _ : st) {
92+
benchmark::DoNotOptimize(c);
93+
auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
94+
benchmark::DoNotOptimize(result);
95+
}
96+
})
97+
->Arg(8)
98+
->Arg(32)
99+
->Arg(50) // non power-of-two
100+
->Arg(1024)
101+
->Arg(4096)
102+
->Arg(8192)
103+
->Arg(1 << 14)
104+
->Arg(1 << 16)
105+
->Arg(1 << 18);
106+
};
107+
bm.operator()<std::vector<std::vector<char>>>("std::for_each_n(join_view(vector<vector<char>>))", std_for_each_n);
108+
bm.operator()<std::vector<std::vector<short>>>("std::for_each_n(join_view(vector<vector<short>>))", std_for_each_n);
109+
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
110+
bm.operator()<std::vector<std::vector<char>>>(
111+
"rng::for_each_n(join_view(vector<vector<char>>)", std::ranges::for_each_n);
112+
bm.operator()<std::vector<std::vector<short>>>(
113+
"rng::for_each_n(join_view(vector<vector<short>>)", std::ranges::for_each_n);
114+
bm.operator()<std::vector<std::vector<int>>>(
115+
"rng::for_each_n(join_view(vector<vector<int>>)", std::ranges::for_each_n);
116+
}
117+
118+
benchmark::Initialize(&argc, argv);
119+
benchmark::RunSpecifiedBenchmarks();
120+
benchmark::Shutdown();
121+
return 0;
122+
}

libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,20 @@ int main(int argc, char** argv) {
4848
->Arg(1 << 16)
4949
->Arg(1 << 18);
5050
};
51+
bm.operator()<std::vector<char>>("std::for_each_n(vector<char>)", std_for_each_n);
52+
bm.operator()<std::deque<char>>("std::for_each_n(deque<char>)", std_for_each_n);
53+
bm.operator()<std::list<char>>("std::for_each_n(list<char>)", std_for_each_n);
54+
bm.operator()<std::vector<char>>("rng::for_each_n(vector<char>)", std::ranges::for_each_n);
55+
bm.operator()<std::deque<char>>("rng::for_each_n(deque<char>)", std::ranges::for_each_n);
56+
bm.operator()<std::list<char>>("rng::for_each_n(list<char>)", std::ranges::for_each_n);
57+
58+
bm.operator()<std::vector<short>>("std::for_each_n(vector<short>)", std_for_each_n);
59+
bm.operator()<std::deque<short>>("std::for_each_n(deque<short>)", std_for_each_n);
60+
bm.operator()<std::list<short>>("std::for_each_n(list<short>)", std_for_each_n);
61+
bm.operator()<std::vector<short>>("rng::for_each_n(vector<short>)", std::ranges::for_each_n);
62+
bm.operator()<std::deque<short>>("rng::for_each_n(deque<short>)", std::ranges::for_each_n);
63+
bm.operator()<std::list<short>>("rng::for_each_n(list<short>)", std::ranges::for_each_n);
64+
5165
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
5266
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
5367
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);

0 commit comments

Comments
 (0)