Skip to content

Commit 0aad396

Browse files
committed
Apply optimization for join_view segmented iterators
1 parent f314697 commit 0aad396

File tree

10 files changed

+259
-20
lines changed

10 files changed

+259
-20
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ Improvements and New Features
6060

6161
- Updated formatting library to Unicode 16.0.0.
6262

63+
- The ``std::ranges::{for_each, for_each_n}`` algorithms have been optimized for segmented iterators, resulting in
64+
performance improvements of up to 21.2x for ``deque`` segmented input and 17.9x for ``join_view`` of ``vector``\ s.
65+
6366
Deprecations and Removals
6467
-------------------------
6568

libcxx/include/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ set(files
2525
__algorithm/find_segment_if.h
2626
__algorithm/for_each.h
2727
__algorithm/for_each_n.h
28+
__algorithm/for_each_n_segment.h
2829
__algorithm/for_each_segment.h
2930
__algorithm/generate.h
3031
__algorithm/generate_n.h

libcxx/include/__algorithm/for_each.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ __for_each(_InputIterator __first, _Sent __last, _Function& __f) {
3333
return std::move(__f);
3434
}
3535

36+
// __do_segment acts as a functor for processing individual segments within the __for_each_segment and
// __for_each_n_segment algorithms.
3637
template <class _InputIterator, class _Function>
37-
struct _ForeachSegment {
38+
struct __do_segment {
3839
using _Traits _LIBCPP_NODEBUG = __segmented_iterator_traits<_InputIterator>;
3940

4041
_Function& __func_;
4142

42-
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit _ForeachSegment(_Function& __func) : __func_(__func) {}
43+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __do_segment(_Function& __func) : __func_(__func) {}
4344

4445
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
4546
operator()(typename _Traits::__local_iterator __lfirst, typename _Traits::__local_iterator __llast) {
@@ -52,7 +53,7 @@ template <class _SegmentedIterator,
5253
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
5354
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Function
5455
__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func) {
55-
std::__for_each_segment(__first, __last, _ForeachSegment<_SegmentedIterator, _Function>(__func));
56+
std::__for_each_segment(__first, __last, std::__do_segment<_SegmentedIterator, _Function>(__func));
5657
return std::move(__func);
5758
}
5859

libcxx/include/__algorithm/for_each_n.h

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_N_H
1212

1313
#include <__algorithm/for_each.h>
14+
#include <__algorithm/for_each_n_segment.h>
1415
#include <__config>
1516
#include <__iterator/iterator_traits.h>
1617
#include <__iterator/next.h>
@@ -44,19 +45,15 @@ for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
4445
return __first;
4546
}
4647

47-
template <class _InputIterator,
48+
template <class _SegmentedIterator,
4849
class _Size,
4950
class _Function,
50-
__enable_if_t<__is_segmented_iterator<_InputIterator>::value &&
51-
__has_forward_iterator_category<_InputIterator>::value,
51+
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value &&
52+
__has_forward_iterator_category<_SegmentedIterator>::value,
5253
int> = 0>
53-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
54-
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
55-
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
56-
_IntegralSize __n = __orig_n;
57-
_InputIterator __last = std::next(__first, __n);
58-
std::__for_each(__first, __last, __f);
59-
return __last;
54+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
55+
for_each_n(_SegmentedIterator __first, _Size __orig_n, _Function __f) {
56+
return std::__for_each_n_segment(__first, __orig_n, std::__do_segment<_SegmentedIterator, _Function>(__f));
6057
}
6158

6259
#endif
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
10+
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
11+
12+
#include <__config>
13+
#include <__iterator/distance.h>
14+
#include <__iterator/next.h>
15+
#include <__iterator/segmented_iterator.h>
16+
#include <__utility/convert_to_integral.h>
17+
18+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
19+
# pragma GCC system_header
20+
#endif
21+
22+
_LIBCPP_BEGIN_NAMESPACE_STD
23+
24+
// __for_each_n_segment is a utility function for optimizing iterating over segmented iterators linearly.
25+
// __first and __orig_n are represent the begining and size of a segmented range. __func is expected to
26+
// take a range of local iterators. Anything that is returned from __func is ignored.
27+
28+
template <class _SegmentedIterator, class _Size, class _Functor>
29+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
30+
__for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) {
31+
if (__orig_n == 0)
32+
return __first;
33+
34+
using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
35+
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
36+
_IntegralSize __n = __orig_n;
37+
auto __seg = _Traits::__segment(__first);
38+
auto __sfirst = _Traits::__begin(__seg);
39+
auto __slast = _Traits::__end(__seg);
40+
auto __lfirst = _Traits::__local(__first);
41+
auto __seg_size = static_cast<_IntegralSize>(std::distance(__lfirst, __slast));
42+
43+
// We have only one single segment, which might not start or end at the boundaries of the segment
44+
if (__n <= __seg_size) {
45+
auto __llast = std::next(__lfirst, __n);
46+
__func(__lfirst, __llast);
47+
return _Traits::__compose(__seg, __llast);
48+
}
49+
50+
// We have more than one segment. Iterate over the first segment which might not start at the beginning
51+
__func(__lfirst, std::next(__lfirst, __seg_size));
52+
++__seg;
53+
__n -= __seg_size;
54+
55+
// Iterate over the 2nd to last segments which are guaranteed to start at the beginning of each segment
56+
while (true) {
57+
__sfirst = _Traits::__begin(__seg);
58+
__slast = _Traits::__end(__seg);
59+
__seg_size = std::distance(__sfirst, __slast);
60+
61+
// We are in the last segment
62+
if (__n <= __seg_size) {
63+
auto __llast = std::next(__sfirst, __n);
64+
__func(__sfirst, __llast);
65+
return _Traits::__compose(__seg, __llast);
66+
}
67+
68+
// We are in middle segments that are completely in the range
69+
__func(__sfirst, __slast);
70+
++__seg;
71+
__n -= __seg_size;
72+
}
73+
}
74+
75+
_LIBCPP_END_NAMESPACE_STD
76+
77+
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H

libcxx/include/__algorithm/ranges_for_each_n.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
1010
#define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
1111

12-
#include <__algorithm/for_each.h>
12+
#include <__algorithm/for_each_n.h>
1313
#include <__algorithm/in_fun_result.h>
1414
#include <__config>
1515
#include <__functional/identity.h>
@@ -43,9 +43,8 @@ struct __for_each_n {
4343
_LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func>
4444
operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
4545
if constexpr (forward_iterator<_Iter>) {
46-
auto __last = std::ranges::next(__first, __count);
4746
auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
48-
std::__for_each(__first, __last, __f);
47+
auto __last = std::for_each_n(__first, __count, __f);
4948
return {std::move(__last), std::move(__func)};
5049
} else {
5150
while (__count-- > 0) {

libcxx/include/module.modulemap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ module std [system] {
436436
module find_segment_if { header "__algorithm/find_segment_if.h" }
437437
module find { header "__algorithm/find.h" }
438438
module for_each_n { header "__algorithm/for_each_n.h" }
439+
module for_each_n_segment { header "__algorithm/for_each_n_segment.h" }
439440
module for_each_segment { header "__algorithm/for_each_segment.h" }
440441
module for_each { header "__algorithm/for_each.h" }
441442
module generate_n { header "__algorithm/generate_n.h" }

libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ int main(int argc, char** argv) {
2323
// {std,ranges}::for_each
2424
{
2525
auto bm = []<class Container>(std::string name, auto for_each) {
26+
using ElemType = typename Container::value_type;
2627
benchmark::RegisterBenchmark(
2728
name,
2829
[for_each](auto& st) {
@@ -33,16 +34,34 @@ int main(int argc, char** argv) {
3334

3435
for ([[maybe_unused]] auto _ : st) {
3536
benchmark::DoNotOptimize(c);
36-
auto result = for_each(first, last, [](int& x) { x = std::clamp(x, 10, 100); });
37+
auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
3738
benchmark::DoNotOptimize(result);
3839
}
3940
})
4041
->Arg(8)
4142
->Arg(32)
4243
->Arg(50) // non power-of-two
44+
->Arg(1024)
45+
->Arg(4096)
4346
->Arg(8192)
44-
->Arg(1 << 20);
47+
->Arg(1 << 14)
48+
->Arg(1 << 16)
49+
->Arg(1 << 18);
4550
};
51+
bm.operator()<std::vector<char>>("std::for_each(vector<char>)", std_for_each);
52+
bm.operator()<std::deque<char>>("std::for_each(deque<char>)", std_for_each);
53+
bm.operator()<std::list<char>>("std::for_each(list<char>)", std_for_each);
54+
bm.operator()<std::vector<char>>("rng::for_each(vector<char>)", std::ranges::for_each);
55+
bm.operator()<std::deque<char>>("rng::for_each(deque<char>)", std::ranges::for_each);
56+
bm.operator()<std::list<char>>("rng::for_each(list<char>)", std::ranges::for_each);
57+
58+
bm.operator()<std::vector<short>>("std::for_each(vector<short>)", std_for_each);
59+
bm.operator()<std::deque<short>>("std::for_each(deque<short>)", std_for_each);
60+
bm.operator()<std::list<short>>("std::for_each(list<short>)", std_for_each);
61+
bm.operator()<std::vector<short>>("rng::for_each(vector<short>)", std::ranges::for_each);
62+
bm.operator()<std::deque<short>>("rng::for_each(deque<short>)", std::ranges::for_each);
63+
bm.operator()<std::list<short>>("rng::for_each(list<short>)", std::ranges::for_each);
64+
4665
bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
4766
bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
4867
bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <list>
15+
#include <ranges>
16+
#include <string>
17+
#include <vector>
18+
19+
#include <benchmark/benchmark.h>
20+
21+
int main(int argc, char** argv) {
22+
auto std_for_each = [](auto first, auto last, auto f) { return std::for_each(first, last, f); };
23+
auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
24+
25+
// {std,ranges}::for_each
26+
{
27+
auto bm = []<class Container>(std::string name, auto for_each) {
28+
using C1 = typename Container::value_type;
29+
using ElemType = typename C1::value_type;
30+
31+
benchmark::RegisterBenchmark(
32+
name,
33+
[for_each](auto& st) {
34+
std::size_t const size = st.range(0);
35+
std::size_t const seg_size = 256;
36+
std::size_t const segments = (size + seg_size - 1) / seg_size;
37+
Container c(segments);
38+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
39+
c[i].resize(std::min(seg_size, n), ElemType(1));
40+
}
41+
42+
auto view = c | std::views::join;
43+
auto first = view.begin();
44+
auto last = view.end();
45+
46+
for ([[maybe_unused]] auto _ : st) {
47+
benchmark::DoNotOptimize(c);
48+
auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
49+
benchmark::DoNotOptimize(result);
50+
}
51+
})
52+
->Arg(8)
53+
->Arg(32)
54+
->Arg(50) // non power-of-two
55+
->Arg(1024)
56+
->Arg(4096)
57+
->Arg(8192)
58+
->Arg(1 << 14)
59+
->Arg(1 << 16)
60+
->Arg(1 << 18);
61+
};
62+
bm.operator()<std::vector<std::vector<char>>>("std::for_each(join_view(vector<vector<char>>))", std_for_each);
63+
bm.operator()<std::vector<std::vector<short>>>("std::for_each(join_view(vector<vector<short>>))", std_for_each);
64+
bm.operator()<std::vector<std::vector<int>>>("std::for_each(join_view(vector<vector<int>>))", std_for_each);
65+
bm.operator()<std::vector<std::vector<char>>>(
66+
"rng::for_each(join_view(vector<vector<char>>)", std::ranges::for_each);
67+
bm.operator()<std::vector<std::vector<short>>>(
68+
"rng::for_each(join_view(vector<vector<short>>)", std::ranges::for_each);
69+
bm.operator()<std::vector<std::vector<int>>>("rng::for_each(join_view(vector<vector<int>>)", std::ranges::for_each);
70+
}
71+
72+
// {std,ranges}::for_each_n
73+
{
74+
auto bm = []<class Container>(std::string name, auto for_each_n) {
75+
using C1 = typename Container::value_type;
76+
using ElemType = typename C1::value_type;
77+
benchmark::RegisterBenchmark(
78+
name,
79+
[for_each_n](auto& st) {
80+
std::size_t const size = st.range(0);
81+
std::size_t const seg_size = 256;
82+
std::size_t const segments = (size + seg_size - 1) / seg_size;
83+
Container c(segments);
84+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
85+
c[i].resize(std::min(seg_size, n), ElemType(1));
86+
}
87+
88+
auto view = c | std::views::join;
89+
auto first = view.begin();
90+
91+
for ([[maybe_unused]] auto _ : st) {
92+
benchmark::DoNotOptimize(c);
93+
auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
94+
benchmark::DoNotOptimize(result);
95+
}
96+
})
97+
->Arg(8)
98+
->Arg(32)
99+
->Arg(50) // non power-of-two
100+
->Arg(1024)
101+
->Arg(4096)
102+
->Arg(8192)
103+
->Arg(1 << 14)
104+
->Arg(1 << 16)
105+
->Arg(1 << 18);
106+
};
107+
bm.operator()<std::vector<std::vector<char>>>("std::for_each_n(join_view(vector<vector<char>>))", std_for_each_n);
108+
bm.operator()<std::vector<std::vector<short>>>("std::for_each_n(join_view(vector<vector<short>>))", std_for_each_n);
109+
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
110+
bm.operator()<std::vector<std::vector<char>>>(
111+
"rng::for_each_n(join_view(vector<vector<char>>)", std::ranges::for_each_n);
112+
bm.operator()<std::vector<std::vector<short>>>(
113+
"rng::for_each_n(join_view(vector<vector<short>>)", std::ranges::for_each_n);
114+
bm.operator()<std::vector<std::vector<int>>>(
115+
"rng::for_each_n(join_view(vector<vector<int>>)", std::ranges::for_each_n);
116+
}
117+
118+
benchmark::Initialize(&argc, argv);
119+
benchmark::RunSpecifiedBenchmarks();
120+
benchmark::Shutdown();
121+
return 0;
122+
}

libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ int main(int argc, char** argv) {
2323
// {std,ranges}::for_each_n
2424
{
2525
auto bm = []<class Container>(std::string name, auto for_each_n) {
26+
using ElemType = typename Container::value_type;
2627
benchmark::RegisterBenchmark(
2728
name,
2829
[for_each_n](auto& st) {
@@ -32,16 +33,34 @@ int main(int argc, char** argv) {
3233

3334
for ([[maybe_unused]] auto _ : st) {
3435
benchmark::DoNotOptimize(c);
35-
auto result = for_each_n(first, n, [](int& x) { x = std::clamp(x, 10, 100); });
36+
auto result = for_each_n(first, n, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
3637
benchmark::DoNotOptimize(result);
3738
}
3839
})
3940
->Arg(8)
4041
->Arg(32)
4142
->Arg(50) // non power-of-two
43+
->Arg(1024)
44+
->Arg(4096)
4245
->Arg(8192)
43-
->Arg(1 << 20);
46+
->Arg(1 << 14)
47+
->Arg(1 << 16)
48+
->Arg(1 << 18);
4449
};
50+
bm.operator()<std::vector<char>>("std::for_each_n(vector<char>)", std_for_each_n);
51+
bm.operator()<std::deque<char>>("std::for_each_n(deque<char>)", std_for_each_n);
52+
bm.operator()<std::list<char>>("std::for_each_n(list<char>)", std_for_each_n);
53+
bm.operator()<std::vector<char>>("rng::for_each_n(vector<char>)", std::ranges::for_each_n);
54+
bm.operator()<std::deque<char>>("rng::for_each_n(deque<char>)", std::ranges::for_each_n);
55+
bm.operator()<std::list<char>>("rng::for_each_n(list<char>)", std::ranges::for_each_n);
56+
57+
bm.operator()<std::vector<short>>("std::for_each_n(vector<short>)", std_for_each_n);
58+
bm.operator()<std::deque<short>>("std::for_each_n(deque<short>)", std_for_each_n);
59+
bm.operator()<std::list<short>>("std::for_each_n(list<short>)", std_for_each_n);
60+
bm.operator()<std::vector<short>>("rng::for_each_n(vector<short>)", std::ranges::for_each_n);
61+
bm.operator()<std::deque<short>>("rng::for_each_n(deque<short>)", std::ranges::for_each_n);
62+
bm.operator()<std::list<short>>("rng::for_each_n(list<short>)", std::ranges::for_each_n);
63+
4564
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
4665
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
4766
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);

0 commit comments

Comments
 (0)