Skip to content

Commit 5b6a846

Browse files
committed
Optimize std::for_each_n for segmented iterators
1 parent e9280a1 commit 5b6a846

File tree

8 files changed

+318
-48
lines changed

8 files changed

+318
-48
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ Improvements and New Features
7070
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
7171
in C++23 and later.
7272

73+
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
74+
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
75+
7376
Deprecations and Removals
7477
-------------------------
7578

libcxx/include/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ set(files
2525
__algorithm/find_segment_if.h
2626
__algorithm/for_each.h
2727
__algorithm/for_each_n.h
28+
__algorithm/for_each_n_segment.h
2829
__algorithm/for_each_segment.h
2930
__algorithm/generate.h
3031
__algorithm/generate_n.h

libcxx/include/__algorithm/for_each.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_H
12-
1312
#include <__algorithm/for_each_segment.h>
1413
#include <__config>
1514
#include <__iterator/segmented_iterator.h>

libcxx/include/__algorithm/for_each_n.h

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,88 @@
1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_N_H
1212

13+
#include <__algorithm/for_each.h>
14+
#include <__algorithm/for_each_n_segment.h>
1315
#include <__config>
16+
#include <__iterator/iterator_traits.h>
17+
#include <__iterator/segmented_iterator.h>
18+
#include <__type_traits/enable_if.h>
1419
#include <__utility/convert_to_integral.h>
20+
#include <__utility/move.h>
1521

1622
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1723
# pragma GCC system_header
1824
#endif
1925

20-
_LIBCPP_BEGIN_NAMESPACE_STD
26+
_LIBCPP_PUSH_MACROS
27+
#include <__undef_macros>
2128

22-
#if _LIBCPP_STD_VER >= 17
29+
_LIBCPP_BEGIN_NAMESPACE_STD
2330

24-
template <class _InputIterator, class _Size, class _Function>
25-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
26-
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
31+
template <class _InputIterator,
32+
class _Size,
33+
class _Func,
34+
__enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
35+
(!__is_segmented_iterator<_InputIterator>::value
36+
// || !__has_random_access_iterator_category<
37+
// typename __segmented_iterator_traits<_InputIterator>::__local_iterator>::value
38+
), // TODO: __segmented_iterator_traits<_InputIterator> results in template instantiation
39+
// during SFINAE, which is a hard error to be fixed. Once fixed, we should uncomment.
40+
int> = 0>
41+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
42+
__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) {
2743
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
2844
_IntegralSize __n = __orig_n;
2945
while (__n > 0) {
3046
__f(*__first);
3147
++__first;
3248
--__n;
3349
}
34-
return __first;
50+
return std::move(__first);
3551
}
3652

37-
#endif
53+
template <class _RandIter,
54+
class _Size,
55+
class _Func,
56+
__enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
57+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
58+
__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
59+
typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
60+
auto __last = __first + __n;
61+
std::__for_each(__first, __last, __f);
62+
return std::move(__last);
63+
}
64+
65+
#ifndef _LIBCPP_CXX03_LANG
66+
template <class _SegmentedIterator,
67+
class _Size,
68+
class _Func,
69+
__enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
70+
__is_segmented_iterator<_SegmentedIterator>::value &&
71+
__has_random_access_iterator_category<
72+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
73+
int> = 0>
74+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
75+
__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) {
76+
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
77+
return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
78+
std::__for_each(__lfirst, __llast, __f);
79+
});
80+
}
81+
#endif // !_LIBCPP_CXX03_LANG
82+
83+
#if _LIBCPP_STD_VER >= 17
84+
85+
template <class _InputIterator, class _Size, class _Function>
86+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
87+
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
88+
return std::__for_each_n(__first, __orig_n, __f);
89+
}
90+
91+
#endif // _LIBCPP_STD_VER >= 17
3892

3993
_LIBCPP_END_NAMESPACE_STD
4094

95+
_LIBCPP_POP_MACROS
96+
4197
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_H
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
10+
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
11+
12+
#include <__config>
13+
#include <__iterator/iterator_traits.h>
14+
#include <__iterator/segmented_iterator.h>
15+
16+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
17+
# pragma GCC system_header
18+
#endif
19+
20+
_LIBCPP_BEGIN_NAMESPACE_STD
21+
22+
// __for_each_n_segment optimizes linear iteration over segmented iterators. It processes a segmented
23+
// input range [__first, __first + __n) by applying the functor __func to each element within the segment.
24+
// The return value of __func is ignored, and the function returns an iterator pointing to one past the
25+
// last processed element in the input range.
26+
27+
template <class _SegmentedIterator, class _Size, class _Functor>
28+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
29+
__for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) {
30+
static_assert(__is_segmented_iterator<_SegmentedIterator>::value &&
31+
__has_random_access_iterator_category<
32+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
33+
"__for_each_n_segment only works with segmented iterators with random-access local iterators");
34+
if (__orig_n <= 0)
35+
return __first;
36+
37+
using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
38+
using __local_iter_t = typename _Traits::__local_iterator;
39+
using __difference_t = typename std::iterator_traits<__local_iter_t>::difference_type;
40+
__difference_t __n = __orig_n;
41+
auto __seg = _Traits::__segment(__first);
42+
auto __local_first = _Traits::__local(__first);
43+
__local_iter_t __local_last;
44+
45+
while (__n > 0) {
46+
__local_last = _Traits::__end(__seg);
47+
auto __seg_size = __local_last - __local_first;
48+
if (__n <= __seg_size) {
49+
__local_last = __local_first + __n;
50+
__func(__local_first, __local_last);
51+
break;
52+
}
53+
__func(__local_first, __local_last);
54+
__n -= __seg_size;
55+
__local_first = _Traits::__begin(++__seg);
56+
}
57+
58+
return _Traits::__compose(__seg, __local_last);
59+
}
60+
61+
_LIBCPP_END_NAMESPACE_STD
62+
63+
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H

libcxx/include/module.modulemap.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ module std [system] {
437437
module find_segment_if { header "__algorithm/find_segment_if.h" }
438438
module find { header "__algorithm/find.h" }
439439
module for_each_n { header "__algorithm/for_each_n.h" }
440+
module for_each_n_segment { header "__algorithm/for_each_n_segment.h" }
440441
module for_each_segment { header "__algorithm/for_each_segment.h" }
441442
module for_each { header "__algorithm/for_each.h" }
442443
module generate_n { header "__algorithm/generate_n.h" }
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <list>
15+
#include <ranges>
16+
#include <string>
17+
#include <vector>
18+
19+
#include <benchmark/benchmark.h>
20+
21+
int main(int argc, char** argv) {
22+
auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
23+
24+
// std::for_each_n
25+
{
26+
auto bm = []<class Container>(std::string name, auto for_each_n) {
27+
using ElemType = typename Container::value_type;
28+
benchmark::RegisterBenchmark(
29+
name,
30+
[for_each_n](auto& st) {
31+
std::size_t const n = st.range(0);
32+
Container c(n, 1);
33+
auto first = c.begin();
34+
35+
for ([[maybe_unused]] auto _ : st) {
36+
benchmark::DoNotOptimize(c);
37+
auto result = for_each_n(first, n, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
38+
benchmark::DoNotOptimize(result);
39+
}
40+
})
41+
->Arg(8)
42+
->Arg(32)
43+
->Arg(50) // non power-of-two
44+
->Arg(1024)
45+
->Arg(4096)
46+
->Arg(8192)
47+
->Arg(1 << 14)
48+
->Arg(1 << 16)
49+
->Arg(1 << 18);
50+
};
51+
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
52+
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
53+
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
54+
}
55+
56+
// std::for_each_n for join_view
57+
{
58+
auto bm = []<class Container>(std::string name, auto for_each_n) {
59+
using C1 = typename Container::value_type;
60+
using ElemType = typename C1::value_type;
61+
benchmark::RegisterBenchmark(
62+
name,
63+
[for_each_n](auto& st) {
64+
std::size_t const size = st.range(0);
65+
std::size_t const seg_size = 256;
66+
std::size_t const segments = (size + seg_size - 1) / seg_size;
67+
Container c(segments);
68+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
69+
c[i].resize(std::min(seg_size, n), ElemType(1));
70+
}
71+
72+
auto view = c | std::views::join;
73+
auto first = view.begin();
74+
75+
for ([[maybe_unused]] auto _ : st) {
76+
benchmark::DoNotOptimize(c);
77+
auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
78+
benchmark::DoNotOptimize(result);
79+
}
80+
})
81+
->Arg(8)
82+
->Arg(32)
83+
->Arg(50) // non power-of-two
84+
->Arg(1024)
85+
->Arg(4096)
86+
->Arg(8192)
87+
->Arg(1 << 14)
88+
->Arg(1 << 16)
89+
->Arg(1 << 18);
90+
};
91+
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
92+
}
93+
94+
benchmark::Initialize(&argc, argv);
95+
benchmark::RunSpecifiedBenchmarks();
96+
benchmark::Shutdown();
97+
return 0;
98+
}

0 commit comments

Comments
 (0)