Skip to content

Commit f740741

Browse files
authored
[libc++] Optimize std::find for segmented iterators (#67224)
```
--------------------------------------------------------------------------
Benchmark                                        old         new
--------------------------------------------------------------------------
bm_find<std::deque<char>>/1                  6.06 ns     10.6 ns
bm_find<std::deque<char>>/2                  15.5 ns     10.6 ns
bm_find<std::deque<char>>/3                  19.0 ns     10.6 ns
bm_find<std::deque<char>>/4                  20.8 ns     10.6 ns
bm_find<std::deque<char>>/5                  22.0 ns     10.6 ns
bm_find<std::deque<char>>/6                  23.0 ns     10.5 ns
bm_find<std::deque<char>>/7                  24.8 ns     10.7 ns
bm_find<std::deque<char>>/8                  25.7 ns     10.6 ns
bm_find<std::deque<char>>/16                 28.3 ns     10.6 ns
bm_find<std::deque<char>>/64                 44.2 ns     27.0 ns
bm_find<std::deque<char>>/512                 133 ns     37.6 ns
bm_find<std::deque<char>>/4096                867 ns     53.1 ns
bm_find<std::deque<char>>/32768              6838 ns      160 ns
bm_find<std::deque<char>>/262144            52897 ns     1495 ns
bm_find<std::deque<char>>/1048576          215621 ns     6077 ns
bm_find<std::deque<short>>/1                 6.03 ns     6.28 ns
bm_find<std::deque<short>>/2                 15.8 ns     15.8 ns
bm_find<std::deque<short>>/3                 20.5 ns     20.3 ns
bm_find<std::deque<short>>/4                 21.0 ns     21.0 ns
bm_find<std::deque<short>>/5                 23.0 ns     22.1 ns
bm_find<std::deque<short>>/6                 22.6 ns     23.0 ns
bm_find<std::deque<short>>/7                 23.4 ns     23.7 ns
bm_find<std::deque<short>>/8                 24.4 ns     24.9 ns
bm_find<std::deque<short>>/16                26.6 ns     27.2 ns
bm_find<std::deque<short>>/64                43.2 ns     40.9 ns
bm_find<std::deque<short>>/512                124 ns     90.7 ns
bm_find<std::deque<short>>/4096               845 ns      525 ns
bm_find<std::deque<short>>/32768             7273 ns     3194 ns
bm_find<std::deque<short>>/262144           53710 ns    24385 ns
bm_find<std::deque<short>>/1048576         216086 ns    96195 ns
bm_find<std::deque<int>>/1                   6.03 ns     10.3 ns
bm_find<std::deque<int>>/2                   15.6 ns     10.3 ns
bm_find<std::deque<int>>/3                   19.1 ns     10.3 ns
bm_find<std::deque<int>>/4                   22.3 ns     10.3 ns
bm_find<std::deque<int>>/5                   23.5 ns     10.4 ns
bm_find<std::deque<int>>/6                   23.1 ns     10.3 ns
bm_find<std::deque<int>>/7                   23.7 ns     10.2 ns
bm_find<std::deque<int>>/8                   24.5 ns     10.2 ns
bm_find<std::deque<int>>/16                  27.9 ns     26.6 ns
bm_find<std::deque<int>>/64                  42.6 ns     32.2 ns
bm_find<std::deque<int>>/512                  123 ns     43.0 ns
bm_find<std::deque<int>>/4096                 874 ns     93.5 ns
bm_find<std::deque<int>>/32768               7031 ns      751 ns
bm_find<std::deque<int>>/262144             57723 ns     6169 ns
bm_find<std::deque<int>>/1048576           230867 ns    35851 ns
bm_ranges_find<std::deque<char>>/1           5.97 ns     10.6 ns
bm_ranges_find<std::deque<char>>/2           16.0 ns     10.5 ns
bm_ranges_find<std::deque<char>>/3           19.5 ns     10.5 ns
bm_ranges_find<std::deque<char>>/4           21.1 ns     10.6 ns
bm_ranges_find<std::deque<char>>/5           22.8 ns     10.5 ns
bm_ranges_find<std::deque<char>>/6           22.8 ns     10.6 ns
bm_ranges_find<std::deque<char>>/7           23.4 ns     10.8 ns
bm_ranges_find<std::deque<char>>/8           24.1 ns     10.5 ns
bm_ranges_find<std::deque<char>>/16          26.9 ns     10.6 ns
bm_ranges_find<std::deque<char>>/64          50.2 ns     27.2 ns
bm_ranges_find<std::deque<char>>/512          126 ns     38.3 ns
bm_ranges_find<std::deque<char>>/4096         868 ns     53.8 ns
bm_ranges_find<std::deque<char>>/32768       6695 ns      161 ns
bm_ranges_find<std::deque<char>>/262144     54411 ns     1497 ns
bm_ranges_find<std::deque<char>>/1048576   241699 ns     6042 ns
bm_ranges_find<std::deque<short>>/1          6.39 ns     6.31 ns
bm_ranges_find<std::deque<short>>/2          15.8 ns     15.9 ns
bm_ranges_find<std::deque<short>>/3          19.0 ns     19.8 ns
bm_ranges_find<std::deque<short>>/4          20.8 ns     20.9 ns
bm_ranges_find<std::deque<short>>/5          21.8 ns     22.1 ns
bm_ranges_find<std::deque<short>>/6          23.0 ns     23.0 ns
bm_ranges_find<std::deque<short>>/7          23.2 ns     23.9 ns
bm_ranges_find<std::deque<short>>/8          23.7 ns     24.4 ns
bm_ranges_find<std::deque<short>>/16         26.6 ns     26.8 ns
bm_ranges_find<std::deque<short>>/64         43.4 ns     39.7 ns
bm_ranges_find<std::deque<short>>/512         131 ns     90.5 ns
bm_ranges_find<std::deque<short>>/4096        851 ns      523 ns
bm_ranges_find<std::deque<short>>/32768      7370 ns     3166 ns
bm_ranges_find<std::deque<short>>/262144    60778 ns    24814 ns
bm_ranges_find<std::deque<short>>/1048576  229288 ns    99273 ns
bm_ranges_find<std::deque<int>>/1            6.43 ns     10.2 ns
bm_ranges_find<std::deque<int>>/2            16.6 ns     10.2 ns
bm_ranges_find<std::deque<int>>/3            19.6 ns     10.2 ns
bm_ranges_find<std::deque<int>>/4            21.0 ns     10.2 ns
bm_ranges_find<std::deque<int>>/5            21.9 ns     10.4 ns
bm_ranges_find<std::deque<int>>/6            22.7 ns     10.2 ns
bm_ranges_find<std::deque<int>>/7            23.9 ns     10.2 ns
bm_ranges_find<std::deque<int>>/8            23.8 ns     10.2 ns
bm_ranges_find<std::deque<int>>/16           27.2 ns     27.1 ns
bm_ranges_find<std::deque<int>>/64           42.4 ns     32.4 ns
bm_ranges_find<std::deque<int>>/512           122 ns     43.0 ns
bm_ranges_find<std::deque<int>>/4096          895 ns     93.7 ns
bm_ranges_find<std::deque<int>>/32768        6890 ns      756 ns
bm_ranges_find<std::deque<int>>/262144      54025 ns     6102 ns
bm_ranges_find<std::deque<int>>/1048576    221558 ns    32783 ns
```
1 parent 810b5bc commit f740741

File tree

10 files changed

+253
-16
lines changed

10 files changed

+253
-16
lines changed

libcxx/benchmarks/algorithms/find.bench.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@
99
#include <algorithm>
1010
#include <benchmark/benchmark.h>
1111
#include <cstring>
12+
#include <deque>
1213
#include <random>
1314
#include <vector>
1415

15-
template <class T>
16+
template <class Container>
1617
static void bm_find(benchmark::State& state) {
17-
std::vector<T> vec1(state.range(), '1');
18+
using T = Container::value_type;
19+
20+
Container vec1(state.range(), '1');
1821
std::mt19937_64 rng(std::random_device{}());
1922

2023
for (auto _ : state) {
@@ -25,13 +28,18 @@ static void bm_find(benchmark::State& state) {
2528
vec1[idx] = '1';
2629
}
2730
}
28-
BENCHMARK(bm_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
29-
BENCHMARK(bm_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
30-
BENCHMARK(bm_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
31+
BENCHMARK(bm_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
32+
BENCHMARK(bm_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
33+
BENCHMARK(bm_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
34+
BENCHMARK(bm_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
35+
BENCHMARK(bm_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
36+
BENCHMARK(bm_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
3137

32-
template <class T>
38+
template <class Container>
3339
static void bm_ranges_find(benchmark::State& state) {
34-
std::vector<T> vec1(state.range(), '1');
40+
using T = Container::value_type;
41+
42+
Container vec1(state.range(), '1');
3543
std::mt19937_64 rng(std::random_device{}());
3644

3745
for (auto _ : state) {
@@ -42,9 +50,12 @@ static void bm_ranges_find(benchmark::State& state) {
4250
vec1[idx] = '1';
4351
}
4452
}
45-
BENCHMARK(bm_ranges_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
46-
BENCHMARK(bm_ranges_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
47-
BENCHMARK(bm_ranges_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
53+
BENCHMARK(bm_ranges_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
54+
BENCHMARK(bm_ranges_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
55+
BENCHMARK(bm_ranges_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
56+
BENCHMARK(bm_ranges_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
57+
BENCHMARK(bm_ranges_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
58+
BENCHMARK(bm_ranges_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
4859

4960
static void bm_vector_bool_find(benchmark::State& state) {
5061
std::vector<bool> vec1(state.range(), false);

libcxx/include/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ set(files
2222
__algorithm/find_first_of.h
2323
__algorithm/find_if.h
2424
__algorithm/find_if_not.h
25+
__algorithm/find_segment_if.h
2526
__algorithm/for_each.h
2627
__algorithm/for_each_n.h
2728
__algorithm/for_each_segment.h

libcxx/include/__algorithm/find.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef _LIBCPP___ALGORITHM_FIND_H
1111
#define _LIBCPP___ALGORITHM_FIND_H
1212

13+
#include <__algorithm/find_segment_if.h>
1314
#include <__algorithm/min.h>
1415
#include <__algorithm/unwrap_iter.h>
1516
#include <__bit/countr.h>
@@ -18,8 +19,10 @@
1819
#include <__functional/identity.h>
1920
#include <__functional/invoke.h>
2021
#include <__fwd/bit_reference.h>
22+
#include <__iterator/segmented_iterator.h>
2123
#include <__string/constexpr_c_functions.h>
2224
#include <__type_traits/is_same.h>
25+
#include <__utility/move.h>
2326

2427
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
2528
# include <cwchar>
@@ -118,6 +121,34 @@ __find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst>
118121
return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
119122
}
120123

124+
// segmented iterator implementation
125+
126+
template <class>
127+
struct __find_segment;
128+
129+
template <class _SegmentedIterator,
130+
class _Tp,
131+
class _Proj,
132+
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
133+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
134+
__find_impl(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
135+
return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
136+
}
137+
138+
template <class _Tp>
139+
struct __find_segment {
140+
const _Tp& __value_;
141+
142+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __find_segment(const _Tp& __value) : __value_(__value) {}
143+
144+
template <class _InputIterator, class _Proj>
145+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
146+
operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
147+
return std::__find_impl(__first, __last, __value_, __proj);
148+
}
149+
};
150+
151+
// public API
121152
template <class _InputIterator, class _Tp>
122153
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
123154
find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
libcxx/include/__algorithm/find_segment_if.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
10+
#define _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
11+
12+
#include <__config>
13+
#include <__iterator/segmented_iterator.h>
14+
15+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
16+
# pragma GCC system_header
17+
#endif
18+
19+
_LIBCPP_BEGIN_NAMESPACE_STD
20+
21+
// __find_segment_if is a utility function for optimizing iteration over segmented iterators linearly.
22+
// [__first, __last) has to be a segmented range. __pred is expected to take a range of local iterators and the __proj.
23+
// It returns an iterator to the first element that satisfies the predicate, or a one-past-the-end iterator if there was
24+
// no match. __proj may be anything that should be passed to __pred, but is expected to be a projection to support
25+
// ranges algorithms, or __identity for classic algorithms.
26+
27+
template <class _SegmentedIterator, class _Pred, class _Proj>
28+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
29+
__find_segment_if(_SegmentedIterator __first, _SegmentedIterator __last, _Pred __pred, _Proj& __proj) {
30+
using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
31+
32+
auto __sfirst = _Traits::__segment(__first);
33+
auto __slast = _Traits::__segment(__last);
34+
35+
// We are in a single segment, so we might not be at the beginning or end
36+
if (__sfirst == __slast)
37+
return _Traits::__compose(__sfirst, __pred(_Traits::__local(__first), _Traits::__local(__last), __proj));
38+
39+
{ // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
40+
auto __llast = _Traits::__end(__sfirst);
41+
auto __liter = __pred(_Traits::__local(__first), __llast, __proj);
42+
if (__liter != __llast)
43+
return _Traits::__compose(__sfirst, __liter);
44+
}
45+
++__sfirst;
46+
47+
// Iterate over the segments which are guaranteed to be completely in the range
48+
while (__sfirst != __slast) {
49+
auto __llast = _Traits::__end(__sfirst);
50+
auto __liter = __pred(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __proj);
51+
if (__liter != __llast)
52+
return _Traits::__compose(__sfirst, __liter);
53+
++__sfirst;
54+
}
55+
56+
// Iterate over the last segment
57+
return _Traits::__compose(__sfirst, __pred(_Traits::__begin(__sfirst), _Traits::__local(__last), __proj));
58+
}
59+
60+
_LIBCPP_END_NAMESPACE_STD
61+
62+
#endif // _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H

libcxx/include/deque

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ public:
440440
}
441441

442442
static _LIBCPP_HIDE_FROM_ABI _Iterator __compose(__segment_iterator __segment, __local_iterator __local) {
443-
if (__local == __end(__segment)) {
443+
if (__segment && __local == __end(__segment)) {
444444
++__segment;
445445
return _Iterator(__segment, *__segment);
446446
}

libcxx/include/module.modulemap.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,7 @@ module std_private_algorithm_find_end [system
665665
module std_private_algorithm_find_first_of [system] { header "__algorithm/find_first_of.h" }
666666
module std_private_algorithm_find_if [system] { header "__algorithm/find_if.h" }
667667
module std_private_algorithm_find_if_not [system] { header "__algorithm/find_if_not.h" }
668+
module std_private_algorithm_find_segment_if [system] { header "__algorithm/find_segment_if.h" }
668669
module std_private_algorithm_for_each [system] { header "__algorithm/for_each.h" }
669670
module std_private_algorithm_for_each_n [system] { header "__algorithm/for_each_n.h" }
670671
module std_private_algorithm_for_each_segment [system] { header "__algorithm/for_each_segment.h" }

libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <algorithm>
1616
#include <cassert>
1717
#include <cstddef>
18+
#include <deque>
1819

1920
#include "test_macros.h"
2021

libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <algorithm>
1616
#include <cassert>
1717
#include <cstddef>
18+
#include <deque>
1819

1920
#include "test_macros.h"
2021

@@ -224,10 +225,19 @@ constexpr bool all_the_algorithms()
224225
return true;
225226
}
226227

227-
int main(int, char**)
228-
{
229-
all_the_algorithms();
230-
static_assert(all_the_algorithms());
228+
void test_deque() {
229+
std::deque<T> d;
230+
int copies = 0;
231+
void* value = nullptr;
232+
233+
(void)std::ranges::find(d, value, Proj(&copies));
234+
assert(copies == 0);
235+
}
236+
237+
int main(int, char**) {
238+
test_deque();
239+
all_the_algorithms();
240+
static_assert(all_the_algorithms());
231241

232-
return 0;
242+
return 0;
233243
}

libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include <algorithm>
2121
#include <cassert>
22+
#include <deque>
2223
#include <vector>
2324
#include <type_traits>
2425

@@ -115,6 +116,52 @@ struct TestTypes {
115116
}
116117
};
117118

119+
void test_deque() {
120+
{ // empty deque
121+
std::deque<int> data;
122+
assert(std::find(data.begin(), data.end(), 4) == data.end());
123+
}
124+
125+
{ // single element - match
126+
std::deque<int> data;
127+
data.push_back(4);
128+
assert(std::find(data.begin(), data.end(), 4) == data.begin());
129+
}
130+
131+
{ // single element - no match
132+
std::deque<int> data;
133+
data.push_back(3);
134+
assert(std::find(data.begin(), data.end(), 4) == data.end());
135+
}
136+
137+
// many elements
138+
int sizes[] = {2, 3, 1023, 1024, 1025, 2047, 2048, 2049};
139+
for (auto size : sizes) {
140+
{ // last element match
141+
std::deque<int> data;
142+
data.resize(size);
143+
std::fill(data.begin(), data.end(), 3);
144+
data[size - 1] = 4;
145+
assert(std::find(data.begin(), data.end(), 4) == data.end() - 1);
146+
}
147+
148+
{ // second-last element match
149+
std::deque<int> data;
150+
data.resize(size);
151+
std::fill(data.begin(), data.end(), 3);
152+
data[size - 2] = 4;
153+
assert(std::find(data.begin(), data.end(), 4) == data.end() - 2);
154+
}
155+
156+
{ // no match
157+
std::deque<int> data;
158+
data.resize(size);
159+
std::fill(data.begin(), data.end(), 3);
160+
assert(std::find(data.begin(), data.end(), 4) == data.end());
161+
}
162+
}
163+
}
164+
118165
TEST_CONSTEXPR_CXX20 bool test() {
119166
types::for_each(types::integer_types(), TestTypes<char>());
120167
types::for_each(types::integer_types(), TestTypes<int>());
@@ -124,10 +171,21 @@ TEST_CONSTEXPR_CXX20 bool test() {
124171
Test<TriviallyComparable<wchar_t>, TriviallyComparable<wchar_t>>().operator()<TriviallyComparable<wchar_t>*>();
125172
#endif
126173

174+
// TODO: Remove the `_LIBCPP_ENABLE_EXPERIMENTAL` check once we have the FTM guarded or views::join isn't
175+
// experimental anymore
176+
#if TEST_STD_VER >= 20 && (!defined(_LIBCPP_VERSION) || defined(_LIBCPP_ENABLE_EXPERIMENTAL))
177+
{
178+
std::vector<std::vector<int>> vec = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
179+
auto view = vec | std::views::join;
180+
assert(std::find(view.begin(), view.end(), 4) == std::next(view.begin(), 3));
181+
}
182+
#endif
183+
127184
return true;
128185
}
129186

130187
int main(int, char**) {
188+
test_deque();
131189
test();
132190
#if TEST_STD_VER >= 20
133191
static_assert(test());

0 commit comments

Comments
 (0)