Skip to content

Commit e5da968

Browse files
author
Xiaoyang Liu
committed
[libc++][ranges] optimize the performance of 'ranges::starts_with'
1 parent 2d09ac4 commit e5da968

File tree

4 files changed

+176
-13
lines changed

4 files changed

+176
-13
lines changed

libcxx/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ set(BENCHMARK_TESTS
197197
algorithms/ranges_sort.bench.cpp
198198
algorithms/ranges_sort_heap.bench.cpp
199199
algorithms/ranges_stable_sort.bench.cpp
200+
algorithms/ranges_starts_with.bench.cpp
200201
algorithms/sort.bench.cpp
201202
algorithms/sort_heap.bench.cpp
202203
algorithms/stable_sort.bench.cpp
libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <algorithm>
10+
#include <benchmark/benchmark.h>
11+
#include <vector>
12+
13+
#include "test_iterators.h"
14+
15+
static void bm_starts_with_contiguous_iter_with_memcmp_optimization(benchmark::State& state) {
16+
std::vector<int> a(state.range(), 1);
17+
std::vector<int> p(state.range(), 1);
18+
19+
for (auto _ : state) {
20+
benchmark::DoNotOptimize(a);
21+
benchmark::DoNotOptimize(p);
22+
23+
auto begin1 = contiguous_iterator(a.data());
24+
auto end1 = contiguous_iterator(a.data() + a.size());
25+
auto begin2 = contiguous_iterator(p.data());
26+
auto end2 = contiguous_iterator(p.data() + p.size());
27+
28+
benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
29+
}
30+
}
31+
BENCHMARK(bm_starts_with_contiguous_iter_with_memcmp_optimization)->RangeMultiplier(16)->Range(16, 16 << 20);
32+
33+
static void bm_starts_with_contiguous_iter(benchmark::State& state) {
34+
std::vector<int> a(state.range(), 1);
35+
std::vector<int> p(state.range(), 1);
36+
37+
for (auto _ : state) {
38+
benchmark::DoNotOptimize(a);
39+
benchmark::DoNotOptimize(p);
40+
41+
auto begin1 = contiguous_iterator(a.data());
42+
auto end1 = contiguous_iterator(a.data() + a.size());
43+
auto begin2 = contiguous_iterator(p.data());
44+
auto end2 = contiguous_iterator(p.data() + p.size());
45+
46+
// Using a custom predicate to make sure the memcmp optimization doesn't get invoked
47+
benchmark::DoNotOptimize(
48+
std::ranges::starts_with(begin1, end1, begin2, end2, [](const int a, const int b) { return a == b; }));
49+
}
50+
}
51+
BENCHMARK(bm_starts_with_contiguous_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
52+
53+
static void bm_starts_with_random_access_iter(benchmark::State& state) {
54+
std::vector<int> a(state.range(), 1);
55+
std::vector<int> p(state.range(), 1);
56+
57+
for (auto _ : state) {
58+
benchmark::DoNotOptimize(a);
59+
benchmark::DoNotOptimize(p);
60+
61+
auto begin1 = random_access_iterator(a.data());
62+
auto end1 = random_access_iterator(a.data() + a.size());
63+
auto begin2 = random_access_iterator(p.data());
64+
auto end2 = random_access_iterator(p.data() + p.size());
65+
66+
benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
67+
}
68+
}
69+
BENCHMARK(bm_starts_with_random_access_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
70+
71+
static void bm_starts_with_bidirectional_iter(benchmark::State& state) {
72+
std::vector<int> a(state.range(), 1);
73+
std::vector<int> p(state.range(), 1);
74+
75+
for (auto _ : state) {
76+
benchmark::DoNotOptimize(a);
77+
benchmark::DoNotOptimize(p);
78+
79+
auto begin1 = bidirectional_iterator(a.data());
80+
auto end1 = bidirectional_iterator(a.data() + a.size());
81+
auto begin2 = bidirectional_iterator(p.data());
82+
auto end2 = bidirectional_iterator(p.data() + p.size());
83+
84+
benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
85+
}
86+
}
87+
BENCHMARK(bm_starts_with_bidirectional_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
88+
89+
static void bm_starts_with_forward_iter(benchmark::State& state) {
90+
std::vector<int> a(state.range(), 1);
91+
std::vector<int> p(state.range(), 1);
92+
93+
for (auto _ : state) {
94+
benchmark::DoNotOptimize(a);
95+
benchmark::DoNotOptimize(p);
96+
97+
auto begin1 = forward_iterator(a.data());
98+
auto end1 = forward_iterator(a.data() + a.size());
99+
auto begin2 = forward_iterator(p.data());
100+
auto end2 = forward_iterator(p.data() + p.size());
101+
102+
benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
103+
}
104+
}
105+
BENCHMARK(bm_starts_with_forward_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
106+
107+
// Google Benchmark's entry-point macro: supplies main() for this benchmark
// binary so the benchmarks registered with BENCHMARK(...) are run.
BENCHMARK_MAIN();

libcxx/include/__algorithm/ranges_starts_with.h

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@
99
#ifndef _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
1010
#define _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
1111

12-
#include <__algorithm/in_in_result.h>
12+
#include <__algorithm/ranges_equal.h>
1313
#include <__algorithm/ranges_mismatch.h>
1414
#include <__config>
1515
#include <__functional/identity.h>
1616
#include <__functional/ranges_operations.h>
17+
#include <__functional/reference_wrapper.h>
1718
#include <__iterator/concepts.h>
19+
#include <__iterator/distance.h>
1820
#include <__iterator/indirectly_comparable.h>
21+
#include <__iterator/next.h>
1922
#include <__ranges/access.h>
2023
#include <__ranges/concepts.h>
24+
#include <__ranges/size.h>
2125
#include <__utility/move.h>
2226

2327
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -50,14 +54,36 @@ struct __fn {
5054
_Pred __pred = {},
5155
_Proj1 __proj1 = {},
5256
_Proj2 __proj2 = {}) {
53-
return __mismatch::__fn::__go(
57+
if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) {
58+
auto __n1 = ranges::distance(__first1, __last1);
59+
auto __n2 = ranges::distance(__first2, __last2);
60+
if (__n2 == 0) {
61+
return true;
62+
}
63+
if (__n2 > __n1) {
64+
return false;
65+
}
66+
67+
if constexpr (contiguous_iterator<_Iter1> && contiguous_iterator<_Iter2>) {
68+
return ranges::equal(
69+
std::move(__first1),
70+
ranges::next(__first1, __n2),
71+
std::move(__first2),
72+
std::move(__last2),
73+
std::ref(__pred),
74+
std::ref(__proj1),
75+
std::ref(__proj2));
76+
}
77+
}
78+
79+
return ranges::mismatch(
5480
std::move(__first1),
5581
std::move(__last1),
5682
std::move(__first2),
57-
std::move(__last2),
58-
__pred,
59-
__proj1,
60-
__proj2)
83+
__last2,
84+
std::ref(__pred),
85+
std::ref(__proj1),
86+
std::ref(__proj2))
6187
.in2 == __last2;
6288
}
6389

@@ -69,18 +95,41 @@ struct __fn {
6995
requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2>
7096
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool
7197
operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) {
72-
return __mismatch::__fn::__go(
98+
if constexpr (sized_range<_Range1> && sized_range<_Range2>) {
99+
auto __n1 = ranges::size(__range1);
100+
auto __n2 = ranges::size(__range2);
101+
if (__n2 == 0) {
102+
return true;
103+
}
104+
if (__n2 > __n1) {
105+
return false;
106+
}
107+
108+
if constexpr (contiguous_range<_Range1> && contiguous_range<_Range2>) {
109+
return ranges::equal(
110+
ranges::begin(__range1),
111+
ranges::next(ranges::begin(__range1), __n2),
112+
ranges::begin(__range2),
113+
ranges::end(__range2),
114+
std::ref(__pred),
115+
std::ref(__proj1),
116+
std::ref(__proj2));
117+
}
118+
}
119+
120+
return ranges::mismatch(
73121
ranges::begin(__range1),
74122
ranges::end(__range1),
75123
ranges::begin(__range2),
76124
ranges::end(__range2),
77-
__pred,
78-
__proj1,
79-
__proj2)
125+
std::ref(__pred),
126+
std::ref(__proj1),
127+
std::ref(__proj2))
80128
.in2 == ranges::end(__range2);
81129
}
82130
};
83131
} // namespace __starts_with
132+
84133
inline namespace __cpo {
85134
inline constexpr auto starts_with = __starts_with::__fn{};
86135
} // namespace __cpo

libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,12 +216,18 @@ constexpr void test_iterators() {
216216
constexpr bool test() {
217217
types::for_each(types::cpp20_input_iterator_list<int*>{}, []<class Iter2>() {
218218
types::for_each(types::cpp20_input_iterator_list<int*>{}, []<class Iter1>() {
219-
if constexpr (std::forward_iterator<Iter1> && std::forward_iterator<Iter2>)
219+
if constexpr (std::forward_iterator<Iter1> && std::forward_iterator<Iter2>) {
220220
test_iterators<Iter1, Iter1, Iter2, Iter2>();
221-
if constexpr (std::forward_iterator<Iter2>)
221+
}
222+
if constexpr (std::forward_iterator<Iter2>) {
223+
test_iterators<Iter1, sentinel_wrapper<Iter1>, Iter2, Iter2>();
222224
test_iterators<Iter1, sized_sentinel<Iter1>, Iter2, Iter2>();
223-
if constexpr (std::forward_iterator<Iter1>)
225+
}
226+
if constexpr (std::forward_iterator<Iter1>) {
227+
test_iterators<Iter1, Iter1, Iter2, sentinel_wrapper<Iter2>>();
224228
test_iterators<Iter1, Iter1, Iter2, sized_sentinel<Iter2>>();
229+
}
230+
test_iterators<Iter1, sentinel_wrapper<Iter1>, Iter2, sentinel_wrapper<Iter2>>();
225231
test_iterators<Iter1, sized_sentinel<Iter1>, Iter2, sized_sentinel<Iter2>>();
226232
});
227233
});

0 commit comments

Comments
 (0)