Skip to content

Commit d07fdf9

Browse files
authored
[libc++] Optimize lexicographical_compare (llvm#65279)
If the comparison operation is equivalent to < and that is a total order, we know that we can use equality comparison on that type instead to extract some information. Furthermore, if equality comparison on that type is trivial, the user can't observe that we're calling it. So instead of using the user-provided total order, we use std::mismatch, which uses equality comparison (and is vectorized). Additionally, if the type is trivially lexicographically comparable, we can go one step further and use std::memcmp directly instead of calling std::mismatch. Benchmarks: ``` ------------------------------------------------------------------------------------- Benchmark old new ------------------------------------------------------------------------------------- bm_lexicographical_compare<unsigned char>/1 1.17 ns 2.34 ns bm_lexicographical_compare<unsigned char>/2 1.64 ns 2.57 ns bm_lexicographical_compare<unsigned char>/3 2.23 ns 2.58 ns bm_lexicographical_compare<unsigned char>/4 2.82 ns 2.57 ns bm_lexicographical_compare<unsigned char>/5 3.34 ns 2.11 ns bm_lexicographical_compare<unsigned char>/6 3.94 ns 2.21 ns bm_lexicographical_compare<unsigned char>/7 4.56 ns 2.11 ns bm_lexicographical_compare<unsigned char>/8 5.25 ns 2.11 ns bm_lexicographical_compare<unsigned char>/16 9.88 ns 2.11 ns bm_lexicographical_compare<unsigned char>/64 38.9 ns 2.36 ns bm_lexicographical_compare<unsigned char>/512 317 ns 6.54 ns bm_lexicographical_compare<unsigned char>/4096 2517 ns 41.4 ns bm_lexicographical_compare<unsigned char>/32768 20052 ns 488 ns bm_lexicographical_compare<unsigned char>/262144 159579 ns 4409 ns bm_lexicographical_compare<unsigned char>/1048576 640456 ns 20342 ns bm_lexicographical_compare<signed char>/1 1.18 ns 2.37 ns bm_lexicographical_compare<signed char>/2 1.65 ns 2.60 ns bm_lexicographical_compare<signed char>/3 2.23 ns 2.83 ns bm_lexicographical_compare<signed char>/4 2.81 ns 3.06 ns bm_lexicographical_compare<signed char>/5 3.35 ns 3.30 ns 
bm_lexicographical_compare<signed char>/6 3.90 ns 3.99 ns bm_lexicographical_compare<signed char>/7 4.56 ns 3.78 ns bm_lexicographical_compare<signed char>/8 5.20 ns 4.02 ns bm_lexicographical_compare<signed char>/16 9.80 ns 6.21 ns bm_lexicographical_compare<signed char>/64 39.0 ns 3.16 ns bm_lexicographical_compare<signed char>/512 318 ns 7.58 ns bm_lexicographical_compare<signed char>/4096 2514 ns 47.4 ns bm_lexicographical_compare<signed char>/32768 20096 ns 504 ns bm_lexicographical_compare<signed char>/262144 156617 ns 4146 ns bm_lexicographical_compare<signed char>/1048576 624265 ns 19810 ns bm_lexicographical_compare<int>/1 1.15 ns 2.12 ns bm_lexicographical_compare<int>/2 1.60 ns 2.36 ns bm_lexicographical_compare<int>/3 2.21 ns 2.59 ns bm_lexicographical_compare<int>/4 2.74 ns 2.83 ns bm_lexicographical_compare<int>/5 3.26 ns 3.06 ns bm_lexicographical_compare<int>/6 3.81 ns 4.53 ns bm_lexicographical_compare<int>/7 4.41 ns 4.72 ns bm_lexicographical_compare<int>/8 5.08 ns 2.36 ns bm_lexicographical_compare<int>/16 9.54 ns 3.08 ns bm_lexicographical_compare<int>/64 37.8 ns 4.71 ns bm_lexicographical_compare<int>/512 309 ns 24.6 ns bm_lexicographical_compare<int>/4096 2422 ns 204 ns bm_lexicographical_compare<int>/32768 19362 ns 1947 ns bm_lexicographical_compare<int>/262144 155727 ns 19793 ns bm_lexicographical_compare<int>/1048576 623614 ns 80180 ns bm_ranges_lexicographical_compare<unsigned char>/1 1.07 ns 2.35 ns bm_ranges_lexicographical_compare<unsigned char>/2 1.72 ns 2.13 ns bm_ranges_lexicographical_compare<unsigned char>/3 2.46 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/4 3.17 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/5 3.86 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/6 4.55 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/7 5.25 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/8 5.95 ns 2.13 ns bm_ranges_lexicographical_compare<unsigned char>/16 11.7 ns 2.13 ns 
bm_ranges_lexicographical_compare<unsigned char>/64 45.5 ns 2.36 ns bm_ranges_lexicographical_compare<unsigned char>/512 366 ns 6.35 ns bm_ranges_lexicographical_compare<unsigned char>/4096 2886 ns 40.9 ns bm_ranges_lexicographical_compare<unsigned char>/32768 23054 ns 489 ns bm_ranges_lexicographical_compare<unsigned char>/262144 185302 ns 4339 ns bm_ranges_lexicographical_compare<unsigned char>/1048576 741576 ns 19430 ns bm_ranges_lexicographical_compare<signed char>/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare<signed char>/2 1.66 ns 2.35 ns bm_ranges_lexicographical_compare<signed char>/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare<signed char>/4 2.82 ns 2.82 ns bm_ranges_lexicographical_compare<signed char>/5 3.34 ns 3.06 ns bm_ranges_lexicographical_compare<signed char>/6 3.92 ns 3.99 ns bm_ranges_lexicographical_compare<signed char>/7 4.64 ns 4.10 ns bm_ranges_lexicographical_compare<signed char>/8 5.21 ns 4.61 ns bm_ranges_lexicographical_compare<signed char>/16 9.79 ns 7.42 ns bm_ranges_lexicographical_compare<signed char>/64 38.9 ns 2.93 ns bm_ranges_lexicographical_compare<signed char>/512 317 ns 7.31 ns bm_ranges_lexicographical_compare<signed char>/4096 2500 ns 47.5 ns bm_ranges_lexicographical_compare<signed char>/32768 19940 ns 496 ns bm_ranges_lexicographical_compare<signed char>/262144 159166 ns 4393 ns bm_ranges_lexicographical_compare<signed char>/1048576 638206 ns 19786 ns bm_ranges_lexicographical_compare<int>/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare<int>/2 1.64 ns 3.04 ns bm_ranges_lexicographical_compare<int>/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare<int>/4 2.81 ns 2.81 ns bm_ranges_lexicographical_compare<int>/5 3.35 ns 3.05 ns bm_ranges_lexicographical_compare<int>/6 3.94 ns 4.60 ns bm_ranges_lexicographical_compare<int>/7 4.60 ns 4.81 ns bm_ranges_lexicographical_compare<int>/8 5.19 ns 2.35 ns bm_ranges_lexicographical_compare<int>/16 9.85 ns 2.87 ns bm_ranges_lexicographical_compare<int>/64 38.9 ns 4.70 ns 
bm_ranges_lexicographical_compare<int>/512 318 ns 24.5 ns bm_ranges_lexicographical_compare<int>/4096 2494 ns 202 ns bm_ranges_lexicographical_compare<int>/32768 20000 ns 1939 ns bm_ranges_lexicographical_compare<int>/262144 160433 ns 19730 ns bm_ranges_lexicographical_compare<int>/1048576 642636 ns 80760 ns ```
1 parent 8d1b17b commit d07fdf9

File tree

21 files changed

+269
-96
lines changed

21 files changed

+269
-96
lines changed

libcxx/docs/ReleaseNotes/20.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ Implemented Papers
4444
Improvements and New Features
4545
-----------------------------
4646

47-
- TODO
47+
- The ``lexicographical_compare`` and ``ranges::lexicographical_compare`` algorithms have been optimized for trivially
48+
equality comparable types, resulting in a performance improvement of up to 40x.
4849

4950

5051
Deprecations and Removals

libcxx/include/__algorithm/comp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <__config>
1313
#include <__type_traits/desugars_to.h>
14+
#include <__type_traits/is_integral.h>
1415

1516
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1617
# pragma GCC system_header
@@ -42,7 +43,7 @@ struct __less<void, void> {
4243
};
4344

4445
template <class _Tp>
45-
inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;
46+
inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value;
4647

4748
_LIBCPP_END_NAMESPACE_STD
4849

libcxx/include/__algorithm/lexicographical_compare.h

Lines changed: 82 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,40 +10,109 @@
1010
#define _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H
1111

1212
#include <__algorithm/comp.h>
13-
#include <__algorithm/comp_ref_type.h>
13+
#include <__algorithm/min.h>
14+
#include <__algorithm/mismatch.h>
15+
#include <__algorithm/simd_utils.h>
16+
#include <__algorithm/unwrap_iter.h>
1417
#include <__config>
18+
#include <__functional/identity.h>
1519
#include <__iterator/iterator_traits.h>
20+
#include <__string/constexpr_c_functions.h>
21+
#include <__type_traits/desugars_to.h>
22+
#include <__type_traits/invoke.h>
23+
#include <__type_traits/is_equality_comparable.h>
24+
#include <__type_traits/is_integral.h>
25+
#include <__type_traits/is_trivially_lexicographically_comparable.h>
26+
#include <__type_traits/is_volatile.h>
27+
28+
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
29+
# include <cwchar>
30+
#endif
1631

1732
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1833
# pragma GCC system_header
1934
#endif
2035

36+
_LIBCPP_PUSH_MACROS
37+
#include <__undef_macros>
38+
2139
_LIBCPP_BEGIN_NAMESPACE_STD
2240

23-
template <class _Compare, class _InputIterator1, class _InputIterator2>
41+
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Proj1, class _Proj2, class _Comp>
2442
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __lexicographical_compare(
25-
_InputIterator1 __first1,
26-
_InputIterator1 __last1,
27-
_InputIterator2 __first2,
28-
_InputIterator2 __last2,
29-
_Compare __comp) {
30-
for (; __first2 != __last2; ++__first1, (void)++__first2) {
31-
if (__first1 == __last1 || __comp(*__first1, *__first2))
43+
_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp& __comp, _Proj1& __proj1, _Proj2& __proj2) {
44+
while (__first2 != __last2) {
45+
if (__first1 == __last1 ||
46+
std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
3247
return true;
33-
if (__comp(*__first2, *__first1))
48+
if (std::__invoke(__comp, std::__invoke(__proj2, *__first2), std::__invoke(__proj1, *__first1)))
3449
return false;
50+
++__first1;
51+
++__first2;
3552
}
3653
return false;
3754
}
3855

56+
#if _LIBCPP_STD_VER >= 14
57+
58+
// If the comparison operation is equivalent to < and that is a total order, we know that we can use equality comparison
59+
// on that type instead to extract some information. Furthermore, if equality comparison on that type is trivial, the
60+
// user can't observe that we're calling it. So instead of using the user-provided total order, we use std::mismatch,
61+
// which uses equality comparison (and is vectorized). Additionally, if the type is trivially lexicographically
62+
// comparable, we can go one step further and use std::memcmp directly instead of calling std::mismatch.
63+
template <class _Tp,
64+
class _Proj1,
65+
class _Proj2,
66+
class _Comp,
67+
__enable_if_t<__desugars_to_v<__totally_ordered_less_tag, _Comp, _Tp, _Tp> && !is_volatile<_Tp>::value &&
68+
__libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value &&
69+
__is_identity<_Proj1>::value && __is_identity<_Proj2>::value,
70+
int> = 0>
71+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
72+
__lexicographical_compare(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Comp&, _Proj1&, _Proj2&) {
73+
if constexpr (__is_trivially_lexicographically_comparable_v<_Tp, _Tp>) {
74+
auto __res =
75+
std::__constexpr_memcmp(__first1, __first2, __element_count(std::min(__last1 - __first1, __last2 - __first2)));
76+
if (__res == 0)
77+
return __last1 - __first1 < __last2 - __first2;
78+
return __res < 0;
79+
}
80+
# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
81+
else if constexpr (is_same<__remove_cv_t<_Tp>, wchar_t>::value) {
82+
auto __res = std::__constexpr_wmemcmp(__first1, __first2, std::min(__last1 - __first1, __last2 - __first2));
83+
if (__res == 0)
84+
return __last1 - __first1 < __last2 - __first2;
85+
return __res < 0;
86+
}
87+
# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
88+
else {
89+
auto __res = std::mismatch(__first1, __last1, __first2, __last2);
90+
if (__res.second == __last2)
91+
return false;
92+
if (__res.first == __last1)
93+
return true;
94+
return *__res.first < *__res.second;
95+
}
96+
}
97+
98+
#endif // _LIBCPP_STD_VER >= 14
99+
39100
template <class _InputIterator1, class _InputIterator2, class _Compare>
40101
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare(
41102
_InputIterator1 __first1,
42103
_InputIterator1 __last1,
43104
_InputIterator2 __first2,
44105
_InputIterator2 __last2,
45106
_Compare __comp) {
46-
return std::__lexicographical_compare<__comp_ref_type<_Compare> >(__first1, __last1, __first2, __last2, __comp);
107+
__identity __proj;
108+
return std::__lexicographical_compare(
109+
std::__unwrap_iter(__first1),
110+
std::__unwrap_iter(__last1),
111+
std::__unwrap_iter(__first2),
112+
std::__unwrap_iter(__last2),
113+
__comp,
114+
__proj,
115+
__proj);
47116
}
48117

49118
template <class _InputIterator1, class _InputIterator2>
@@ -54,4 +123,6 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo
54123

55124
_LIBCPP_END_NAMESPACE_STD
56125

126+
_LIBCPP_POP_MACROS
127+
57128
#endif // _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H

libcxx/include/__algorithm/ranges_lexicographical_compare.h

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#ifndef _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H
1010
#define _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H
1111

12+
#include <__algorithm/lexicographical_compare.h>
13+
#include <__algorithm/unwrap_range.h>
1214
#include <__config>
1315
#include <__functional/identity.h>
1416
#include <__functional/invoke.h>
@@ -33,23 +35,24 @@ _LIBCPP_BEGIN_NAMESPACE_STD
3335
namespace ranges {
3436
struct __lexicographical_compare {
3537
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Proj1, class _Proj2, class _Comp>
36-
_LIBCPP_HIDE_FROM_ABI constexpr static bool __lexicographical_compare_impl(
38+
static _LIBCPP_HIDE_FROM_ABI constexpr bool __lexicographical_compare_unwrap(
3739
_Iter1 __first1,
3840
_Sent1 __last1,
3941
_Iter2 __first2,
4042
_Sent2 __last2,
4143
_Comp& __comp,
4244
_Proj1& __proj1,
4345
_Proj2& __proj2) {
44-
while (__first2 != __last2) {
45-
if (__first1 == __last1 || std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2)))
46-
return true;
47-
if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1)))
48-
return false;
49-
++__first1;
50-
++__first2;
51-
}
52-
return false;
46+
auto [__first1_un, __last1_un] = std::__unwrap_range(std::move(__first1), std::move(__last1));
47+
auto [__first2_un, __last2_un] = std::__unwrap_range(std::move(__first2), std::move(__last2));
48+
return std::__lexicographical_compare(
49+
std::move(__first1_un),
50+
std::move(__last1_un),
51+
std::move(__first2_un),
52+
std::move(__last2_un),
53+
__comp,
54+
__proj1,
55+
__proj2);
5356
}
5457

5558
template <input_iterator _Iter1,
@@ -67,7 +70,7 @@ struct __lexicographical_compare {
6770
_Comp __comp = {},
6871
_Proj1 __proj1 = {},
6972
_Proj2 __proj2 = {}) const {
70-
return __lexicographical_compare_impl(
73+
return __lexicographical_compare_unwrap(
7174
std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __comp, __proj1, __proj2);
7275
}
7376

@@ -79,7 +82,7 @@ struct __lexicographical_compare {
7982
_Comp = ranges::less>
8083
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(
8184
_Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
82-
return __lexicographical_compare_impl(
85+
return __lexicographical_compare_unwrap(
8386
ranges::begin(__range1),
8487
ranges::end(__range1),
8588
ranges::begin(__range2),

libcxx/include/__algorithm/ranges_minmax.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ struct __minmax {
8888
// vectorize the code.
8989
if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> &&
9090
__is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value &&
91-
__desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) {
91+
__desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) {
9292
minmax_result<_ValueT> __result = {__r[0], __r[0]};
9393
for (auto __e : __r) {
9494
if (__e < __result.min)

libcxx/include/__functional/operations.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <__functional/binary_function.h>
1515
#include <__functional/unary_function.h>
1616
#include <__type_traits/desugars_to.h>
17+
#include <__type_traits/is_integral.h>
1718
#include <__utility/forward.h>
1819

1920
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -362,7 +363,7 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> {
362363
_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less);
363364

364365
template <class _Tp>
365-
inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true;
366+
inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value;
366367

367368
#if _LIBCPP_STD_VER >= 14
368369
template <>
@@ -377,7 +378,7 @@ struct _LIBCPP_TEMPLATE_VIS less<void> {
377378
};
378379

379380
template <class _Tp>
380-
inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Tp> = true;
381+
inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value;
381382
#endif
382383

383384
#if _LIBCPP_STD_VER >= 14

libcxx/include/__functional/ranges_operations.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ template <class _Tp, class _Up>
100100
inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true;
101101

102102
template <class _Tp, class _Up>
103-
inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true;
103+
inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true;
104104

105105
#endif // _LIBCPP_STD_VER >= 20
106106

libcxx/include/__string/constexpr_c_functions.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,13 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 size_t __constexpr_st
6464
return __builtin_strlen(reinterpret_cast<const char*>(__str));
6565
}
6666

67-
// Because of __libcpp_is_trivially_lexicographically_comparable we know that comparing the object representations is
67+
// Because of __is_trivially_lexicographically_comparable_v we know that comparing the object representations is
6868
// equivalent to a std::memcmp. Since we have multiple objects contiguously in memory, we can call memcmp once instead
6969
// of invoking it on every object individually.
7070
template <class _Tp, class _Up>
7171
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int
7272
__constexpr_memcmp(const _Tp* __lhs, const _Up* __rhs, __element_count __n) {
73-
static_assert(__libcpp_is_trivially_lexicographically_comparable<_Tp, _Up>::value,
73+
static_assert(__is_trivially_lexicographically_comparable_v<_Tp, _Up>,
7474
"_Tp and _Up have to be trivially lexicographically comparable");
7575

7676
auto __count = static_cast<size_t>(__n);

libcxx/include/__type_traits/desugars_to.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,21 @@
1717

1818
_LIBCPP_BEGIN_NAMESPACE_STD
1919

20-
// Tags to represent the canonical operations
20+
// Tags to represent the canonical operations.
21+
22+
// syntactically, the operation is equivalent to calling `a == b`
2123
struct __equal_tag {};
24+
25+
// syntactically, the operation is equivalent to calling `a + b`
2226
struct __plus_tag {};
23-
struct __less_tag {};
27+
28+
// syntactically, the operation is equivalent to calling `a < b`, and these expressions
29+
// have to be true for any `a` and `b`:
30+
// - `(a < b) == (b > a)`
31+
// - `(!(a < b) && !(b < a)) == (a == b)`
32+
// For example, this is satisfied for std::less on integral types, but also for ranges::less on all types due to
33+
// additional semantic requirements on that operation.
34+
struct __totally_ordered_less_tag {};
2435

2536
// This class template is used to determine whether an operation "desugars"
2637
// (or boils down) to a given canonical operation.

libcxx/include/__type_traits/is_trivially_lexicographically_comparable.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <__type_traits/remove_cv.h>
1717
#include <__type_traits/void_t.h>
1818
#include <__utility/declval.h>
19+
#include <cstddef>
1920

2021
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
2122
# pragma GCC system_header
@@ -40,13 +41,22 @@ _LIBCPP_BEGIN_NAMESPACE_STD
4041
// unsigned integer types with sizeof(T) > 1: depending on the endianness, the LSB might be the first byte to be
4142
// compared. This means that when comparing unsigned(129) and unsigned(2)
4243
// using memcmp(), the result would be that 2 > 129.
43-
// TODO: Do we want to enable this on big-endian systems?
44+
45+
template <class _Tp>
46+
inline const bool __is_std_byte_v = false;
47+
48+
#if _LIBCPP_STD_VER >= 17
49+
template <>
50+
inline const bool __is_std_byte_v<byte> = true;
51+
#endif
4452

4553
template <class _Tp, class _Up>
46-
struct __libcpp_is_trivially_lexicographically_comparable
47-
: integral_constant<bool,
48-
is_same<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >::value && sizeof(_Tp) == 1 &&
49-
is_unsigned<_Tp>::value> {};
54+
inline const bool __is_trivially_lexicographically_comparable_v =
55+
is_same<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >::value &&
56+
#ifdef _LIBCPP_LITTLE_ENDIAN
57+
sizeof(_Tp) == 1 &&
58+
#endif
59+
(is_unsigned<_Tp>::value || __is_std_byte_v<_Tp>);
5060

5161
_LIBCPP_END_NAMESPACE_STD
5262

libcxx/test/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ set(BENCHMARK_TESTS
114114
algorithms/find.bench.cpp
115115
algorithms/fill.bench.cpp
116116
algorithms/for_each.bench.cpp
117+
algorithms/lexicographical_compare.bench.cpp
117118
algorithms/lower_bound.bench.cpp
118119
algorithms/make_heap.bench.cpp
119120
algorithms/make_heap_then_sort_heap.bench.cpp
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <algorithm>
10+
#include <benchmark/benchmark.h>
11+
#include <vector>
12+
13+
// Benchmarks the worst case: check the whole range just to find out that they compare equal
14+
template <class T>
15+
static void bm_lexicographical_compare(benchmark::State& state) {
16+
std::vector<T> vec1(state.range(), '1');
17+
std::vector<T> vec2(state.range(), '1');
18+
19+
for (auto _ : state) {
20+
benchmark::DoNotOptimize(vec1);
21+
benchmark::DoNotOptimize(vec2);
22+
benchmark::DoNotOptimize(std::lexicographical_compare(vec1.begin(), vec1.end(), vec2.begin(), vec2.end()));
23+
}
24+
}
25+
BENCHMARK(bm_lexicographical_compare<unsigned char>)->DenseRange(1, 8)->Range(16, 1 << 20);
26+
BENCHMARK(bm_lexicographical_compare<signed char>)->DenseRange(1, 8)->Range(16, 1 << 20);
27+
BENCHMARK(bm_lexicographical_compare<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
28+
29+
template <class T>
30+
static void bm_ranges_lexicographical_compare(benchmark::State& state) {
31+
std::vector<T> vec1(state.range(), '1');
32+
std::vector<T> vec2(state.range(), '1');
33+
34+
for (auto _ : state) {
35+
benchmark::DoNotOptimize(vec1);
36+
benchmark::DoNotOptimize(vec2);
37+
benchmark::DoNotOptimize(std::ranges::lexicographical_compare(vec1.begin(), vec1.end(), vec2.begin(), vec2.end()));
38+
}
39+
}
40+
BENCHMARK(bm_ranges_lexicographical_compare<unsigned char>)->DenseRange(1, 8)->Range(16, 1 << 20);
41+
BENCHMARK(bm_ranges_lexicographical_compare<signed char>)->DenseRange(1, 8)->Range(16, 1 << 20);
42+
BENCHMARK(bm_ranges_lexicographical_compare<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
43+
44+
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)