Skip to content

Commit 098b68d

Browse files
committed
Optimize ranges::equal for vector<bool>::iterator
1 parent 6e3631d commit 098b68d

File tree

6 files changed

+417
-217
lines changed

6 files changed

+417
-217
lines changed

libcxx/include/__algorithm/equal.h

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,27 @@
1111
#define _LIBCPP___ALGORITHM_EQUAL_H
1212

1313
#include <__algorithm/comp.h>
14+
#include <__algorithm/min.h>
1415
#include <__algorithm/unwrap_iter.h>
1516
#include <__config>
1617
#include <__functional/identity.h>
18+
#include <__fwd/bit_reference.h>
1719
#include <__iterator/distance.h>
1820
#include <__iterator/iterator_traits.h>
21+
#include <__memory/pointer_traits.h>
1922
#include <__string/constexpr_c_functions.h>
2023
#include <__type_traits/desugars_to.h>
2124
#include <__type_traits/enable_if.h>
2225
#include <__type_traits/invoke.h>
2326
#include <__type_traits/is_equality_comparable.h>
27+
#include <__type_traits/is_same.h>
2428
#include <__type_traits/is_volatile.h>
2529
#include <__utility/move.h>
2630

31+
#if _LIBCPP_STD_VER >= 20
32+
# include <__functional/ranges_operations.h>
33+
#endif
34+
2735
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
2836
# pragma GCC system_header
2937
#endif
@@ -33,6 +41,132 @@ _LIBCPP_PUSH_MACROS
3341

3442
_LIBCPP_BEGIN_NAMESPACE_STD
3543

44+
// Compares the bit range [__first1, __last1) against the range of the same length that
// starts at __first2, one storage word at a time, for the case where the two ranges
// start at different bit offsets (__ctz_) inside their words; __equal_iter_impl
// dispatches here when __first1.__ctz_ != __first2.__ctz_.
//
// Returns false as soon as a masked word comparison differs, and true otherwise
// (including when __last1 - __first1 is not positive). The iterators are taken by
// value, so the updates to __first1 / __first2 below are local to this function.
template <class _Cp, bool _IC1, bool _IC2>
45+
[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
46+
__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
47+
using _It = __bit_iterator<_Cp, _IC1>;
48+
using difference_type = typename _It::difference_type;
49+
using __storage_type = typename _It::__storage_type;
50+
51+
const int __bits_per_word = _It::__bits_per_word;
52+
difference_type __n = __last1 - __first1; // number of bits still to compare
53+
if (__n > 0) {
54+
// do first word
55+
if (__first1.__ctz_ != 0) {
56+
unsigned __clz_f = __bits_per_word - __first1.__ctz_; // bits from __first1's offset to the end of its word
57+
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n); // bits of range 1 handled in this step
58+
__n -= __dn;
59+
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); // selects bits [__first1.__ctz_, __first1.__ctz_ + __dn)
60+
__storage_type __b = *__first1.__seg_ & __m; // the selected bits of range 1, still at their original positions
61+
unsigned __clz_r = __bits_per_word - __first2.__ctz_; // bits from __first2's offset to the end of its word
62+
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); // how many of the __dn bits fit in __first2's current word
63+
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); // selects bits [__first2.__ctz_, __first2.__ctz_ + __ddn)
64+
if (__first2.__ctz_ > __first1.__ctz_) {
65+
if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) // shift __b up so it starts at __first2's offset
66+
return false;
67+
} else {
68+
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) // shift __b down so it starts at __first2's offset
69+
return false;
70+
}
71+
__first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; // advance __first2 by __ddn bits (word part)
72+
__first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word); // advance __first2 by __ddn bits (offset part)
73+
__dn -= __ddn;
74+
if (__dn > 0) {
75+
__m = ~__storage_type(0) >> (__bits_per_word - __dn); // selects the low __dn bits
76+
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) // bits of __b not yet compared, moved down to bit 0
77+
return false;
78+
__first2.__ctz_ = static_cast<unsigned>(__dn);
79+
}
80+
++__first1.__seg_;
81+
// __first1.__ctz_ = 0;
82+
}
83+
// __first1.__ctz_ == 0;
84+
// do middle words
85+
// NOTE(review): the loop below shifts by __clz_r, which would equal __bits_per_word
86+
// if __first2.__ctz_ were 0 here; presumably that cannot happen because this
87+
// function is only reached when the two offsets differ -- confirm.
unsigned __clz_r = __bits_per_word - __first2.__ctz_;
86+
__storage_type __m = ~__storage_type(0) << __first2.__ctz_; // selects bits from __first2's offset to the top of the word
87+
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
88+
__storage_type __b = *__first1.__seg_; // one whole word of range 1
89+
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) // low part of __b vs. upper part of the current range-2 word
90+
return false;
91+
++__first2.__seg_;
92+
if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) // rest of __b vs. the bits below the offset in the next range-2 word
93+
return false;
94+
}
95+
// do last word
96+
if (__n > 0) {
97+
__m = ~__storage_type(0) >> (__bits_per_word - __n); // selects the low __n bits
98+
__storage_type __b = *__first1.__seg_ & __m; // remaining bits of range 1
99+
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r)); // how many of them fit in __first2's current word
100+
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); // selects bits [__first2.__ctz_, __first2.__ctz_ + __dn)
101+
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
102+
return false;
103+
__first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; // advance __first2 by __dn bits (word part)
104+
__first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word); // advance __first2 by __dn bits (offset part)
105+
__n -= __dn;
106+
if (__n > 0) {
107+
__m = ~__storage_type(0) >> (__bits_per_word - __n); // selects the low __n bits of the next range-2 word
108+
if ((*__first2.__seg_ & __m) != (__b >> __dn)) // bits of __b not yet compared, moved down to bit 0
109+
return false;
110+
}
111+
}
112+
}
113+
return true;
114+
}
115+
116+
// Compares the bit range [__first1, __last1) against the range of the same length that
// starts at __first2, one storage word at a time. __equal_iter_impl dispatches here
// when __first1.__ctz_ == __first2.__ctz_; the masks below are built from
// __first1.__ctz_ only and applied to the words of both ranges, so the function
// relies on that equality.
//
// Returns false as soon as a (masked) word comparison differs, and true otherwise
// (including when __last1 - __first1 is not positive).
template <class _Cp, bool _IC1, bool _IC2>
117+
[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
118+
__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
119+
using _It = __bit_iterator<_Cp, _IC1>;
120+
using difference_type = typename _It::difference_type;
121+
using __storage_type = typename _It::__storage_type;
122+
123+
const int __bits_per_word = _It::__bits_per_word;
124+
difference_type __n = __last1 - __first1; // number of bits still to compare
125+
if (__n > 0) {
126+
// do first word
127+
if (__first1.__ctz_ != 0) {
128+
unsigned __clz = __bits_per_word - __first1.__ctz_; // bits from the starting offset to the end of the word
129+
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n); // bits handled in this first, partial word
130+
__n -= __dn;
131+
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); // selects bits [__first1.__ctz_, __first1.__ctz_ + __dn)
132+
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
133+
return false;
134+
++__first2.__seg_;
135+
++__first1.__seg_;
136+
// __first1.__ctz_ = 0;
137+
// __first2.__ctz_ = 0;
138+
}
139+
// __first1.__ctz_ == 0;
140+
// __first2.__ctz_ == 0;
141+
// do middle words
142+
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
143+
if (*__first2.__seg_ != *__first1.__seg_) // whole words are compared without masking
144+
return false;
145+
// do last word
146+
if (__n > 0) {
147+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); // selects the low __n bits
148+
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
149+
return false;
150+
}
151+
}
152+
return true;
153+
}
154+
155+
// Overload of __equal_iter_impl for __bit_iterator arguments. It participates in
// overload resolution only when _BinaryPredicate is exactly __equal_to (see the
// __enable_if_t below); the predicate parameter is unnamed and never invoked.
//
// Picks the comparison routine from the starting bit offsets: __equal_aligned when
// both iterators have the same __ctz_, __equal_unaligned otherwise, and returns
// that routine's result.
template <class _Cp,
156+
bool _IC1,
157+
bool _IC2,
158+
class _BinaryPredicate,
159+
__enable_if_t<std::is_same<_BinaryPredicate, __equal_to>::value, int> = 0>
160+
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
161+
__bit_iterator<_Cp, _IC1> __first1,
162+
__bit_iterator<_Cp, _IC1> __last1,
163+
__bit_iterator<_Cp, _IC2> __first2,
164+
_BinaryPredicate) {
165+
if (__first1.__ctz_ == __first2.__ctz_) // same bit offset within the storage word
166+
return std::__equal_aligned(__first1, __last1, __first2);
167+
return std::__equal_unaligned(__first1, __last1, __first2);
168+
}
169+
36170
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
37171
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
38172
_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) {
@@ -94,6 +228,31 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&,
94228
return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
95229
}
96230

231+
// Overload of __equal_impl for __bit_iterator arguments. It is enabled only when
//   - _Pred is __equal_to (or, when _LIBCPP_STD_VER >= 20, ranges::equal_to), and
//   - __desugars_to_v<__equal_tag, _Pred, bool, bool> holds, and
//   - both _Proj1 and _Proj2 satisfy __is_identity.
//
// The end iterator of the second range, the predicate and both projections are
// unnamed and unused: the call is forwarded to __equal_iter_impl with the first
// range, the start of the second range and a freshly constructed __equal_to.
// NOTE(review): since the second end iterator is ignored here, presumably the
// caller has already checked that both ranges have the same length -- confirm.
template <class _Cp,
232+
bool _IC1,
233+
bool _IC2,
234+
class _Pred,
235+
class _Proj1,
236+
class _Proj2,
237+
__enable_if_t<(is_same<_Pred, __equal_to>::value
238+
# if _LIBCPP_STD_VER >= 20
239+
|| is_same<_Pred, ranges::equal_to>::value
240+
# endif
241+
) &&
242+
__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
243+
__is_identity<_Proj2>::value,
244+
int> = 0>
245+
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
246+
__bit_iterator<_Cp, _IC1> __first1,
247+
__bit_iterator<_Cp, _IC1> __last1,
248+
__bit_iterator<_Cp, _IC2> __first2,
249+
__bit_iterator<_Cp, _IC2>,
250+
_Pred&,
251+
_Proj1&,
252+
_Proj2&) {
253+
return std::__equal_iter_impl(__first1, __last1, __first2, __equal_to()); // always uses __equal_to, whichever accepted _Pred was passed
254+
}
255+
97256
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
98257
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
99258
equal(_InputIterator1 __first1,

libcxx/include/__bit_reference

Lines changed: 11 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#ifndef _LIBCPP___BIT_REFERENCE
1111
#define _LIBCPP___BIT_REFERENCE
1212

13+
#include <__algorithm/comp.h>
1314
#include <__algorithm/copy_n.h>
15+
#include <__algorithm/equal.h>
1416
#include <__algorithm/min.h>
1517
#include <__bit/countr.h>
1618
#include <__compare/ordering.h>
@@ -21,7 +23,9 @@
2123
#include <__memory/construct_at.h>
2224
#include <__memory/pointer_traits.h>
2325
#include <__type_traits/conditional.h>
26+
#include <__type_traits/enable_if.h>
2427
#include <__type_traits/is_constant_evaluated.h>
28+
#include <__type_traits/is_same.h>
2529
#include <__utility/swap.h>
2630

2731
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -655,127 +659,6 @@ rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle,
655659
return __r;
656660
}
657661

658-
// equal
659-
660-
template <class _Cp, bool _IC1, bool _IC2>
661-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
662-
__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
663-
using _It = __bit_iterator<_Cp, _IC1>;
664-
using difference_type = typename _It::difference_type;
665-
using __storage_type = typename _It::__storage_type;
666-
667-
const int __bits_per_word = _It::__bits_per_word;
668-
difference_type __n = __last1 - __first1;
669-
if (__n > 0) {
670-
// do first word
671-
if (__first1.__ctz_ != 0) {
672-
unsigned __clz_f = __bits_per_word - __first1.__ctz_;
673-
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
674-
__n -= __dn;
675-
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
676-
__storage_type __b = *__first1.__seg_ & __m;
677-
unsigned __clz_r = __bits_per_word - __first2.__ctz_;
678-
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
679-
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
680-
if (__first2.__ctz_ > __first1.__ctz_) {
681-
if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
682-
return false;
683-
} else {
684-
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
685-
return false;
686-
}
687-
__first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
688-
__first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
689-
__dn -= __ddn;
690-
if (__dn > 0) {
691-
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
692-
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
693-
return false;
694-
__first2.__ctz_ = static_cast<unsigned>(__dn);
695-
}
696-
++__first1.__seg_;
697-
// __first1.__ctz_ = 0;
698-
}
699-
// __first1.__ctz_ == 0;
700-
// do middle words
701-
unsigned __clz_r = __bits_per_word - __first2.__ctz_;
702-
__storage_type __m = ~__storage_type(0) << __first2.__ctz_;
703-
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
704-
__storage_type __b = *__first1.__seg_;
705-
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
706-
return false;
707-
++__first2.__seg_;
708-
if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
709-
return false;
710-
}
711-
// do last word
712-
if (__n > 0) {
713-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
714-
__storage_type __b = *__first1.__seg_ & __m;
715-
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
716-
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
717-
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
718-
return false;
719-
__first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
720-
__first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
721-
__n -= __dn;
722-
if (__n > 0) {
723-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
724-
if ((*__first2.__seg_ & __m) != (__b >> __dn))
725-
return false;
726-
}
727-
}
728-
}
729-
return true;
730-
}
731-
732-
template <class _Cp, bool _IC1, bool _IC2>
733-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
734-
__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
735-
using _It = __bit_iterator<_Cp, _IC1>;
736-
using difference_type = typename _It::difference_type;
737-
using __storage_type = typename _It::__storage_type;
738-
739-
const int __bits_per_word = _It::__bits_per_word;
740-
difference_type __n = __last1 - __first1;
741-
if (__n > 0) {
742-
// do first word
743-
if (__first1.__ctz_ != 0) {
744-
unsigned __clz = __bits_per_word - __first1.__ctz_;
745-
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
746-
__n -= __dn;
747-
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
748-
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
749-
return false;
750-
++__first2.__seg_;
751-
++__first1.__seg_;
752-
// __first1.__ctz_ = 0;
753-
// __first2.__ctz_ = 0;
754-
}
755-
// __first1.__ctz_ == 0;
756-
// __first2.__ctz_ == 0;
757-
// do middle words
758-
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
759-
if (*__first2.__seg_ != *__first1.__seg_)
760-
return false;
761-
// do last word
762-
if (__n > 0) {
763-
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
764-
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
765-
return false;
766-
}
767-
}
768-
return true;
769-
}
770-
771-
template <class _Cp, bool _IC1, bool _IC2>
772-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
773-
equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
774-
if (__first1.__ctz_ == __first2.__ctz_)
775-
return std::__equal_aligned(__first1, __last1, __first2);
776-
return std::__equal_unaligned(__first1, __last1, __first2);
777-
}
778-
779662
template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
780663
class __bit_iterator {
781664
public:
@@ -1004,9 +887,13 @@ private:
1004887
template <class _Dp, bool _IC1, bool _IC2>
1005888
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
1006889
__equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
1007-
template <class _Dp, bool _IC1, bool _IC2>
1008-
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
1009-
equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
890+
template <class _Dp,
891+
bool _IC1,
892+
bool _IC2,
893+
class _BinaryPredicate,
894+
__enable_if_t<std::is_same<_BinaryPredicate, __equal_to>::value, int> >
895+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl(
896+
__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>, _BinaryPredicate);
1010897
template <bool _ToFind, class _Dp, bool _IC>
1011898
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC>
1012899
__find_bool(__bit_iterator<_Dp, _IC>, typename _Dp::size_type);

libcxx/include/bitset

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ template <size_t N> struct hash<std::bitset<N>>;
130130
# include <__cxx03/bitset>
131131
#else
132132
# include <__algorithm/count.h>
133+
# include <__algorithm/equal.h>
133134
# include <__algorithm/fill.h>
134135
# include <__algorithm/fill_n.h>
135136
# include <__algorithm/find.h>

0 commit comments

Comments
 (0)