Skip to content

[libc++] Optimize ranges::equal for vector<bool>::iterator #121084

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libcxx/docs/ReleaseNotes/21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ Improvements and New Features
- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for
``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x.

- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
improvement of up to 188x.

- Updated formatting library to Unicode 16.0.0.

Deprecations and Removals
Expand Down
156 changes: 156 additions & 0 deletions libcxx/include/__algorithm/equal.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,20 @@
#define _LIBCPP___ALGORITHM_EQUAL_H

#include <__algorithm/comp.h>
#include <__algorithm/min.h>
#include <__algorithm/unwrap_iter.h>
#include <__config>
#include <__functional/identity.h>
#include <__fwd/bit_reference.h>
#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
#include <__memory/pointer_traits.h>
#include <__string/constexpr_c_functions.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_equality_comparable.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_volatile.h>
#include <__utility/move.h>

Expand All @@ -33,6 +37,136 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

// Compares the bit range [__first1, __last1) against the equally long bit range that starts at __first2, for the
// case where the two ranges begin at different bit offsets (__ctz_) inside their storage words.
//
// The bits of range 1 are processed one storage word at a time (a possibly partial first word, whole middle words,
// a possibly partial last word). Each chunk generally straddles two consecutive words of range 2, so it is compared
// in two pieces: the part that fits in the current word of range 2 and the part that spills into the next one.
//
// Returns true when every compared bit matches (including the empty range), false at the first mismatching chunk.
//
// The visible dispatchers only call this function when __first1.__ctz_ != __first2.__ctz_; with equal offsets the
// middle-word loop would shift by a full word width.
//
// All masks are derived from __ones, an all-ones value of type __storage_type. Writing ~__storage_type(0) inline
// would promote to int(-1) when __storage_type is narrower than int, so right shifts would keep every bit set and
// the masks would cover bits outside the range. For the same reason, left-shifted data is cast back to
// __storage_type to drop bits shifted past the word width. Neither change affects word types at least as wide as
// int.
template <class _Cp, bool _IsConst1, bool _IsConst2>
[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
__equal_unaligned(__bit_iterator<_Cp, _IsConst1> __first1,
                  __bit_iterator<_Cp, _IsConst1> __last1,
                  __bit_iterator<_Cp, _IsConst2> __first2) {
  using _It             = __bit_iterator<_Cp, _IsConst1>;
  using difference_type = typename _It::difference_type;
  using __storage_type  = typename _It::__storage_type;

  const int __bits_per_word   = _It::__bits_per_word;
  const __storage_type __ones = static_cast<__storage_type>(~__storage_type(0));
  difference_type __n         = __last1 - __first1;
  if (__n > 0) {
    // do first word
    if (__first1.__ctz_ != 0) {
      // __clz_f: bits available in range 1's first word; __dn: how many of them belong to the range.
      unsigned __clz_f     = __bits_per_word - __first1.__ctz_;
      difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
      __n -= __dn;
      __storage_type __m =
          static_cast<__storage_type>((__ones << __first1.__ctz_) & (__ones >> (__clz_f - __dn)));
      __storage_type __b = *__first1.__seg_ & __m;
      // __ddn: how many of those __dn bits fit into the current word of range 2.
      unsigned __clz_r      = __bits_per_word - __first2.__ctz_;
      __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
      __m = static_cast<__storage_type>((__ones << __first2.__ctz_) & (__ones >> (__clz_r - __ddn)));
      // Shift the chunk so that it lines up with range 2's offset before comparing.
      if (__first2.__ctz_ > __first1.__ctz_) {
        if ((*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << (__first2.__ctz_ - __first1.__ctz_)))
          return false;
      } else {
        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
          return false;
      }
      __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
      __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
      __dn -= __ddn;
      if (__dn > 0) {
        // The rest of the chunk sits in the low bits of range 2's next word.
        __m = static_cast<__storage_type>(__ones >> (__bits_per_word - __dn));
        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
          return false;
        __first2.__ctz_ = static_cast<unsigned>(__dn);
      }
      ++__first1.__seg_;
      // __first1.__ctz_ = 0;
    }
    // __first1.__ctz_ == 0;
    // do middle words
    unsigned __clz_r   = __bits_per_word - __first2.__ctz_;
    __storage_type __m = static_cast<__storage_type>(__ones << __first2.__ctz_);
    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
      __storage_type __b = *__first1.__seg_;
      // Low part of the word goes into the upper bits of range 2's current word ...
      if ((*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << __first2.__ctz_))
        return false;
      ++__first2.__seg_;
      // ... and the high part into the lower bits of the next word.
      if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
        return false;
    }
    // do last word
    if (__n > 0) {
      __m                 = static_cast<__storage_type>(__ones >> (__bits_per_word - __n));
      __storage_type __b  = *__first1.__seg_ & __m;
      __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
      __m = static_cast<__storage_type>((__ones << __first2.__ctz_) & (__ones >> (__clz_r - __dn)));
      if ((*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << __first2.__ctz_))
        return false;
      __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
      __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
      __n -= __dn;
      if (__n > 0) {
        __m = static_cast<__storage_type>(__ones >> (__bits_per_word - __n));
        if ((*__first2.__seg_ & __m) != (__b >> __dn))
          return false;
      }
    }
  }
  return true;
}

// Compares the bit range [__first1, __last1) against the equally long bit range that starts at __first2, for the
// case where both ranges begin at the same bit offset (__ctz_) inside their storage words. Only __first1.__ctz_ is
// consulted; the visible dispatchers call this function only when both offsets are equal.
//
// Because the offsets agree, corresponding bits occupy the same positions in corresponding words, so a possibly
// partial first word and a possibly partial last word are compared under a mask and all words in between are
// compared whole.
//
// Returns true when every compared bit matches (including the empty range), false otherwise.
//
// Masks are derived from __ones, an all-ones value of type __storage_type. Writing ~__storage_type(0) inline would
// promote to int(-1) when __storage_type is narrower than int, so the right-shifted half of each mask would keep
// every bit set and bits past the end of the range would take part in the comparison.
template <class _Cp, bool _IsConst1, bool _IsConst2>
[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
__equal_aligned(__bit_iterator<_Cp, _IsConst1> __first1,
                __bit_iterator<_Cp, _IsConst1> __last1,
                __bit_iterator<_Cp, _IsConst2> __first2) {
  using _It             = __bit_iterator<_Cp, _IsConst1>;
  using difference_type = typename _It::difference_type;
  using __storage_type  = typename _It::__storage_type;

  const int __bits_per_word   = _It::__bits_per_word;
  const __storage_type __ones = static_cast<__storage_type>(~__storage_type(0));
  difference_type __n         = __last1 - __first1;
  if (__n > 0) {
    // do first word
    if (__first1.__ctz_ != 0) {
      // __clz: bits available in the first word; __dn: how many of them belong to the range.
      unsigned __clz       = __bits_per_word - __first1.__ctz_;
      difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
      __n -= __dn;
      __storage_type __m =
          static_cast<__storage_type>((__ones << __first1.__ctz_) & (__ones >> (__clz - __dn)));
      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
        return false;
      ++__first2.__seg_;
      ++__first1.__seg_;
      // __first1.__ctz_ = 0;
      // __first2.__ctz_ = 0;
    }
    // __first1.__ctz_ == 0;
    // __first2.__ctz_ == 0;
    // do middle words
    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
      if (*__first2.__seg_ != *__first1.__seg_)
        return false;
    // do last word
    if (__n > 0) {
      // Only the low __n bits of the final word are part of the range.
      __storage_type __m = static_cast<__storage_type>(__ones >> (__bits_per_word - __n));
      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
        return false;
    }
  }
  return true;
}

// Three-iterator equality for bit iterators, enabled only when the predicate desugars to plain equality on bool.
// The predicate argument itself is ignored; the comparison is done word-wise by one of the two helpers, chosen by
// whether both ranges start at the same bit offset within their storage words.
template <class _Cp,
          bool _IsConst1,
          bool _IsConst2,
          class _BinaryPredicate,
          __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> = 0>
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
    __bit_iterator<_Cp, _IsConst1> __first1,
    __bit_iterator<_Cp, _IsConst1> __last1,
    __bit_iterator<_Cp, _IsConst2> __first2,
    _BinaryPredicate) {
  const bool __same_offset = __first1.__ctz_ == __first2.__ctz_;
  if (!__same_offset)
    return std::__equal_unaligned(__first1, __last1, __first2);
  return std::__equal_aligned(__first1, __last1, __first2);
}

template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) {
Expand Down Expand Up @@ -94,6 +228,28 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&,
return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
}

// Four-iterator equality for bit iterators, enabled only when the predicate desugars to plain equality on bool and
// both projections are identity. The end of the second range, the predicate and the projections are all unused:
// the length is taken from [__first1, __last1) and the work is handed to the aligned or unaligned word-wise helper
// depending on whether both ranges start at the same bit offset.
template <class _Cp,
          bool _IsConst1,
          bool _IsConst2,
          class _Pred,
          class _Proj1,
          class _Proj2,
          __enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
                            __is_identity<_Proj2>::value,
                        int> = 0>
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
    __bit_iterator<_Cp, _IsConst1> __first1,
    __bit_iterator<_Cp, _IsConst1> __last1,
    __bit_iterator<_Cp, _IsConst2> __first2,
    __bit_iterator<_Cp, _IsConst2>,
    _Pred&,
    _Proj1&,
    _Proj2&) {
  const bool __same_offset = __first1.__ctz_ == __first2.__ctz_;
  if (!__same_offset)
    return std::__equal_unaligned(__first1, __last1, __first2);
  return std::__equal_aligned(__first1, __last1, __first2);
}

template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1,
Expand Down
161 changes: 33 additions & 128 deletions libcxx/include/__bit_reference
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,28 @@
#ifndef _LIBCPP___BIT_REFERENCE
#define _LIBCPP___BIT_REFERENCE

#include <__algorithm/comp.h>
#include <__algorithm/copy.h>
#include <__algorithm/copy_backward.h>
#include <__algorithm/copy_n.h>
#include <__algorithm/equal.h>
#include <__algorithm/min.h>
#include <__assert>
#include <__bit/countr.h>
#include <__compare/ordering.h>
#include <__config>
#include <__cstddef/ptrdiff_t.h>
#include <__cstddef/size_t.h>
#include <__functional/identity.h>
#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
#include <__memory/construct_at.h>
#include <__memory/pointer_traits.h>
#include <__type_traits/conditional.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_unsigned.h>
#include <__type_traits/void_t.h>
#include <__utility/pair.h>
Expand Down Expand Up @@ -428,127 +433,6 @@ rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle,
return __r;
}

// equal

// Compares the bit range [__first1, __last1) against the equally long bit range that starts at __first2, for the
// case where the two ranges begin at different bit offsets (__ctz_) inside their storage words.
//
// Range 1 is processed one storage word at a time (partial first word, whole middle words, partial last word);
// each chunk is compared against range 2 in up to two pieces because it generally straddles two of its words.
//
// Returns true when every compared bit matches (including the empty range), false at the first mismatching chunk.
//
// Masks are derived from __ones, an all-ones value of type __storage_type, and left-shifted data is cast back to
// __storage_type. Writing ~__storage_type(0) inline would promote to int(-1) when __storage_type is narrower than
// int, which breaks the right-shifted masks and leaves stray high bits in left-shifted data. Neither change affects
// word types at least as wide as int.
template <class _Cp, bool _IC1, bool _IC2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
  using _It             = __bit_iterator<_Cp, _IC1>;
  using difference_type = typename _It::difference_type;
  using __storage_type  = typename _It::__storage_type;

  const int __bits_per_word   = _It::__bits_per_word;
  const __storage_type __ones = static_cast<__storage_type>(~__storage_type(0));
  difference_type __n         = __last1 - __first1;
  if (__n > 0) {
    // do first word
    if (__first1.__ctz_ != 0) {
      // __clz_f: bits available in range 1's first word; __dn: how many of them belong to the range.
      unsigned __clz_f     = __bits_per_word - __first1.__ctz_;
      difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
      __n -= __dn;
      __storage_type __m =
          static_cast<__storage_type>((__ones << __first1.__ctz_) & (__ones >> (__clz_f - __dn)));
      __storage_type __b = *__first1.__seg_ & __m;
      // __ddn: how many of those __dn bits fit into the current word of range 2.
      unsigned __clz_r      = __bits_per_word - __first2.__ctz_;
      __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
      __m = static_cast<__storage_type>((__ones << __first2.__ctz_) & (__ones >> (__clz_r - __ddn)));
      // Shift the chunk so that it lines up with range 2's offset before comparing.
      if (__first2.__ctz_ > __first1.__ctz_) {
        if ((*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << (__first2.__ctz_ - __first1.__ctz_)))
          return false;
      } else {
        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
          return false;
      }
      __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
      __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
      __dn -= __ddn;
      if (__dn > 0) {
        // The rest of the chunk sits in the low bits of range 2's next word.
        __m = static_cast<__storage_type>(__ones >> (__bits_per_word - __dn));
        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
          return false;
        __first2.__ctz_ = static_cast<unsigned>(__dn);
      }
      ++__first1.__seg_;
      // __first1.__ctz_ = 0;
    }
    // __first1.__ctz_ == 0;
    // do middle words
    unsigned __clz_r   = __bits_per_word - __first2.__ctz_;
    __storage_type __m = static_cast<__storage_type>(__ones << __first2.__ctz_);
    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
      __storage_type __b = *__first1.__seg_;
      // Low part of the word goes into the upper bits of range 2's current word ...
      if ((*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << __first2.__ctz_))
        return false;
      ++__first2.__seg_;
      // ... and the high part into the lower bits of the next word.
      if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
        return false;
    }
    // do last word
    if (__n > 0) {
      __m                 = static_cast<__storage_type>(__ones >> (__bits_per_word - __n));
      __storage_type __b  = *__first1.__seg_ & __m;
      __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
      __m = static_cast<__storage_type>((__ones << __first2.__ctz_) & (__ones >> (__clz_r - __dn)));
      if ((*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << __first2.__ctz_))
        return false;
      __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
      __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
      __n -= __dn;
      if (__n > 0) {
        __m = static_cast<__storage_type>(__ones >> (__bits_per_word - __n));
        if ((*__first2.__seg_ & __m) != (__b >> __dn))
          return false;
      }
    }
  }
  return true;
}

// Compares the bit range [__first1, __last1) against the equally long bit range that starts at __first2, for the
// case where both ranges begin at the same bit offset (__ctz_) inside their storage words. Only __first1.__ctz_ is
// consulted.
//
// A possibly partial first word and a possibly partial last word are compared under a mask; all words in between
// are compared whole. Returns true when every compared bit matches (including the empty range), false otherwise.
//
// Masks are derived from __ones, an all-ones value of type __storage_type. Writing ~__storage_type(0) inline would
// promote to int(-1) when __storage_type is narrower than int, so the right-shifted half of each mask would keep
// every bit set and bits past the end of the range would take part in the comparison.
template <class _Cp, bool _IC1, bool _IC2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
  using _It             = __bit_iterator<_Cp, _IC1>;
  using difference_type = typename _It::difference_type;
  using __storage_type  = typename _It::__storage_type;

  const int __bits_per_word   = _It::__bits_per_word;
  const __storage_type __ones = static_cast<__storage_type>(~__storage_type(0));
  difference_type __n         = __last1 - __first1;
  if (__n > 0) {
    // do first word
    if (__first1.__ctz_ != 0) {
      // __clz: bits available in the first word; __dn: how many of them belong to the range.
      unsigned __clz       = __bits_per_word - __first1.__ctz_;
      difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
      __n -= __dn;
      __storage_type __m =
          static_cast<__storage_type>((__ones << __first1.__ctz_) & (__ones >> (__clz - __dn)));
      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
        return false;
      ++__first2.__seg_;
      ++__first1.__seg_;
      // __first1.__ctz_ = 0;
      // __first2.__ctz_ = 0;
    }
    // __first1.__ctz_ == 0;
    // __first2.__ctz_ == 0;
    // do middle words
    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
      if (*__first2.__seg_ != *__first1.__seg_)
        return false;
    // do last word
    if (__n > 0) {
      // Only the low __n bits of the final word are part of the range.
      __storage_type __m = static_cast<__storage_type>(__ones >> (__bits_per_word - __n));
      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
        return false;
    }
  }
  return true;
}

// equal() overload for bit iterators: picks the word-wise helper that matches whether both ranges start at the
// same bit offset within their storage words.
template <class _Cp, bool _IC1, bool _IC2>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
  const bool __same_offset = __first1.__ctz_ == __first2.__ctz_;
  if (!__same_offset)
    return std::__equal_unaligned(__first1, __last1, __first2);
  return std::__equal_aligned(__first1, __last1, __first2);
}

template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
class __bit_iterator {
public:
Expand Down Expand Up @@ -771,15 +655,36 @@ private:
template <class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
template <class _Dp, bool _IC1, bool _IC2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
__equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
template <class _Dp, bool _IC1, bool _IC2>
template <class _Dp, bool _IsConst1, bool _IsConst2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
__equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
template <class _Dp, bool _IC1, bool _IC2>
__equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
template <class _Dp, bool _IsConst1, bool _IsConst2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
__equal_unaligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
// Friend declaration for the three-iterator __equal_iter_impl overload that takes bit iterators and a predicate
// constrained (via __desugars_to_v with __equal_tag) to plain equality on bool. The trailing enable_if template
// parameter is repeated here without a default argument.
template <class _Dp,
bool _IsConst1,
bool _IsConst2,
class _BinaryPredicate,
__enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> >
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl(
__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>, _BinaryPredicate);
// Friend declaration for the four-iterator __equal_impl overload that takes bit iterators, a predicate constrained
// (via __desugars_to_v with __equal_tag) to plain equality on bool, and two projections constrained by
// __is_identity. The trailing enable_if template parameter is repeated here without a default argument.
template <class _Dp,
bool _IsConst1,
bool _IsConst2,
class _Pred,
class _Proj1,
class _Proj2,
__enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
__is_identity<_Proj2>::value,
int> >
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_impl(
__bit_iterator<_Dp, _IsConst1> __first1,
__bit_iterator<_Dp, _IsConst1> __last1,
__bit_iterator<_Dp, _IsConst2> __first2,
__bit_iterator<_Dp, _IsConst2>,
_Pred&,
_Proj1&,
_Proj2&);
template <bool _ToFind, class _Dp, bool _IC>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC>
__find_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type);
Expand Down
Loading