Skip to content

[libc++] Replace __libcpp_{ctz, clz} with __builtin_{ctzg, clzg} #133920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions libcxx/include/__algorithm/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
// Swap one pair on each iteration as long as both bitsets have at least one
// element for swapping.
while (__left_bitset != 0 && __right_bitset != 0) {
difference_type __tz_left = __libcpp_ctz(__left_bitset);
__left_bitset = __libcpp_blsr(__left_bitset);
difference_type __tz_right = __libcpp_ctz(__right_bitset);
__right_bitset = __libcpp_blsr(__right_bitset);
difference_type __tz_left = std::__countr_zero(__left_bitset);
__left_bitset = std::__libcpp_blsr(__left_bitset);
difference_type __tz_right = std::__countr_zero(__right_bitset);
__right_bitset = std::__libcpp_blsr(__right_bitset);
_Ops::iter_swap(__first + __tz_left, __last - __tz_right);
}
}
Expand Down Expand Up @@ -458,7 +458,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
// Swap within the left side. Need to find set positions in the reverse
// order.
while (__left_bitset != 0) {
difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset);
__left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
_RandomAccessIterator __it = __first + __tz_left;
if (__it != __lm1) {
Expand All @@ -471,7 +471,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
// Swap within the right side. Need to find set positions in the reverse
// order.
while (__right_bitset != 0) {
difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset);
__right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
_RandomAccessIterator __it = __lm1 - __tz_right;
if (__it != __first) {
Expand Down
62 changes: 0 additions & 62 deletions libcxx/include/__bit/countl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//

// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
// refactor this code to exclusively use __builtin_clzg.

#ifndef _LIBCPP___BIT_COUNTL_H
#define _LIBCPP___BIT_COUNTL_H

Expand All @@ -27,69 +24,10 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
return __builtin_clz(__x);
}

[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
return __builtin_clzl(__x);
}

[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
return __builtin_clzll(__x);
}

#if _LIBCPP_HAS_INT128
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
# if __has_builtin(__builtin_clzg)
return __builtin_clzg(__x);
# else
// The function is written in this form due to C++ constexpr limitations.
// The algorithm:
// - Test whether any bit in the high 64-bits is set
// - No bits set:
// - The high 64-bits contain 64 leading zeros,
// - Add the result of the low 64-bits.
// - Any bits set:
// - The number of leading zeros of the input is the number of leading
// zeros in the high 64-bits.
return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
: __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
# endif
}
#endif // _LIBCPP_HAS_INT128

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
#if __has_builtin(__builtin_clzg)
return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
#else // __has_builtin(__builtin_clzg)
if (__t == 0)
return numeric_limits<_Tp>::digits;

if (sizeof(_Tp) <= sizeof(unsigned int))
return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
(numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
else if (sizeof(_Tp) <= sizeof(unsigned long))
return std::__libcpp_clz(static_cast<unsigned long>(__t)) -
(numeric_limits<unsigned long>::digits - numeric_limits<_Tp>::digits);
else if (sizeof(_Tp) <= sizeof(unsigned long long))
return std::__libcpp_clz(static_cast<unsigned long long>(__t)) -
(numeric_limits<unsigned long long>::digits - numeric_limits<_Tp>::digits);
else {
int __ret = 0;
int __iter = 0;
const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
while (true) {
__t = std::__rotl(__t, __ulldigits);
if ((__iter = std::__countl_zero(static_cast<unsigned long long>(__t))) != __ulldigits)
break;
__ret += __iter;
}
return __ret + __iter;
}
#endif // __has_builtin(__builtin_clzg)
}

#if _LIBCPP_STD_VER >= 20
Expand Down
49 changes: 0 additions & 49 deletions libcxx/include/__bit/countr.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,10 @@
//
//===----------------------------------------------------------------------===//

// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
// refactor this code to exclusively use __builtin_ctzg.

#ifndef _LIBCPP___BIT_COUNTR_H
#define _LIBCPP___BIT_COUNTR_H

#include <__assert>
#include <__bit/rotate.h>
#include <__concepts/arithmetic.h>
#include <__config>
#include <__type_traits/is_unsigned.h>
Expand All @@ -28,55 +24,10 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
return __builtin_ctz(__x);
}

[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
return __builtin_ctzl(__x);
}

[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
return __builtin_ctzll(__x);
}

// A constexpr implementation for C++11 and later (using clang extensions for constexpr support)
// Precondition: __t != 0 (the caller __countr_zero handles __t == 0 as a special case)
template <class _Tp>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero_impl(_Tp __t) _NOEXCEPT {
_LIBCPP_ASSERT_INTERNAL(__t != 0, "__countr_zero_impl called with zero value");
static_assert(is_unsigned<_Tp>::value, "__countr_zero_impl only works with unsigned types");
if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) {
return std::__libcpp_ctz(static_cast<unsigned int>(__t));
} else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
return std::__libcpp_ctz(static_cast<unsigned long>(__t));
} else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
return std::__libcpp_ctz(static_cast<unsigned long long>(__t));
} else {
#if _LIBCPP_STD_VER == 11
unsigned long long __ull = static_cast<unsigned long long>(__t);
const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
return __ull == 0ull ? __ulldigits + std::__countr_zero_impl<_Tp>(__t >> __ulldigits) : std::__libcpp_ctz(__ull);
#else
int __ret = 0;
const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
while (static_cast<unsigned long long>(__t) == 0uLL) {
__ret += __ulldigits;
__t >>= __ulldigits;
}
return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
#endif
}
}

template <class _Tp>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
static_assert(is_unsigned<_Tp>::value, "__countr_zero only works with unsigned types");
#if __has_builtin(__builtin_ctzg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
#else
return __t != 0 ? std::__countr_zero_impl(__t) : numeric_limits<_Tp>::digits;
#endif
}

#if _LIBCPP_STD_VER >= 20
Expand Down
4 changes: 2 additions & 2 deletions libcxx/include/__bit_reference
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ public:

_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {
return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
}

private:
Expand Down Expand Up @@ -234,7 +234,7 @@ public:
}

_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {
return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
}

private:
Expand Down
15 changes: 6 additions & 9 deletions libcxx/include/__charconv/to_chars_integral.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,8 @@ struct _LIBCPP_HIDDEN __integral<2> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
// If value == 0 still need one digit. If the value != this has no
// effect since the code scans for the most significant bit set. (Note
// that __libcpp_clz doesn't work for 0.)
return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
// effect since the code scans for the most significant bit set.
return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1);
}

template <typename _Tp>
Expand Down Expand Up @@ -150,9 +149,8 @@ struct _LIBCPP_HIDDEN __integral<8> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
// If value == 0 still need one digit. If the value != this has no
// effect since the code scans for the most significat bit set. (Note
// that __libcpp_clz doesn't work for 0.)
return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
// effect since the code scans for the most significat bit set.
return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3;
}

template <typename _Tp>
Expand Down Expand Up @@ -186,9 +184,8 @@ struct _LIBCPP_HIDDEN __integral<16> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
// If value == 0 still need one digit. If the value != this has no
// effect since the code scans for the most significat bit set. (Note
// that __libcpp_clz doesn't work for 0.)
return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
// effect since the code scans for the most significat bit set.
return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4;
}

template <typename _Tp>
Expand Down
21 changes: 6 additions & 15 deletions libcxx/include/__charconv/traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,9 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
/// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
/// function requires its input to have at least one bit set the value of
/// zero is set to one. This means the first element of the lookup table is
/// zero.
/// Instead of using IntegerLogBase2 it uses __countl_zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
auto __t = (32 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __itoa::__pow10_32[__t]) + 1;
}

Expand All @@ -69,12 +66,9 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uin
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
/// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
/// function requires its input to have at least one bit set the value of
/// zero is set to one. This means the first element of the lookup table is
/// zero.
/// Instead of using IntegerLogBase2 it uses __countl_zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
auto __t = (64 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __itoa::__pow10_64[__t]) + 1;
}

Expand All @@ -96,15 +90,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
/// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
/// function requires its input to have at least one bit set the value of
/// zero is set to one. This means the first element of the lookup table is
/// zero.
/// Instead of using IntegerLogBase2 it uses __countl_zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
_LIBCPP_ASSERT_INTERNAL(
__v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true.");
// There's always a bit set in the upper 64-bits.
auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
auto __t = (128 - std::__countl_zero(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
_LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds");
// __t is adjusted since the lookup table misses the lower entries.
return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1;
Expand Down
2 changes: 1 addition & 1 deletion libcxx/include/__hash_table
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {
}

inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {
return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));
return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));
}

template <class _Tp, class _Hash, class _Equal, class _Alloc>
Expand Down