-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc++][bit] Improves rotate functions. (#98032) #101892
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This is the cherry-pick of #98032 |
@llvm/pr-subscribers-libcxx Author: Mark de Wever (mordante) ChangesInvestigating #96612 shows our implementation was different from the Standard and could cause UB. Testing the codegen showed quite a bit of assembly generated for these functions. The functions have been written differently which allows Clang to optimize the code to use simple CPU rotate instructions. Fixes: #96612 Full diff: https://github.com/llvm/llvm-project/pull/101892.diff 3 Files Affected:
diff --git a/libcxx/include/__bit/rotate.h b/libcxx/include/__bit/rotate.h
index d848056c3350d0..90e430e9d04256 100644
--- a/libcxx/include/__bit/rotate.h
+++ b/libcxx/include/__bit/rotate.h
@@ -20,24 +20,37 @@
_LIBCPP_BEGIN_NAMESPACE_STD
+// Writing two full functions for rotl and rotr makes it easier for the compiler
+// to optimize the code. On x86 this function becomes the ROL instruction and
+// the rotr function becomes the ROR instruction.
template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __t, int __cnt) _NOEXCEPT {
- static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type");
- const unsigned int __dig = numeric_limits<_Tp>::digits;
- if ((__cnt % __dig) == 0)
- return __t;
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT {
+ static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotl requires an unsigned integer type");
+ const int __N = numeric_limits<_Tp>::digits;
+ int __r = __s % __N;
+
+ if (__r == 0)
+ return __x;
- if (__cnt < 0) {
- __cnt *= -1;
- return (__t << (__cnt % __dig)) | (__t >> (__dig - (__cnt % __dig))); // rotr with negative __cnt is similar to rotl
- }
+ if (__r > 0)
+ return (__x << __r) | (__x >> (__N - __r));
- return (__t >> (__cnt % __dig)) | (__t << (__dig - (__cnt % __dig)));
+ return (__x >> -__r) | (__x << (__N + __r));
}
template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __t, int __cnt) _NOEXCEPT {
- return std::__rotr(__t, -__cnt);
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT {
+ static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type");
+ const int __N = numeric_limits<_Tp>::digits;
+ int __r = __s % __N;
+
+ if (__r == 0)
+ return __x;
+
+ if (__r > 0)
+ return (__x >> __r) | (__x << (__N - __r));
+
+ return (__x << -__r) | (__x >> (__N + __r));
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
index 50e498b5761e54..16eabbd2a5a4de 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
@@ -41,6 +41,8 @@ constexpr bool test()
assert(std::rotl(T(max - 1), 5) == T(max - 32));
assert(std::rotl(T(max - 1), 6) == T(max - 64));
assert(std::rotl(T(max - 1), 7) == T(max - 128));
+ assert(std::rotl(T(max - 1), std::numeric_limits<int>::max()) ==
+ std::rotl(T(max - 1), std::numeric_limits<int>::max() % std::numeric_limits<T>::digits));
assert(std::rotl(T(max - 1), -1) == T(max - highbit));
assert(std::rotl(T(max - 1), -2) == T(max - (highbit >> 1)));
@@ -49,6 +51,8 @@ constexpr bool test()
assert(std::rotl(T(max - 1), -5) == T(max - (highbit >> 4)));
assert(std::rotl(T(max - 1), -6) == T(max - (highbit >> 5)));
assert(std::rotl(T(max - 1), -7) == T(max - (highbit >> 6)));
+ assert(std::rotl(T(max - 1), std::numeric_limits<int>::min()) ==
+ std::rotl(T(max - 1), std::numeric_limits<int>::min() % std::numeric_limits<T>::digits));
assert(std::rotl(T(1), 0) == T(1));
assert(std::rotl(T(1), 1) == T(2));
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
index 00c9e617d2edf3..53405588266f74 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
@@ -41,6 +41,8 @@ constexpr bool test()
assert(std::rotr(T(max - 1), 5) == T(max - (highbit >> 4)));
assert(std::rotr(T(max - 1), 6) == T(max - (highbit >> 5)));
assert(std::rotr(T(max - 1), 7) == T(max - (highbit >> 6)));
+ assert(std::rotr(T(max - 1), std::numeric_limits<int>::max()) ==
+ std::rotr(T(max - 1), std::numeric_limits<int>::max() % std::numeric_limits<T>::digits));
assert(std::rotr(T(max - 1), -1) == T(max - 2));
assert(std::rotr(T(max - 1), -2) == T(max - 4));
@@ -49,6 +51,8 @@ constexpr bool test()
assert(std::rotr(T(max - 1), -5) == T(max - 32));
assert(std::rotr(T(max - 1), -6) == T(max - 64));
assert(std::rotr(T(max - 1), -7) == T(max - 128));
+ assert(std::rotr(T(max - 1), std::numeric_limits<int>::min()) ==
+ std::rotr(T(max - 1), std::numeric_limits<int>::min() % std::numeric_limits<T>::digits));
assert(std::rotr(T(128), 0) == T(128));
assert(std::rotr(T(128), 1) == T(64));
|
Investigating llvm#96612 shows our implementation was different from the Standard and could cause UB. Testing the codegen showed quite a bit of assembly generated for these functions. The functions have been written differently which allows Clang to optimize the code to use simple CPU rotate instructions. Fixes: llvm#96612
5af85ec
to
01a49d2
Compare
@mordante (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Investigating #96612 shows our implementation was different from the Standard and could cause UB. Testing the codegen showed quite a bit of assembly generated for these functions. The functions have been written differently which allows Clang to optimize the code to use simple CPU rotate instructions.
Fixes: #96612