[libc++][hardening] Categorize all `ryu` assertions as internal #71853

var-const · 2023-11-09T19:20:36Z

These assertions can only be triggered by bugs in the algorithm's implementation; all user inputs should be handled gracefully.

llvmbot · 2023-11-09T19:21:10Z

@llvm/pr-subscribers-libcxx

Author: Konstantin Varlamov (var-const)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/71853.diff

5 Files Affected:

(modified) libcxx/src/include/ryu/common.h (+7-7)
(modified) libcxx/src/include/ryu/d2s_intrinsics.h (+8-8)
(modified) libcxx/src/ryu/d2fixed.cpp (+2-2)
(modified) libcxx/src/ryu/d2s.cpp (+1-1)
(modified) libcxx/src/ryu/f2s.cpp (+10-10)

diff --git a/libcxx/src/include/ryu/common.h b/libcxx/src/include/ryu/common.h
index 23cb7d3b773bd57..d5168d8710bf269 100644
--- a/libcxx/src/include/ryu/common.h
+++ b/libcxx/src/include/ryu/common.h
@@ -52,7 +52,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
   // Function precondition: __v is not a 10-digit number.
   // (f2s: 9 digits are sufficient for round-tripping.)
   // (d2fixed: We print 9-digit blocks.)
-  _LIBCPP_ASSERT_UNCATEGORIZED(__v < 1000000000, "");
+  _LIBCPP_ASSERT_INTERNAL(__v < 1000000000, "");
   if (__v >= 100000000) { return 9; }
   if (__v >= 10000000) { return 8; }
   if (__v >= 1000000) { return 7; }
@@ -69,24 +69,24 @@ _LIBCPP_BEGIN_NAMESPACE_STD
   // This approximation works up to the point that the multiplication overflows at __e = 3529.
   // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
   // than 2^9297.
-  _LIBCPP_ASSERT_UNCATEGORIZED(__e >= 0, "");
-  _LIBCPP_ASSERT_UNCATEGORIZED(__e <= 3528, "");
+  _LIBCPP_ASSERT_INTERNAL(__e >= 0, "");
+  _LIBCPP_ASSERT_INTERNAL(__e <= 3528, "");
   return static_cast<int32_t>(((static_cast<uint32_t>(__e) * 1217359) >> 19) + 1);
 }
 
 // Returns floor(log_10(2^__e)).
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI  inline uint32_t __log10Pow2(const int32_t __e) {
   // The first value this approximation fails for is 2^1651 which is just greater than 10^297.
-  _LIBCPP_ASSERT_UNCATEGORIZED(__e >= 0, "");
-  _LIBCPP_ASSERT_UNCATEGORIZED(__e <= 1650, "");
+  _LIBCPP_ASSERT_INTERNAL(__e >= 0, "");
+  _LIBCPP_ASSERT_INTERNAL(__e <= 1650, "");
   return (static_cast<uint32_t>(__e) * 78913) >> 18;
 }
 
 // Returns floor(log_10(5^__e)).
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __log10Pow5(const int32_t __e) {
   // The first value this approximation fails for is 5^2621 which is just greater than 10^1832.
-  _LIBCPP_ASSERT_UNCATEGORIZED(__e >= 0, "");
-  _LIBCPP_ASSERT_UNCATEGORIZED(__e <= 2620, "");
+  _LIBCPP_ASSERT_INTERNAL(__e >= 0, "");
+  _LIBCPP_ASSERT_INTERNAL(__e <= 2620, "");
   return (static_cast<uint32_t>(__e) * 732923) >> 20;
 }
 
diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h
index 7697c32ff6e93cf..be50361fb3b334d 100644
--- a/libcxx/src/include/ryu/d2s_intrinsics.h
+++ b/libcxx/src/include/ryu/d2s_intrinsics.h
@@ -63,7 +63,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
   // (The shift value is in the range [49, 58].)
   // Check this here in case a future change requires larger shift
   // values. In this case this function needs to be adjusted.
-  _LIBCPP_ASSERT_UNCATEGORIZED(__dist < 64, "");
+  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
   return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
 }
 
@@ -85,7 +85,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
   // (The shift value is in the range [49, 58].)
   // Check this here in case a future change requires larger shift
   // values. In this case this function needs to be adjusted.
-  _LIBCPP_ASSERT_UNCATEGORIZED(__dist < 64, "");
+  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
   auto __temp = __lo | ((unsigned __int128)__hi << 64);
   // For x64 128-bit shfits using the `shrd` instruction and two 64-bit
   // registers, the shift value is modulo 64.  Thus the `& 63` is free.
@@ -126,13 +126,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
   // We don't need to handle the case __dist >= 64 here (see above).
-  _LIBCPP_ASSERT_UNCATEGORIZED(__dist < 64, "");
+  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
 #ifdef _LIBCPP_64_BIT
-  _LIBCPP_ASSERT_UNCATEGORIZED(__dist > 0, "");
+  _LIBCPP_ASSERT_INTERNAL(__dist > 0, "");
   return (__hi << (64 - __dist)) | (__lo >> __dist);
 #else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
   // Avoid a 64-bit shift by taking advantage of the range of shift values.
-  _LIBCPP_ASSERT_UNCATEGORIZED(__dist >= 32, "");
+  _LIBCPP_ASSERT_INTERNAL(__dist >= 32, "");
   return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
 #endif // ^^^ 32-bit ^^^
 }
@@ -227,7 +227,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
   uint32_t __count = 0;
   for (;;) {
-    _LIBCPP_ASSERT_UNCATEGORIZED(__value != 0, "");
+    _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
     const uint64_t __q = __div5(__value);
     const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
     if (__r != 0) {
@@ -247,8 +247,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // Returns true if __value is divisible by 2^__p.
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
-  _LIBCPP_ASSERT_UNCATEGORIZED(__value != 0, "");
-  _LIBCPP_ASSERT_UNCATEGORIZED(__p < 64, "");
+  _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
+  _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
   // __builtin_ctzll doesn't appear to be faster here.
   return (__value & ((1ull << __p) - 1)) == 0;
 }
diff --git a/libcxx/src/ryu/d2fixed.cpp b/libcxx/src/ryu/d2fixed.cpp
index 3ae515d1dcccb81..4cfc39535988e29 100644
--- a/libcxx/src/ryu/d2fixed.cpp
+++ b/libcxx/src/ryu/d2fixed.cpp
@@ -102,8 +102,8 @@ inline constexpr int __POW10_ADDITIONAL_BITS = 120;
   const uint64_t __s1low = __low2 + __high1 + __c1; // 128
   const uint32_t __c2 = __s1low < __low2; // __high1 + __c1 can't overflow, so compare against __low2
   const uint64_t __s1high = __high2 + __c2;         // 192
-  _LIBCPP_ASSERT_UNCATEGORIZED(__j >= 128, "");
-  _LIBCPP_ASSERT_UNCATEGORIZED(__j <= 180, "");
+  _LIBCPP_ASSERT_INTERNAL(__j >= 128, "");
+  _LIBCPP_ASSERT_INTERNAL(__j <= 180, "");
 #ifdef _LIBCPP_INTRINSIC128
   const uint32_t __dist = static_cast<uint32_t>(__j - 128); // __dist: [0, 52]
   const uint64_t __shiftedhigh = __s1high >> __dist;
diff --git a/libcxx/src/ryu/d2s.cpp b/libcxx/src/ryu/d2s.cpp
index 86d0b61848be7de..32d617cb5532483 100644
--- a/libcxx/src/ryu/d2s.cpp
+++ b/libcxx/src/ryu/d2s.cpp
@@ -154,7 +154,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
   // The average output length is 16.38 digits, so we check high-to-low.
   // Function precondition: __v is not an 18, 19, or 20-digit number.
   // (17 digits are sufficient for round-tripping.)
-  _LIBCPP_ASSERT_UNCATEGORIZED(__v < 100000000000000000u, "");
+  _LIBCPP_ASSERT_INTERNAL(__v < 100000000000000000u, "");
   if (__v >= 10000000000000000u) { return 17; }
   if (__v >= 1000000000000000u) { return 16; }
   if (__v >= 100000000000000u) { return 15; }
diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp
index 7470dc63d748e9b..f42fbd68c91d2d4 100644
--- a/libcxx/src/ryu/f2s.cpp
+++ b/libcxx/src/ryu/f2s.cpp
@@ -86,7 +86,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint32_t __value) {
   uint32_t __count = 0;
   for (;;) {
-    _LIBCPP_ASSERT_UNCATEGORIZED(__value != 0, "");
+    _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
     const uint32_t __q = __value / 5;
     const uint32_t __r = __value % 5;
     if (__r != 0) {
@@ -105,14 +105,14 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
 
 // Returns true if __value is divisible by 2^__p.
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
-  _LIBCPP_ASSERT_UNCATEGORIZED(__value != 0, "");
-  _LIBCPP_ASSERT_UNCATEGORIZED(__p < 32, "");
+  _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
+  _LIBCPP_ASSERT_INTERNAL(__p < 32, "");
   // __builtin_ctz doesn't appear to be faster here.
   return (__value & ((1u << __p) - 1)) == 0;
 }
 
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulShift(const uint32_t __m, const uint64_t __factor, const int32_t __shift) {
-  _LIBCPP_ASSERT_UNCATEGORIZED(__shift > 32, "");
+  _LIBCPP_ASSERT_INTERNAL(__shift > 32, "");
 
   // The casts here help MSVC to avoid calls to the __allmul library
   // function.
@@ -134,7 +134,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
 #else // ^^^ 32-bit ^^^ / vvv 64-bit vvv
   const uint64_t __sum = (__bits0 >> 32) + __bits1;
   const uint64_t __shiftedSum = __sum >> (__shift - 32);
-  _LIBCPP_ASSERT_UNCATEGORIZED(__shiftedSum <= UINT32_MAX, "");
+  _LIBCPP_ASSERT_INTERNAL(__shiftedSum <= UINT32_MAX, "");
   return static_cast<uint32_t>(__shiftedSum);
 #endif // ^^^ 64-bit ^^^
 }
@@ -309,8 +309,8 @@ struct __floating_decimal_32 {
   // Performance note: Long division appears to be faster than losslessly widening float to double and calling
   // __d2fixed_buffered_n(). If __f2fixed_buffered_n() is implemented, it might be faster than long division.
 
-  _LIBCPP_ASSERT_UNCATEGORIZED(_Exponent2 > 0, "");
-  _LIBCPP_ASSERT_UNCATEGORIZED(_Exponent2 <= 104, ""); // because __ieeeExponent <= 254
+  _LIBCPP_ASSERT_INTERNAL(_Exponent2 > 0, "");
+  _LIBCPP_ASSERT_INTERNAL(_Exponent2 <= 104, ""); // because __ieeeExponent <= 254
 
   // Manually represent _Mantissa2 * 2^_Exponent2 as a large integer. _Mantissa2 is always 24 bits
   // (due to the implicit bit), while _Exponent2 indicates a shift of at most 104 bits.
@@ -328,7 +328,7 @@ struct __floating_decimal_32 {
 
   // _Maxidx is the index of the most significant nonzero element.
   uint32_t _Maxidx = ((24 + static_cast<uint32_t>(_Exponent2) + 31) / 32) - 1;
-  _LIBCPP_ASSERT_UNCATEGORIZED(_Maxidx < _Data_size, "");
+  _LIBCPP_ASSERT_INTERNAL(_Maxidx < _Data_size, "");
 
   const uint32_t _Bit_shift = static_cast<uint32_t>(_Exponent2) % 32;
   if (_Bit_shift <= 8) { // _Mantissa2's 24 bits don't cross an element boundary
@@ -388,9 +388,9 @@ struct __floating_decimal_32 {
     }
   }
 
-  _LIBCPP_ASSERT_UNCATEGORIZED(_Data[0] != 0, "");
+  _LIBCPP_ASSERT_INTERNAL(_Data[0] != 0, "");
   for (uint32_t _Idx = 1; _Idx < _Data_size; ++_Idx) {
-    _LIBCPP_ASSERT_UNCATEGORIZED(_Data[_Idx] == 0, "");
+    _LIBCPP_ASSERT_INTERNAL(_Data[_Idx] == 0, "");
   }
 
   const uint32_t _Data_olength = _Data[0] >= 1000000000 ? 10 : __decimalLength9(_Data[0]);

ldionne · 2023-11-09T19:51:42Z

In practice, this means that we'll never enable those assertions because we'd have to build the dylib in Debug mode. Is that what we intend? It may be OK, but I wanted to point it out.

mordante · 2023-11-16T19:41:58Z

In practice, this means that we'll never enable those assertions because we'd have to build the dylib in Debug mode. Is that what we intend? It may be OK, but I wanted to point it out.

Does that differ from _LIBCPP_ASSERT_UNCATEGORIZED or would that be debug build only too?

ldionne · 2023-11-16T20:02:31Z

We aim to get rid of all the UNCATEGORIZED assertions eventually. Right now those are enabled in the extensive and the debug mode, but really UNCATEGORIZED was just a temporary state to get us where we want to be (everything with a proper category).

var-const · 2023-11-21T04:13:35Z

In practice, this means that we'll never enable those assertions because we'd have to build the dylib in Debug mode. Is that what we intend? It may be OK, but I wanted to point it out.

Does that differ from _LIBCPP_ASSERT_UNCATEGORIZED or would that be debug build only too?

"uncategorized" assertions are essentially TODOs. They are currently enabled in the extensive mode to avoid a significant change in behavior for current users of the safe mode. Once the existing uncategorized assertions get properly classified, I think we should switch to only enabling "uncategorized" in the debug mode (we could get rid of this category entirely but realistically it might be useful to avoid blocking patches on categorization in cases where it's not obvious).

var-const · 2023-11-21T04:15:01Z

In practice, this means that we'll never enable those assertions because we'd have to build the dylib in Debug mode. Is that what we intend? It may be OK, but I wanted to point it out.

Thanks for bringing this up. I think the assertions are divided roughly 50/50% between headers and sources, so it seems that about a half of these would be effectively always disabled. Do we have any plans to provide debug builds of the dylib?

ldionne

If we do ship a debug build of the library, it will probably be via the mechanism discussed here: https://discourse.llvm.org/t/rfc-instrumented-versions-of-libc-for-different-sanitizers.

I am fine with this patch -- the fact that these are internal-only assertions that will almost always be disabled is not a problem, just an observation.

[libc++][hardening] Categorize all ryu assertions as internal

cc5d9fb

var-const requested a review from a team as a code owner November 9, 2023 19:20

llvmbot added the libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi. label Nov 9, 2023

var-const requested a review from mordante November 9, 2023 19:27

var-const assigned mordante Nov 9, 2023

var-const added the hardening Issues related to the hardening effort label Nov 9, 2023

ldionne approved these changes Nov 21, 2023

View reviewed changes

var-const merged commit bed1a5b into llvm:main Nov 21, 2023

var-const deleted the varconst/categorize-ryu-assertions branch November 21, 2023 20:09

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[libc++][hardening] Categorize all `ryu` assertions as internal #71853

[libc++][hardening] Categorize all `ryu` assertions as internal #71853

Uh oh!

var-const commented Nov 9, 2023 •

edited

Loading

Uh oh!

llvmbot commented Nov 9, 2023

Uh oh!

ldionne commented Nov 9, 2023

Uh oh!

mordante commented Nov 16, 2023

Uh oh!

ldionne commented Nov 16, 2023

Uh oh!

var-const commented Nov 21, 2023

Uh oh!

var-const commented Nov 21, 2023

Uh oh!

ldionne left a comment

Uh oh!

Uh oh!

[libc++][hardening] Categorize all ryu assertions as internal #71853

[libc++][hardening] Categorize all ryu assertions as internal #71853

Uh oh!

Conversation

var-const commented Nov 9, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Nov 9, 2023

Uh oh!

ldionne commented Nov 9, 2023

Uh oh!

mordante commented Nov 16, 2023

Uh oh!

ldionne commented Nov 16, 2023

Uh oh!

var-const commented Nov 21, 2023

Uh oh!

var-const commented Nov 21, 2023

Uh oh!

ldionne left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

[libc++][hardening] Categorize all `ryu` assertions as internal #71853

[libc++][hardening] Categorize all `ryu` assertions as internal #71853

var-const commented Nov 9, 2023 •

edited

Loading