[libc] Fix printf %e and %g bugs

michaelrj-google · michaelrj-google · commit cd06b9d98fe2 · 2023-08-15T16:23:19.000-07:00
Fuzzing revealed bugs in the %e and %g conversions. Since these are very similar, they are grouped together. Again, most of the bugs were related to rounding. As an example, previously the code to check if the number was truncated only worked for digits below the decimal point, due to it being originally designed for %f. This patch adds a mechanism to check the digits above the decimal point for both %e and %g. Reviewed By: sivachandra, lntue Differential Revision: https://reviews.llvm.org/D157536
diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h
@@ -406,19 +406,24 @@ class FloatWriter {
       }
     }
 
-    char low_digit;
+    char low_digit = '0';
     if (block_digits > 0) {
       low_digit = end_buff[block_digits - 1];
     } else if (max_block_count > 0) {
       low_digit = '9';
-    } else {
+    } else if (buffered_digits > 0) {
       low_digit = block_buffer[buffered_digits - 1];
     }
 
+    bool round_up_max_blocks = false;
+
     // Round up
     if (round == RoundDirection::Up ||
         (round == RoundDirection::Even && low_digit % 2 != 0)) {
       bool has_carry = true;
+      round_up_max_blocks = true; // if we're rounding up, we might need to
+                                  // round up the max blocks that are buffered.
+
       // handle the low block that we're adding
       for (int count = static_cast<int>(block_digits) - 1;
            count >= 0 && has_carry; --count) {
@@ -427,6 +432,8 @@ class FloatWriter {
         } else {
           end_buff[count] += 1;
           has_carry = false;
+          round_up_max_blocks = false; // If the low block isn't all nines, then
+                                       // the max blocks aren't rounded up.
         }
       }
       // handle the high block that's buffered
@@ -490,7 +497,7 @@ class FloatWriter {
     // Either we intend to round down, or the rounding up is complete. Flush the
     // buffers.
 
-    RET_IF_RESULT_NEGATIVE(flush_buffer());
+    RET_IF_RESULT_NEGATIVE(flush_buffer(round_up_max_blocks));
 
     // And then write the final block. It's written via the buffer so that if
     // this is also the first block, the decimal point will be placed correctly.
@@ -740,58 +747,59 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
     last_block_size = IntegerToString<intmax_t>(digits).size();
   }
 
+  // This tracks if the number is truncated, that meaning that the digits after
+  // last_digit are non-zero.
+  bool truncated = false;
+
   // This is the last block.
   const size_t maximum = precision + 1 - digits_written;
   uint32_t last_digit = 0;
   for (uint32_t k = 0; k < last_block_size - maximum; ++k) {
+    if (last_digit > 0)
+      truncated = true;
+
     last_digit = digits % 10;
     digits /= 10;
   }
 
   // If the last block we read doesn't have the digit after the end of what
   // we'll print, then we need to read the next block to get that digit.
   if (maximum == last_block_size) {
-    BlockInt extra_block = float_converter.get_block(cur_block - 1);
+    --cur_block;
+    BlockInt extra_block = float_converter.get_block(cur_block);
     last_digit = extra_block / ((MAX_BLOCK / 10) + 1);
+    if (extra_block % ((MAX_BLOCK / 10) + 1) > 0) {
+      truncated = true;
+    }
   }
 
   RoundDirection round;
-  // Is m * 10^(additionalDigits + 1) / 2^(-exponent) integer?
-  const int32_t requiredTwos =
-      -(exponent - MANT_WIDTH) - static_cast<int32_t>(precision) - 1;
-  const bool trailingZeros =
-      requiredTwos <= 0 ||
-      (requiredTwos < 60 &&
-       multiple_of_power_of_2(float_bits.get_explicit_mantissa(),
-                              static_cast<uint32_t>(requiredTwos)));
-  switch (fputil::quick_get_round()) {
-  case FE_TONEAREST:
-    // Round to nearest, if it's exactly halfway then round to even.
-    if (last_digit != 5) {
-      round = last_digit > 5 ? RoundDirection::Up : RoundDirection::Down;
-    } else {
-      round = trailingZeros ? RoundDirection::Even : RoundDirection::Up;
-    }
-    break;
-  case FE_DOWNWARD:
-    if (is_negative && (!trailingZeros || last_digit > 0)) {
-      round = RoundDirection::Up;
-    } else {
-      round = RoundDirection::Down;
+
+  // If we've already seen a truncated digit, then we don't need to check any
+  // more.
+  if (!truncated) {
+    // Check the blocks above the decimal point
+    if (cur_block >= 0) {
+      // Check every block until the decimal point for non-zero digits.
+      for (int cur_extra_block = cur_block - 1; cur_extra_block >= 0;
+           --cur_extra_block) {
+        BlockInt extra_block = float_converter.get_block(cur_extra_block);
+        if (extra_block > 0) {
+          truncated = true;
+          break;
+        }
+      }
     }
-    break;
-  case FE_UPWARD:
-    if (!is_negative && (!trailingZeros || last_digit > 0)) {
-      round = RoundDirection::Up;
-    } else {
-      round = RoundDirection::Down;
+    // If it's still not truncated and there are digits below the decimal point
+    if (!truncated && exponent - MANT_WIDTH < 0) {
+      // Use the formula from %f.
+      truncated =
+          !zero_after_digits(exponent - MANT_WIDTH, precision - final_exponent,
+                             float_bits.get_explicit_mantissa());
     }
-    round = is_negative ? RoundDirection::Down : RoundDirection::Up;
-    break;
-  case FE_TOWARDZERO:
-    round = RoundDirection::Down;
-    break;
   }
+  round = get_round_direction(last_digit, truncated, is_negative);
+
   RET_IF_RESULT_NEGATIVE(float_writer.write_last_block_exp(
       digits, maximum, round, final_exponent, a + 'E' - 'A'));
 
@@ -984,71 +992,93 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer,
     }
   }
 
+  bool truncated = false;
+
   // Find the digit after the lowest digit that we'll actually print to
   // determine the rounding.
   const uint32_t maximum =
       exp_precision + 1 - static_cast<uint32_t>(digits_checked);
   uint32_t last_digit = 0;
   for (uint32_t k = 0; k < last_block_size - maximum; ++k) {
+    if (last_digit > 0)
+      truncated = true;
+
     last_digit = digits % 10;
     digits /= 10;
   }
 
   // If the last block we read doesn't have the digit after the end of what
   // we'll print, then we need to read the next block to get that digit.
   if (maximum == last_block_size) {
-    BlockInt extra_block = float_converter.get_block(cur_block - 1);
+    --cur_block;
+    BlockInt extra_block = float_converter.get_block(cur_block);
     last_digit = extra_block / ((MAX_BLOCK / 10) + 1);
+
+    if (extra_block % ((MAX_BLOCK / 10) + 1) > 0)
+      truncated = true;
   }
 
   // TODO: unify this code across the three float conversions.
   RoundDirection round;
-  // Is m * 10^(additionalDigits + 1) / 2^(-exponent) integer?
-  const int32_t requiredTwos =
-      -(exponent - MANT_WIDTH) - static_cast<int32_t>(exp_precision) - 1;
-  // TODO: rename this variable to remove confusion with trailing_zeroes
-  const bool trailingZeros =
-      requiredTwos <= 0 ||
-      (requiredTwos < 60 &&
-       multiple_of_power_of_2(float_bits.get_explicit_mantissa(),
-                              static_cast<uint32_t>(requiredTwos)));
-  switch (fputil::quick_get_round()) {
-  case FE_TONEAREST:
-    // Round to nearest, if it's exactly halfway then round to even.
-    if (last_digit != 5) {
-      round = last_digit > 5 ? RoundDirection::Up : RoundDirection::Down;
-    } else {
-      round = trailingZeros ? RoundDirection::Even : RoundDirection::Up;
-    }
-    break;
-  case FE_DOWNWARD:
-    if (is_negative && (!trailingZeros || last_digit > 0)) {
-      round = RoundDirection::Up;
-    } else {
-      round = RoundDirection::Down;
+
+  // If we've already seen a truncated digit, then we don't need to check any
+  // more.
+  if (!truncated) {
+    // Check the blocks above the decimal point
+    if (cur_block >= 0) {
+      // Check every block until the decimal point for non-zero digits.
+      for (int cur_extra_block = cur_block - 1; cur_extra_block >= 0;
+           --cur_extra_block) {
+        BlockInt extra_block = float_converter.get_block(cur_extra_block);
+        if (extra_block > 0) {
+          truncated = true;
+          break;
+        }
+      }
     }
-    break;
-  case FE_UPWARD:
-    if (!is_negative && (!trailingZeros || last_digit > 0)) {
-      round = RoundDirection::Up;
-    } else {
-      round = RoundDirection::Down;
+    // If it's still not truncated and there are digits below the decimal point
+    if (!truncated && exponent - MANT_WIDTH < 0) {
+      // Use the formula from %f.
+      truncated =
+          !zero_after_digits(exponent - MANT_WIDTH, exp_precision - base_10_exp,
+                             float_bits.get_explicit_mantissa());
     }
-    round = is_negative ? RoundDirection::Down : RoundDirection::Up;
-    break;
-  case FE_TOWARDZERO:
-    round = RoundDirection::Down;
-    break;
   }
 
+  round = get_round_direction(last_digit, truncated, is_negative);
+
   bool round_up;
   if (round == RoundDirection::Up) {
     round_up = true;
   } else if (round == RoundDirection::Down) {
     round_up = false;
   } else {
-    // RoundDirection is even, so check the extra digit.
-    uint32_t low_digit = digits % 10;
+    // RoundDirection is even, so check the lowest digit that will be printed.
+    uint32_t low_digit;
+
+    // maximum is the number of digits that will remain in digits after getting
+    // last_digit. If it's greater than zero, we can just check the lowest digit
+    // in digits.
+    if (maximum > 0) {
+      low_digit = digits % 10;
+    } else {
+      // Else if there are trailing nines, then the low digit is a nine, same
+      // with zeroes.
+      if (trailing_nines > 0) {
+        low_digit = 9;
+      } else if (trailing_zeroes > 0) {
+        low_digit = 0;
+      } else {
+        // If there are no trailing zeroes or nines, then the round direction
+        // doesn't actually matter here. Since this conversion passes off the
+        // value to another one for final conversion, rounding only matters to
+        // determine if the exponent is higher than expected (with an all nine
+        // number) or to determine the trailing zeroes to trim. In this case
+        // low_digit is set to 0, but it could be set to any number.
+
+        low_digit = 0;
+      }
+    }
     round_up = (low_digit % 2) != 0;
   }
 
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
@@ -888,7 +888,8 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) {
   double inf = __llvm_libc::fputil::FPBits<double>::inf().get_val();
   double nan = __llvm_libc::fputil::FPBits<double>::build_nan(1);
 
-  char big_buff[10000];
+  char big_buff[10000]; // Used for long doubles and other extremely wide
+                        // numbers.
 
   written = __llvm_libc::sprintf(buff, "%f", 1.0);
   ASSERT_STREQ_LEN(written, buff, "1.000000");
@@ -1696,6 +1697,14 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) {
 
   // Length Modifier Tests.
 
+#if defined(SPECIAL_X86_LONG_DOUBLE)
+  written = __llvm_libc::sprintf(buff, "%.9Le", 1000000000500000000.1L);
+  ASSERT_STREQ_LEN(written, buff, "1.000000001e+18");
+
+  written = __llvm_libc::sprintf(buff, "%.9Le", 1000000000500000000.0L);
+  ASSERT_STREQ_LEN(written, buff, "1.000000000e+18");
+#endif
+
   // TODO: Fix long doubles (needs bigger table or alternate algorithm.)
   // Currently the table values are generated, which is very slow.
   /*
@@ -1908,6 +1917,45 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) {
   written = __llvm_libc::sprintf(buff, "%.5e", 1.008e3);
   ASSERT_STREQ_LEN(written, buff, "1.00800e+03");
 
+  // These tests also focus on rounding. Almost all of them have a 5 right after
+  // the printed string (e.g. 9.5 with precision 0 prints 0 digits after the
+  // decimal point). This is again because rounding a number with a 5 after the
+  // printed section means that more digits have to be checked to determine if
+  // this should be rounded up (if there are non-zero digits after the 5) or to
+  // even (if the 5 is the last non-zero digit). Additionally, the algorithm for
+  // checking if a number is all 0s after the decimal point may not work since
+  // the decimal point moves in this representation.
+  written = __llvm_libc::sprintf(buff, "%.0e", 2.5812229360061737E+200);
+  ASSERT_STREQ_LEN(written, buff, "3e+200");
+
+  written = __llvm_libc::sprintf(buff, "%.1e", 9.059E+200);
+  ASSERT_STREQ_LEN(written, buff, "9.1e+200");
+
+  written = __llvm_libc::sprintf(buff, "%.0e", 9.059E+200);
+  ASSERT_STREQ_LEN(written, buff, "9e+200");
+
+  written = __llvm_libc::sprintf(buff, "%.166e", 1.131959884853339E-72);
+  ASSERT_STREQ_LEN(written, buff,
+                   "1."
+                   "13195988485333904593863991136097397258531639976739227369782"
+                   "68612419376648241056393424414314951197624317440549121097287"
+                   "069853416091591569170304865110665559768676757812e-72");
+
+  written = __llvm_libc::sprintf(buff, "%.0e", 9.5);
+  ASSERT_STREQ_LEN(written, buff, "1e+01");
+
+  written = __llvm_libc::sprintf(buff, "%.10e", 1.9999999999890936);
+  ASSERT_STREQ_LEN(written, buff, "2.0000000000e+00");
+
+  written = __llvm_libc::sprintf(buff, "%.1e", 745362143563.03894);
+  ASSERT_STREQ_LEN(written, buff, "7.5e+11");
+
+  written = __llvm_libc::sprintf(buff, "%.0e", 45181042688.0);
+  ASSERT_STREQ_LEN(written, buff, "5e+10");
+
+  written = __llvm_libc::sprintf(buff, "%.35e", 1.3752441369139243);
+  ASSERT_STREQ_LEN(written, buff, "1.37524413691392433101157166674965993e+00");
+
   // Subnormal Precision Tests
 
   written = __llvm_libc::sprintf(buff, "%.310e", 0x1.0p-1022);
@@ -2512,8 +2560,25 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) {
   written = __llvm_libc::sprintf(buff, "%.15g", 22.25);
   ASSERT_STREQ_LEN(written, buff, "22.25");
 
-  // Subnormal Precision Tests
+  // These tests also focus on rounding, but only in how it relates to the base
+  // 10 exponent. The %g conversion selects between being a %f or %e conversion
+  // based on what the exponent would be if it was %e. If we call the precision
+  // P (equal to 6 if the precision is not set, 0 if the provided precision is
+  // 0, and provided precision - 1 otherwise) and the exponent X, then the style
+  // is %f with an effective precision of P - X + 1 if P > X >= -4, else the
+  // style is %e with effective precision P - 1. Additionally, it attempts to
+  // trim zeros that would be displayed after the decimal point.
+  written = __llvm_libc::sprintf(buff, "%.1g", 9.059E+200);
+  ASSERT_STREQ_LEN(written, buff, "9e+200");
+
+  written = __llvm_libc::sprintf(buff, "%.2g", 9.059E+200);
+  ASSERT_STREQ_LEN(written, buff, "9.1e+200");
 
+  // For this test, P = 0 and X = 1, so P > X >= -4 is false, giving a %e style.
+  written = __llvm_libc::sprintf(buff, "%.0g", 9.5);
+  ASSERT_STREQ_LEN(written, buff, "1e+01");
+
+  // Subnormal Precision Tests
   written = __llvm_libc::sprintf(buff, "%.310g", 0x1.0p-1022);
   ASSERT_STREQ_LEN(
       written, buff,