[libc] Fix printf %f bugs

michaelrj-google · michaelrj-google · commit b02e73a3551f · 2023-08-15T16:23:13.000-07:00
Fuzzing revealed several bugs in the %f float conversion. This patch fixes them. Most of these bugs are related to rounding, such as 1.999...999 being rounded to 2.999...999 instead of 2.000...000 due to rounding up not properly changing the nines to zeros. Additionally, much of the rounding infrastructure has been refactored out so it can be shared with the other conversions. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D157535
diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h
@@ -53,6 +53,47 @@ constexpr char DECIMAL_POINT = '.';
 // This is used to represent which direction the number should be rounded.
 enum class RoundDirection { Up, Down, Even };
 
+LIBC_INLINE RoundDirection get_round_direction(int last_digit, bool truncated,
+                                               bool is_negative) {
+  switch (fputil::quick_get_round()) {
+  case FE_TONEAREST:
+    // Round to nearest, if it's exactly halfway then round to even.
+    if (last_digit != 5) {
+      return last_digit > 5 ? RoundDirection::Up : RoundDirection::Down;
+    } else {
+      return !truncated ? RoundDirection::Even : RoundDirection::Up;
+    }
+  case FE_DOWNWARD:
+    if (is_negative && (truncated || last_digit > 0)) {
+      return RoundDirection::Up;
+    } else {
+      return RoundDirection::Down;
+    }
+  case FE_UPWARD:
+    if (!is_negative && (truncated || last_digit > 0)) {
+      return RoundDirection::Up;
+    } else {
+      return RoundDirection::Down;
+    }
+    return is_negative ? RoundDirection::Down : RoundDirection::Up;
+  case FE_TOWARDZERO:
+    return RoundDirection::Down;
+  default:
+    return RoundDirection::Down;
+  }
+}
+
+template <typename T>
+LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_integral_v<T>, bool>
+zero_after_digits(int32_t base_2_exp, int32_t digits_after_point, T mantissa) {
+  const int32_t required_twos = -base_2_exp - digits_after_point - 1;
+  const bool has_trailing_zeros =
+      required_twos <= 0 ||
+      (required_twos < 60 &&
+       multiple_of_power_of_2(mantissa, static_cast<uint32_t>(required_twos)));
+  return has_trailing_zeros;
+}
+
 class PaddingWriter {
   bool left_justified = false;
   bool leading_zeroes = false;
@@ -127,7 +168,9 @@ class FloatWriter {
   Writer *writer;                   // Writes to the final output.
   PaddingWriter padding_writer; // Handles prefixes/padding, uses total_digits.
 
-  int flush_buffer() {
+  int flush_buffer(bool round_up_max_blocks = false) {
+    const char MAX_BLOCK_DIGIT = (round_up_max_blocks ? '0' : '9');
+
     // Write the most recent buffered block, and mark has_written
     if (!has_written) {
       has_written = true;
@@ -174,12 +217,12 @@ class FloatWriter {
         has_decimal_point) {
       size_t digits_to_write = digits_before_decimal - total_digits_written;
       if (digits_to_write > 0) {
-        RET_IF_RESULT_NEGATIVE(writer->write('9', digits_to_write));
+        RET_IF_RESULT_NEGATIVE(writer->write(MAX_BLOCK_DIGIT, digits_to_write));
       }
       RET_IF_RESULT_NEGATIVE(writer->write(DECIMAL_POINT));
       if ((BLOCK_SIZE * max_block_count) - digits_to_write > 0) {
         RET_IF_RESULT_NEGATIVE(writer->write(
-            '9', (BLOCK_SIZE * max_block_count) - digits_to_write));
+            MAX_BLOCK_DIGIT, (BLOCK_SIZE * max_block_count) - digits_to_write));
       }
       // add 1 for the decimal point
       total_digits_written += BLOCK_SIZE * max_block_count + 1;
@@ -189,7 +232,8 @@ class FloatWriter {
 
     // Clear the buffer of max blocks
     if (max_block_count > 0) {
-      RET_IF_RESULT_NEGATIVE(writer->write('9', max_block_count * BLOCK_SIZE));
+      RET_IF_RESULT_NEGATIVE(
+          writer->write(MAX_BLOCK_DIGIT, max_block_count * BLOCK_SIZE));
       total_digits_written += max_block_count * BLOCK_SIZE;
       max_block_count = 0;
     }
@@ -268,19 +312,23 @@ class FloatWriter {
       end_buff[count] = int_to_str[count + 1 + (BLOCK_SIZE - block_digits)];
     }
 
-    char low_digit;
+    char low_digit = '0';
     if (block_digits > 0) {
       low_digit = end_buff[block_digits - 1];
     } else if (max_block_count > 0) {
       low_digit = '9';
-    } else {
+    } else if (buffered_digits > 0) {
       low_digit = block_buffer[buffered_digits - 1];
     }
 
+    bool round_up_max_blocks = false;
+
     // Round up
     if (round == RoundDirection::Up ||
         (round == RoundDirection::Even && low_digit % 2 != 0)) {
       bool has_carry = true;
+      round_up_max_blocks = true; // if we're rounding up, we might need to
+                                  // round up the max blocks that are buffered.
       // handle the low block that we're adding
       for (int count = static_cast<int>(block_digits) - 1;
            count >= 0 && has_carry; --count) {
@@ -289,6 +337,8 @@ class FloatWriter {
         } else {
           end_buff[count] += 1;
           has_carry = false;
+          round_up_max_blocks = false; // If the low block isn't all nines, then
+                                       // the max blocks aren't rounded up.
         }
       }
       // handle the high block that's buffered
@@ -333,7 +383,7 @@ class FloatWriter {
     // Either we intend to round down, or the rounding up is complete. Flush the
     // buffers.
 
-    RET_IF_RESULT_NEGATIVE(flush_buffer());
+    RET_IF_RESULT_NEGATIVE(flush_buffer(round_up_max_blocks));
 
     // And then write the final block.
     RET_IF_RESULT_NEGATIVE(writer->write({end_buff, block_digits}));
@@ -571,42 +621,11 @@ LIBC_INLINE int convert_float_decimal_typed(Writer *writer,
           digits /= 10;
         }
         RoundDirection round;
-        // Is m * 10^(additionalDigits + 1) / 2^(-exponent) integer?
-        const int32_t requiredTwos =
-            -(exponent - MANT_WIDTH) - static_cast<int32_t>(precision) - 1;
-        const bool trailingZeros =
-            requiredTwos <= 0 ||
-            (requiredTwos < 60 &&
-             multiple_of_power_of_2(float_bits.get_explicit_mantissa(),
-                                    static_cast<uint32_t>(requiredTwos)));
-        switch (fputil::quick_get_round()) {
-        case FE_TONEAREST:
-          // Round to nearest, if it's exactly halfway then round to even.
-          if (last_digit != 5) {
-            round = last_digit > 5 ? RoundDirection::Up : RoundDirection::Down;
-          } else {
-            round = trailingZeros ? RoundDirection::Even : RoundDirection::Up;
-          }
-          break;
-        case FE_DOWNWARD:
-          if (is_negative && (!trailingZeros || last_digit > 0)) {
-            round = RoundDirection::Up;
-          } else {
-            round = RoundDirection::Down;
-          }
-          break;
-        case FE_UPWARD:
-          if (!is_negative && (!trailingZeros || last_digit > 0)) {
-            round = RoundDirection::Up;
-          } else {
-            round = RoundDirection::Down;
-          }
-          round = is_negative ? RoundDirection::Down : RoundDirection::Up;
-          break;
-        case FE_TOWARDZERO:
-          round = RoundDirection::Down;
-          break;
-        }
+        const bool truncated =
+            !zero_after_digits(exponent - MANT_WIDTH, precision,
+                               float_bits.get_explicit_mantissa());
+        round = get_round_direction(last_digit, truncated, is_negative);
+
         RET_IF_RESULT_NEGATIVE(
             float_writer.write_last_block_dec(digits, maximum, round));
         break;
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
@@ -13,6 +13,8 @@
 #include "test/UnitTest/RoundingModeUtils.h"
 #include "test/UnitTest/Test.h"
 
+// TODO: Add a comment here explaining the printf format string.
+
 // #include <stdio.h>
 // namespace __llvm_libc {
 // using ::sprintf;
@@ -886,6 +888,8 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) {
   double inf = __llvm_libc::fputil::FPBits<double>::inf().get_val();
   double nan = __llvm_libc::fputil::FPBits<double>::build_nan(1);
 
+  char big_buff[10000];
+
   written = __llvm_libc::sprintf(buff, "%f", 1.0);
   ASSERT_STREQ_LEN(written, buff, "1.000000");
 
@@ -958,8 +962,6 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) {
                    "99999999999999999996693535322073426194986990198284960792713"
                    "91541752018669482644324418977840117055488.000000");
 
-  char big_buff[10000];
-
   written = __llvm_libc::sprintf(big_buff, "%Lf", 1e1000L);
   ASSERT_STREQ_LEN(
       written, big_buff,
@@ -1281,6 +1283,55 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) {
   written = __llvm_libc::sprintf(buff, "%.0f", 0x1.1000000000006p+3);
   ASSERT_STREQ_LEN(written, buff, "9");
 
+  // Most of these tests are checking rounding behavior when the precision is
+  // set. As an example, %.9f has a precision of 9, meaning it should be rounded
+  // to 9 digits after the decimal point. In this case, that means that it
+  // should be rounded up. Many of these tests have precisions divisible by 9
+  // since when printing the floating point numbers are broken up into "blocks"
+  // of 9 digits. They often also have a 5 after the end of what's printed,
+  // since in round to nearest mode, that requires checking additional digits.
+  written = __llvm_libc::sprintf(buff, "%.9f", 1.9999999999999514);
+  ASSERT_STREQ_LEN(written, buff, "2.000000000");
+
+  // The number continues after the literal because floating point numbers can't
+  // represent every value. The printed value is the closest value a double can
+  // represent, rounded to the requested precision.
+  written = __llvm_libc::sprintf(buff, "%.238f", 1.131959884853339E-72);
+  ASSERT_STREQ_LEN(
+      written, buff,
+      "0."
+      "000000000000000000000000000000000000000000000000000000000000000000000001"
+      "131959884853339045938639911360973972585316399767392273697826861241937664"
+      "824105639342441431495119762431744054912109728706985341609159156917030486"
+      "5110665559768676757812");
+
+  written = __llvm_libc::sprintf(buff, "%.36f", 9.9e-77);
+  ASSERT_STREQ_LEN(written, buff, "0.000000000000000000000000000000000000");
+
+  written = __llvm_libc::sprintf(big_buff, "%.1071f", 2.0226568751604562E-314);
+  ASSERT_STREQ_LEN(
+      written, big_buff,
+      "0."
+      "000000000000000000000000000000000000000000000000000000000000000000000000"
+      "000000000000000000000000000000000000000000000000000000000000000000000000"
+      "000000000000000000000000000000000000000000000000000000000000000000000000"
+      "000000000000000000000000000000000000000000000000000000000000000000000000"
+      "000000000000000000000000020226568751604561683387695750739190248658016786"
+      "876938365740768295004457513021760887468117675879956193821375945376632621"
+      "367998639317487303530427946024002091961988296562516210434394107910027236"
+      "308233439098296717697919471698168200340836487924061502604112643734560622"
+      "258525943451473162532620033398739382796482175564084902819878893430369431"
+      "907237673154867595954110791891883281880339550955455702452422857027182100"
+      "606009588295886640782228837851739241290179512817803196347460636150182981"
+      "085084829941917048152725177119574542042352896161225179181967347829576272"
+      "242480201291872969114441104973910102402751449901108484914924879541248714"
+      "939096548775588293353689592872854495101242645279589976452453829724479805"
+      "750016448075109469332839157162950982637994457036256790161132812");
+
+  // If no precision is specified it defaults to 6 for %f.
+  written = __llvm_libc::sprintf(buff, "%f", 2325885.4901960781);
+  ASSERT_STREQ_LEN(written, buff, "2325885.490196");
+
   // Subnormal Precision Tests
 
   written = __llvm_libc::sprintf(buff, "%.310f", 0x1.0p-1022);