Skip to content

Commit cd06b9d

Browse files
[libc] Fix printf %e and %g bugs
Fuzzing revealed bugs in the %e and %g conversions. Since these are very similar, they are grouped together. Again, most of the bugs were related to rounding. As an example, previously the code to check if the number was truncated only worked for digits below the decimal point, due to it being originally designed for %f. This patch adds a mechanism to check the digits above the decimal point for both %e and %g. Reviewed By: sivachandra, lntue Differential Revision: https://reviews.llvm.org/D157536
1 parent b02e73a commit cd06b9d

File tree

2 files changed

+171
-76
lines changed

2 files changed

+171
-76
lines changed

libc/src/stdio/printf_core/float_dec_converter.h

Lines changed: 104 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -406,19 +406,24 @@ class FloatWriter {
406406
}
407407
}
408408

409-
char low_digit;
409+
char low_digit = '0';
410410
if (block_digits > 0) {
411411
low_digit = end_buff[block_digits - 1];
412412
} else if (max_block_count > 0) {
413413
low_digit = '9';
414-
} else {
414+
} else if (buffered_digits > 0) {
415415
low_digit = block_buffer[buffered_digits - 1];
416416
}
417417

418+
bool round_up_max_blocks = false;
419+
418420
// Round up
419421
if (round == RoundDirection::Up ||
420422
(round == RoundDirection::Even && low_digit % 2 != 0)) {
421423
bool has_carry = true;
424+
round_up_max_blocks = true; // if we're rounding up, we might need to
425+
// round up the max blocks that are buffered.
426+
422427
// handle the low block that we're adding
423428
for (int count = static_cast<int>(block_digits) - 1;
424429
count >= 0 && has_carry; --count) {
@@ -427,6 +432,8 @@ class FloatWriter {
427432
} else {
428433
end_buff[count] += 1;
429434
has_carry = false;
435+
round_up_max_blocks = false; // If the low block isn't all nines, then
436+
// the max blocks aren't rounded up.
430437
}
431438
}
432439
// handle the high block that's buffered
@@ -490,7 +497,7 @@ class FloatWriter {
490497
// Either we intend to round down, or the rounding up is complete. Flush the
491498
// buffers.
492499

493-
RET_IF_RESULT_NEGATIVE(flush_buffer());
500+
RET_IF_RESULT_NEGATIVE(flush_buffer(round_up_max_blocks));
494501

495502
// And then write the final block. It's written via the buffer so that if
496503
// this is also the first block, the decimal point will be placed correctly.
@@ -740,58 +747,59 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
740747
last_block_size = IntegerToString<intmax_t>(digits).size();
741748
}
742749

750+
// This tracks if the number is truncated, that meaning that the digits after
751+
// last_digit are non-zero.
752+
bool truncated = false;
753+
743754
// This is the last block.
744755
const size_t maximum = precision + 1 - digits_written;
745756
uint32_t last_digit = 0;
746757
for (uint32_t k = 0; k < last_block_size - maximum; ++k) {
758+
if (last_digit > 0)
759+
truncated = true;
760+
747761
last_digit = digits % 10;
748762
digits /= 10;
749763
}
750764

751765
// If the last block we read doesn't have the digit after the end of what
752766
// we'll print, then we need to read the next block to get that digit.
753767
if (maximum == last_block_size) {
754-
BlockInt extra_block = float_converter.get_block(cur_block - 1);
768+
--cur_block;
769+
BlockInt extra_block = float_converter.get_block(cur_block);
755770
last_digit = extra_block / ((MAX_BLOCK / 10) + 1);
771+
if (extra_block % ((MAX_BLOCK / 10) + 1) > 0) {
772+
truncated = true;
773+
}
756774
}
757775

758776
RoundDirection round;
759-
// Is m * 10^(additionalDigits + 1) / 2^(-exponent) integer?
760-
const int32_t requiredTwos =
761-
-(exponent - MANT_WIDTH) - static_cast<int32_t>(precision) - 1;
762-
const bool trailingZeros =
763-
requiredTwos <= 0 ||
764-
(requiredTwos < 60 &&
765-
multiple_of_power_of_2(float_bits.get_explicit_mantissa(),
766-
static_cast<uint32_t>(requiredTwos)));
767-
switch (fputil::quick_get_round()) {
768-
case FE_TONEAREST:
769-
// Round to nearest, if it's exactly halfway then round to even.
770-
if (last_digit != 5) {
771-
round = last_digit > 5 ? RoundDirection::Up : RoundDirection::Down;
772-
} else {
773-
round = trailingZeros ? RoundDirection::Even : RoundDirection::Up;
774-
}
775-
break;
776-
case FE_DOWNWARD:
777-
if (is_negative && (!trailingZeros || last_digit > 0)) {
778-
round = RoundDirection::Up;
779-
} else {
780-
round = RoundDirection::Down;
777+
778+
// If we've already seen a truncated digit, then we don't need to check any
779+
// more.
780+
if (!truncated) {
781+
// Check the blocks above the decimal point
782+
if (cur_block >= 0) {
783+
// Check every block until the decimal point for non-zero digits.
784+
for (int cur_extra_block = cur_block - 1; cur_extra_block >= 0;
785+
--cur_extra_block) {
786+
BlockInt extra_block = float_converter.get_block(cur_extra_block);
787+
if (extra_block > 0) {
788+
truncated = true;
789+
break;
790+
}
791+
}
781792
}
782-
break;
783-
case FE_UPWARD:
784-
if (!is_negative && (!trailingZeros || last_digit > 0)) {
785-
round = RoundDirection::Up;
786-
} else {
787-
round = RoundDirection::Down;
793+
// If it's still not truncated and there are digits below the decimal point
794+
if (!truncated && exponent - MANT_WIDTH < 0) {
795+
// Use the formula from %f.
796+
truncated =
797+
!zero_after_digits(exponent - MANT_WIDTH, precision - final_exponent,
798+
float_bits.get_explicit_mantissa());
788799
}
789-
round = is_negative ? RoundDirection::Down : RoundDirection::Up;
790-
break;
791-
case FE_TOWARDZERO:
792-
round = RoundDirection::Down;
793-
break;
794800
}
801+
round = get_round_direction(last_digit, truncated, is_negative);
802+
795803
RET_IF_RESULT_NEGATIVE(float_writer.write_last_block_exp(
796804
digits, maximum, round, final_exponent, a + 'E' - 'A'));
797805

@@ -984,71 +992,93 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer,
984992
}
985993
}
986994

995+
bool truncated = false;
996+
987997
// Find the digit after the lowest digit that we'll actually print to
988998
// determine the rounding.
989999
const uint32_t maximum =
9901000
exp_precision + 1 - static_cast<uint32_t>(digits_checked);
9911001
uint32_t last_digit = 0;
9921002
for (uint32_t k = 0; k < last_block_size - maximum; ++k) {
1003+
if (last_digit > 0)
1004+
truncated = true;
1005+
9931006
last_digit = digits % 10;
9941007
digits /= 10;
9951008
}
9961009

9971010
// If the last block we read doesn't have the digit after the end of what
9981011
// we'll print, then we need to read the next block to get that digit.
9991012
if (maximum == last_block_size) {
1000-
BlockInt extra_block = float_converter.get_block(cur_block - 1);
1013+
--cur_block;
1014+
BlockInt extra_block = float_converter.get_block(cur_block);
10011015
last_digit = extra_block / ((MAX_BLOCK / 10) + 1);
1016+
1017+
if (extra_block % ((MAX_BLOCK / 10) + 1) > 0)
1018+
truncated = true;
10021019
}
10031020

10041021
// TODO: unify this code across the three float conversions.
10051022
RoundDirection round;
1006-
// Is m * 10^(additionalDigits + 1) / 2^(-exponent) integer?
1007-
const int32_t requiredTwos =
1008-
-(exponent - MANT_WIDTH) - static_cast<int32_t>(exp_precision) - 1;
1009-
// TODO: rename this variable to remove confusion with trailing_zeroes
1010-
const bool trailingZeros =
1011-
requiredTwos <= 0 ||
1012-
(requiredTwos < 60 &&
1013-
multiple_of_power_of_2(float_bits.get_explicit_mantissa(),
1014-
static_cast<uint32_t>(requiredTwos)));
1015-
switch (fputil::quick_get_round()) {
1016-
case FE_TONEAREST:
1017-
// Round to nearest, if it's exactly halfway then round to even.
1018-
if (last_digit != 5) {
1019-
round = last_digit > 5 ? RoundDirection::Up : RoundDirection::Down;
1020-
} else {
1021-
round = trailingZeros ? RoundDirection::Even : RoundDirection::Up;
1022-
}
1023-
break;
1024-
case FE_DOWNWARD:
1025-
if (is_negative && (!trailingZeros || last_digit > 0)) {
1026-
round = RoundDirection::Up;
1027-
} else {
1028-
round = RoundDirection::Down;
1023+
1024+
// If we've already seen a truncated digit, then we don't need to check any
1025+
// more.
1026+
if (!truncated) {
1027+
// Check the blocks above the decimal point
1028+
if (cur_block >= 0) {
1029+
// Check every block until the decimal point for non-zero digits.
1030+
for (int cur_extra_block = cur_block - 1; cur_extra_block >= 0;
1031+
--cur_extra_block) {
1032+
BlockInt extra_block = float_converter.get_block(cur_extra_block);
1033+
if (extra_block > 0) {
1034+
truncated = true;
1035+
break;
1036+
}
1037+
}
10291038
}
1030-
break;
1031-
case FE_UPWARD:
1032-
if (!is_negative && (!trailingZeros || last_digit > 0)) {
1033-
round = RoundDirection::Up;
1034-
} else {
1035-
round = RoundDirection::Down;
1039+
// If it's still not truncated and there are digits below the decimal point
1040+
if (!truncated && exponent - MANT_WIDTH < 0) {
1041+
// Use the formula from %f.
1042+
truncated =
1043+
!zero_after_digits(exponent - MANT_WIDTH, exp_precision - base_10_exp,
1044+
float_bits.get_explicit_mantissa());
10361045
}
1037-
round = is_negative ? RoundDirection::Down : RoundDirection::Up;
1038-
break;
1039-
case FE_TOWARDZERO:
1040-
round = RoundDirection::Down;
1041-
break;
10421046
}
10431047

1048+
round = get_round_direction(last_digit, truncated, is_negative);
1049+
10441050
bool round_up;
10451051
if (round == RoundDirection::Up) {
10461052
round_up = true;
10471053
} else if (round == RoundDirection::Down) {
10481054
round_up = false;
10491055
} else {
1050-
// RoundDirection is even, so check the extra digit.
1051-
uint32_t low_digit = digits % 10;
1056+
// RoundDirection is even, so check the lowest digit that will be printed.
1057+
uint32_t low_digit;
1058+
1059+
// maximum is the number of digits that will remain in digits after getting
1060+
// last_digit. If it's greater than zero, we can just check the lowest digit
1061+
// in digits.
1062+
if (maximum > 0) {
1063+
low_digit = digits % 10;
1064+
} else {
1065+
// Else if there are trailing nines, then the low digit is a nine, same
1066+
// with zeroes.
1067+
if (trailing_nines > 0) {
1068+
low_digit = 9;
1069+
} else if (trailing_zeroes > 0) {
1070+
low_digit = 0;
1071+
} else {
1072+
// If there are no trailing zeroes or nines, then the round direction
1073+
// doesn't actually matter here. Since this conversion passes off the
1074+
// value to another one for final conversion, rounding only matters to
1075+
// determine if the exponent is higher than expected (with an all nine
1076+
// number) or to determine the trailing zeroes to trim. In this case
1077+
// low_digit is set to 0, but it could be set to any number.
1078+
1079+
low_digit = 0;
1080+
}
1081+
}
10521082
round_up = (low_digit % 2) != 0;
10531083
}
10541084

libc/test/src/stdio/sprintf_test.cpp

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,8 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) {
888888
double inf = __llvm_libc::fputil::FPBits<double>::inf().get_val();
889889
double nan = __llvm_libc::fputil::FPBits<double>::build_nan(1);
890890

891-
char big_buff[10000];
891+
char big_buff[10000]; // Used for long doubles and other extremely wide
892+
// numbers.
892893

893894
written = __llvm_libc::sprintf(buff, "%f", 1.0);
894895
ASSERT_STREQ_LEN(written, buff, "1.000000");
@@ -1696,6 +1697,14 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) {
16961697

16971698
// Length Modifier Tests.
16981699

1700+
#if defined(SPECIAL_X86_LONG_DOUBLE)
1701+
written = __llvm_libc::sprintf(buff, "%.9Le", 1000000000500000000.1L);
1702+
ASSERT_STREQ_LEN(written, buff, "1.000000001e+18");
1703+
1704+
written = __llvm_libc::sprintf(buff, "%.9Le", 1000000000500000000.0L);
1705+
ASSERT_STREQ_LEN(written, buff, "1.000000000e+18");
1706+
#endif
1707+
16991708
// TODO: Fix long doubles (needs bigger table or alternate algorithm.)
17001709
// Currently the table values are generated, which is very slow.
17011710
/*
@@ -1908,6 +1917,45 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) {
19081917
written = __llvm_libc::sprintf(buff, "%.5e", 1.008e3);
19091918
ASSERT_STREQ_LEN(written, buff, "1.00800e+03");
19101919

1920+
// These tests also focus on rounding. Almost all of them have a 5 right after
1921+
// the printed string (e.g. 9.5 with precision 0 prints 0 digits after the
1922+
// decimal point). This is again because rounding a number with a 5 after the
1923+
// printed section means that more digits have to be checked to determine if
1924+
// this should be rounded up (if there are non-zero digits after the 5) or to
1925+
// even (if the 5 is the last non-zero digit). Additionally, the algorithm for
1926+
// checking if a number is all 0s after the decimal point may not work since
1927+
// the decimal point moves in this representation.
1928+
written = __llvm_libc::sprintf(buff, "%.0e", 2.5812229360061737E+200);
1929+
ASSERT_STREQ_LEN(written, buff, "3e+200");
1930+
1931+
written = __llvm_libc::sprintf(buff, "%.1e", 9.059E+200);
1932+
ASSERT_STREQ_LEN(written, buff, "9.1e+200");
1933+
1934+
written = __llvm_libc::sprintf(buff, "%.0e", 9.059E+200);
1935+
ASSERT_STREQ_LEN(written, buff, "9e+200");
1936+
1937+
written = __llvm_libc::sprintf(buff, "%.166e", 1.131959884853339E-72);
1938+
ASSERT_STREQ_LEN(written, buff,
1939+
"1."
1940+
"13195988485333904593863991136097397258531639976739227369782"
1941+
"68612419376648241056393424414314951197624317440549121097287"
1942+
"069853416091591569170304865110665559768676757812e-72");
1943+
1944+
written = __llvm_libc::sprintf(buff, "%.0e", 9.5);
1945+
ASSERT_STREQ_LEN(written, buff, "1e+01");
1946+
1947+
written = __llvm_libc::sprintf(buff, "%.10e", 1.9999999999890936);
1948+
ASSERT_STREQ_LEN(written, buff, "2.0000000000e+00");
1949+
1950+
written = __llvm_libc::sprintf(buff, "%.1e", 745362143563.03894);
1951+
ASSERT_STREQ_LEN(written, buff, "7.5e+11");
1952+
1953+
written = __llvm_libc::sprintf(buff, "%.0e", 45181042688.0);
1954+
ASSERT_STREQ_LEN(written, buff, "5e+10");
1955+
1956+
written = __llvm_libc::sprintf(buff, "%.35e", 1.3752441369139243);
1957+
ASSERT_STREQ_LEN(written, buff, "1.37524413691392433101157166674965993e+00");
1958+
19111959
// Subnormal Precision Tests
19121960

19131961
written = __llvm_libc::sprintf(buff, "%.310e", 0x1.0p-1022);
@@ -2512,8 +2560,25 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) {
25122560
written = __llvm_libc::sprintf(buff, "%.15g", 22.25);
25132561
ASSERT_STREQ_LEN(written, buff, "22.25");
25142562

2515-
// Subnormal Precision Tests
2563+
// These tests also focus on rounding, but only in how it relates to the base
2564+
// 10 exponent. The %g conversion selects between being a %f or %e conversion
2565+
// based on what the exponent would be if it was %e. If we call the precision
2566+
// P (equal to 6 if the precision is not set, 0 if the provided precision is
2567+
// 0, and provided precision - 1 otherwise) and the exponent X, then the style
2568+
// is %f with an effective precision of P - X + 1 if P > X >= -4, else the
2569+
// style is %e with effective precision P - 1. Additionally, it attempts to
2570+
// trim zeros that would be displayed after the decimal point.
2571+
written = __llvm_libc::sprintf(buff, "%.1g", 9.059E+200);
2572+
ASSERT_STREQ_LEN(written, buff, "9e+200");
2573+
2574+
written = __llvm_libc::sprintf(buff, "%.2g", 9.059E+200);
2575+
ASSERT_STREQ_LEN(written, buff, "9.1e+200");
25162576

2577+
// For this test, P = 0 and X = 1, so P > X >= -4 is false, giving a %e style.
2578+
written = __llvm_libc::sprintf(buff, "%.0g", 9.5);
2579+
ASSERT_STREQ_LEN(written, buff, "1e+01");
2580+
2581+
// Subnormal Precision Tests
25172582
written = __llvm_libc::sprintf(buff, "%.310g", 0x1.0p-1022);
25182583
ASSERT_STREQ_LEN(
25192584
written, buff,

0 commit comments

Comments
 (0)