Skip to content

Commit 9b5e00f

Browse files
dpwedhalbert
authored andcommitted
py/formatfloat: Format all whole-number floats exactly.
Formerly, py/formatfloat would print whole numbers inaccurately with nonzero digits beyond the decimal place. This resulted from its strategy of successive scaling of the argument by 0.1 which cannot be exactly represented in floating point. The change in this commit avoids scaling until the value is smaller than 1, so all whole numbers print with zero fractional part. Fixes issue #4212. Signed-off-by: Dan Ellis [email protected]
1 parent d95cfc3 commit 9b5e00f

File tree

7 files changed

+153
-54
lines changed

7 files changed

+153
-54
lines changed

py/formatfloat.c

Lines changed: 100 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
*/
2626

2727
#include "py/mpconfig.h"
28+
#include "py/misc.h"
2829
#if MICROPY_FLOAT_IMPL != MICROPY_FLOAT_IMPL_NONE
2930

3031
#include <assert.h>
@@ -96,7 +97,16 @@ static inline int fp_isless1(float x) {
9697
#define fp_iszero(x) (x == 0)
9798
#define fp_isless1(x) (x < 1.0)
9899

99-
#endif
100+
#endif // MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT/DOUBLE
101+
102+
static inline int fp_ge_eps(FPTYPE x, FPTYPE y) {
103+
mp_float_union_t fb_y = {y};
104+
// Back off 2 eps.
105+
// This is valid for almost all values, but in practice
106+
// it's only used when y = 1eX for X>=0.
107+
fb_y.i -= 2;
108+
return x >= fb_y.f;
109+
}
100110

101111
static const FPTYPE g_pos_pow[] = {
102112
#if FPDECEXP > 32
@@ -173,6 +183,7 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
173183
int num_digits = 0;
174184
const FPTYPE *pos_pow = g_pos_pow;
175185
const FPTYPE *neg_pow = g_neg_pow;
186+
int signed_e = 0;
176187

177188
if (fp_iszero(f)) {
178189
e = 0;
@@ -192,40 +203,32 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
192203
}
193204
}
194205
} else if (fp_isless1(f)) {
195-
// We need to figure out what an integer digit will be used
196-
// in case 'f' is used (or we revert other format to it below).
197-
// As we just tested number to be <1, this is obviously 0,
198-
// but we can round it up to 1 below.
199-
char first_dig = '0';
200-
if (f >= FPROUND_TO_ONE) {
201-
first_dig = '1';
202-
}
203-
206+
FPTYPE f_mod = f;
204207
// Build negative exponent
205208
for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) {
206-
if (*neg_pow > f) {
209+
if (*neg_pow > f_mod) {
207210
e += e1;
208-
f *= *pos_pow;
211+
f_mod *= *pos_pow;
209212
}
210213
}
214+
211215
char e_sign_char = '-';
212-
if (fp_isless1(f) && f >= FPROUND_TO_ONE) {
213-
f = FPCONST(1.0);
216+
if (fp_isless1(f_mod) && f_mod >= FPROUND_TO_ONE) {
217+
f_mod = FPCONST(1.0);
214218
if (e == 0) {
215219
e_sign_char = '+';
216220
}
217-
} else if (fp_isless1(f)) {
221+
} else if (fp_isless1(f_mod)) {
218222
e++;
219-
f *= FPCONST(10.0);
223+
f_mod *= FPCONST(10.0);
220224
}
221225

222226
// If the user specified 'g' format, and e is <= 4, then we'll switch
223227
// to the fixed format ('f')
224228

225229
if (fmt == 'f' || (fmt == 'g' && e <= 4)) {
226230
fmt = 'f';
227-
dec = -1;
228-
*s++ = first_dig;
231+
dec = 0;
229232

230233
if (org_fmt == 'g') {
231234
prec += (e - 1);
@@ -237,13 +240,8 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
237240
}
238241

239242
num_digits = prec;
240-
if (num_digits) {
241-
*s++ = '.';
242-
while (--e && num_digits) {
243-
*s++ = '0';
244-
num_digits--;
245-
}
246-
}
243+
signed_e = 0;
244+
++num_digits;
247245
} else {
248246
// For e & g formats, we'll be printing the exponent, so set the
249247
// sign.
@@ -256,22 +254,29 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
256254
prec++;
257255
}
258256
}
257+
signed_e = -e;
259258
}
260259
} else {
261-
// Build positive exponent
262-
for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) {
263-
if (*pos_pow <= f) {
260+
// Build positive exponent.
261+
// We don't modify f at this point to avoid innaccuracies from
262+
// scaling it. Instead, we find the product of powers of 10
263+
// that is not greater than it, and use that to start the
264+
// mantissa.
265+
FPTYPE u_base = FPCONST(1.0);
266+
for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++) {
267+
FPTYPE next_u = u_base * *pos_pow;
268+
// fp_ge_eps performs "f >= (next_u - 2eps)" so that if, for
269+
// numerical reasons, f is very close to a power of ten but
270+
// not strictly equal, we still treat it as that power of 10.
271+
// The comparison was failing for maybe 10% of 1eX values, but
272+
// although rounding fixed many of them, there were still some
273+
// rendering as 9.99999998e(X-1).
274+
if (fp_ge_eps(f, next_u)) {
275+
u_base = next_u;
264276
e += e1;
265-
f *= *neg_pow;
266277
}
267278
}
268279

269-
// It can be that f was right on the edge of an entry in pos_pow needs to be reduced
270-
if ((int)f >= 10) {
271-
e += 1;
272-
f *= FPCONST(0.1);
273-
}
274-
275280
// If the user specified fixed format (fmt == 'f') and e makes the
276281
// number too big to fit into the available buffer, then we'll
277282
// switch to the 'e' format.
@@ -310,15 +315,15 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
310315
} else {
311316
e_sign = '+';
312317
}
318+
signed_e = e;
313319
}
314320
if (prec < 0) {
315321
// This can happen when the prec is trimmed to prevent buffer overflow
316322
prec = 0;
317323
}
318324

319-
// We now have num.f as a floating point number between >= 1 and < 10
320-
// (or equal to zero), and e contains the absolute value of the power of
321-
// 10 exponent. and (dec + 1) == the number of dgits before the decimal.
325+
// At this point e contains the absolute value of the power of 10 exponent.
326+
// (dec + 1) == the number of dgits before the decimal.
322327

323328
// For e, prec is # digits after the decimal
324329
// For f, prec is # digits after the decimal
@@ -336,25 +341,63 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
336341
num_digits = prec;
337342
}
338343

339-
// Print the digits of the mantissa
340-
for (int i = 0; i < num_digits; ++i, --dec) {
341-
int32_t d = (int32_t)f;
342-
if (d < 0) {
343-
*s++ = '0';
344-
} else {
344+
if (signed_e < 0) {
345+
// The algorithm below treats numbers smaller than 1 by scaling them
346+
// repeatedly by 10 to bring the new digit to the top. Our input number
347+
// was smaller than 1, so scale it up to be 1 <= f < 10.
348+
FPTYPE u_base = FPCONST(1.0);
349+
const FPTYPE *pow_u = g_pos_pow;
350+
for (int m = FPDECEXP; m; m >>= 1, pow_u++) {
351+
if (m & e) {
352+
u_base *= *pow_u;
353+
}
354+
}
355+
f *= u_base;
356+
}
357+
358+
int d = 0;
359+
int num_digits_left = num_digits;
360+
for (int digit_index = signed_e; num_digits_left >= 0; --digit_index) {
361+
FPTYPE u_base = FPCONST(1.0);
362+
if (digit_index > 0) {
363+
// Generate 10^digit_index for positive digit_index.
364+
const FPTYPE *pow_u = g_pos_pow;
365+
int target_index = digit_index;
366+
for (int m = FPDECEXP; m; m >>= 1, pow_u++) {
367+
if (m & target_index) {
368+
u_base *= *pow_u;
369+
}
370+
}
371+
}
372+
for (d = 0; d < 9; ++d) {
373+
// This is essentially "if (f < u_base)", but with 2eps margin
374+
// so that if f is just a tiny bit smaller, we treat it as
375+
// equal (and accept the additional digit value).
376+
if (!fp_ge_eps(f, u_base)) {
377+
break;
378+
}
379+
f -= u_base;
380+
}
381+
// We calculate one more digit than we display, to use in rounding
382+
// below. So only emit the digit if it's one that we display.
383+
if (num_digits_left > 0) {
384+
// Emit this number (the leading digit).
345385
*s++ = '0' + d;
386+
if (dec == 0 && prec > 0) {
387+
*s++ = '.';
388+
}
346389
}
347-
if (dec == 0 && prec > 0) {
348-
*s++ = '.';
390+
--dec;
391+
--num_digits_left;
392+
if (digit_index <= 0) {
393+
// Once we get below 1.0, we scale up f instead of calculting
394+
// negative powers of 10 in u_base. This provides better
395+
// renditions of exact decimals like 1/16 etc.
396+
f *= FPCONST(10.0);
349397
}
350-
f -= (FPTYPE)d;
351-
f *= FPCONST(10.0);
352398
}
353-
354-
// Round
355-
// If we print non-exponential format (i.e. 'f'), but a digit we're going
356-
// to round by (e) is too far away, then there's nothing to round.
357-
if ((org_fmt != 'f' || e <= num_digits) && f >= FPCONST(5.0)) {
399+
// Rounding. If the next digit to print is >= 5, round up.
400+
if (d >= 5) {
358401
char *rs = s;
359402
rs--;
360403
while (1) {
@@ -394,7 +437,10 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
394437
}
395438
} else {
396439
// Need at extra digit at the end to make room for the leading '1'
397-
s++;
440+
// but if we're at the buffer size limit, just drop the final digit.
441+
if ((size_t)(s + 1 - buf) < buf_size) {
442+
s++;
443+
}
398444
}
399445
char *ss = s;
400446
while (ss > rs) {

tests/float/float_format_ftoe.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# check a case where rounding was suppressed inappropriately when "f" was
2+
# promoted to "e" for large numbers.
3+
v = 8.888e32
4+
print("%.2f" % v) # '%.2f' format with e32 becomes '%.2e', expect 8.89e+32.

tests/float/float_format_ftoe.py.exp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
8.89e+32

tests/float/float_format_ints.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Test that integers format to exact values.
2+
3+
for b in [13, 123, 457, 23456]:
4+
for r in range(1, 10):
5+
e_fmt = "{:." + str(r) + "e}"
6+
f_fmt = "{:." + str(r) + "f}"
7+
g_fmt = "{:." + str(r) + "g}"
8+
for e in range(0, 5):
9+
f = b * (10**e)
10+
title = str(b) + " x 10^" + str(e)
11+
print(title, "with format", e_fmt, "gives", e_fmt.format(f))
12+
print(title, "with format", f_fmt, "gives", f_fmt.format(f))
13+
print(title, "with format", g_fmt, "gives", g_fmt.format(f))
14+
15+
# Check that powers of 10 (that fit in float32) format correctly.
16+
for i in range(31):
17+
# It works to 12 digits on all platforms *except* qemu-arm, where
18+
# 10^11 comes out as 10000000820 or something.
19+
print("{:.7g}".format(float("1e" + str(i))))
20+
21+
# 16777215 is 2^24 - 1, the largest integer that can be completely held
22+
# in a float32.
23+
print("{:f}".format(16777215))
24+
# 4294967040 = 16777215 * 128 is the largest integer that is exactly
25+
# represented by a float32 and that will also fit within a (signed) int32.
26+
# The upper bound of our integer-handling code is actually double this,
27+
# but that constant might cause trouble on systems using 32 bit ints.
28+
print("{:f}".format(2147483520))
29+
# Very large positive integers can be a test for precision and resolution.
30+
# This is a weird way to represent 1e38 (largest power of 10 for float32).
31+
print("{:.6e}".format(float("9" * 30 + "e8")))
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Test formatting of very large ints.
2+
# Relies on double-precision floats.
3+
4+
import array
5+
import sys
6+
7+
# Challenging way to express 1e200 and 1e100.
8+
print("{:.12e}".format(float("9" * 400 + "e-200")))
9+
print("{:.12e}".format(float("9" * 400 + "e-300")))
10+
11+
# These correspond to the binary representation of 1e200 in float64s:
12+
v1 = 0x54B249AD2594C37D # 1e100
13+
v2 = 0x6974E718D7D7625A # 1e200
14+
print("{:.12e}".format(array.array("d", v1.to_bytes(8, sys.byteorder))[0]))
15+
print("{:.12e}".format(array.array("d", v2.to_bytes(8, sys.byteorder))[0]))

tests/run-tests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ def run_tests(pyb, tests, args, result_dir, num_threads=1):
426426
if upy_float_precision < 64:
427427
skip_tests.add("float/float_divmod.py") # tested by float/float_divmod_relaxed.py instead
428428
skip_tests.add("float/float2int_doubleprec_intbig.py")
429+
skip_tests.add("float/float_format_ints_doubleprec.py")
429430
skip_tests.add("float/float_parse_doubleprec.py")
430431

431432
if not has_complex:

tools/tinytest-codegen.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def script_to_map(test_file):
100100
"float/float_divmod.py",
101101
# requires double precision floating point to work
102102
"float/float2int_doubleprec_intbig.py",
103+
"float/float_format_ints_doubleprec.py",
103104
"float/float_parse_doubleprec.py",
104105
# inline asm FP tests (require Cortex-M4)
105106
"inlineasm/asmfpaddsub.py",

0 commit comments

Comments
 (0)