Skip to content

Commit 110e513

Browse files
authored
Float16 optimal formatting (#30862)
Extend SwiftDtoa to provide optimal formatting for Float16 and use that for `Float16.description` and `Float16.debugDescription`. Notes on signaling NaNs: LLVM's Float16 support passes Float16s on x86 by legalizing to Float32. This works well for most purposes but incidentally loses the signaling marker from any NaN (because it's a conversion as far as the hardware is concerned), with a side effect that the print code never actually sees a true sNaN. This is similar to what happens with Float and Double on i386 backends. The earlier code here tried to detect sNaN in a different way, but that approach isn't guaranteed to work so we decided to make this code use the correct detection logic -- sNaN printing will just be broken until we can get a better argument passing convention. Resolves rdar://61414101
1 parent 6d08ec3 commit 110e513

File tree

4 files changed

+389
-5
lines changed

4 files changed

+389
-5
lines changed

include/swift/Runtime/SwiftDtoa.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
// Essentially all modern platforms use IEEE 754 floating point
2525
// types now, so enable these by default:
26+
#define SWIFT_DTOA_FLOAT16_SUPPORT 1
2627
#define SWIFT_DTOA_FLOAT_SUPPORT 1
2728
#define SWIFT_DTOA_DOUBLE_SUPPORT 1
2829

@@ -102,6 +103,14 @@ int swift_decompose_double(double d,
102103
size_t swift_format_double(double, char *dest, size_t length);
103104
#endif
104105

106+
#if SWIFT_DTOA_FLOAT16_SUPPORT
107+
// See swift_decompose_double. `digits_length` must be at least 5.
// The half-precision value is passed by pointer and the implementation
// reads its raw 16-bit pattern.  The return value is the number of
// digits produced; the implementation returns 0 (no digits) when
// `digits_length` is smaller than 5 and when the value is NaN or
// infinity.
108+
int swift_decompose_float16(const __fp16 *f,
109+
int8_t *digits, size_t digits_length, int *decimalExponent);
110+
// See swift_format_double.
// Takes a pointer to the half-precision value and writes its textual
// form into `dest`, whose capacity is `length`.
111+
size_t swift_format_float16(const __fp16 *, char *dest, size_t length);
112+
#endif
113+
105114
#if SWIFT_DTOA_FLOAT_SUPPORT
106115
// See swift_decompose_double. `digits_length` must be at least 9.
107116
int swift_decompose_float(float f,

stdlib/public/runtime/SwiftDtoa.cpp

Lines changed: 227 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -624,9 +624,183 @@ int swift_decompose_double(double d,
624624
}
625625
#endif
626626

627+
#if SWIFT_DTOA_FLOAT16_SUPPORT
// Decompose an IEEE 754 binary16 half-precision float
// into decimal digits and a corresponding decimal exponent.
//
// Parameters:
//   f               - pointer to the half-precision value; only its raw
//                     16-bit pattern is read.
//   digits          - output buffer; receives one decimal digit (0-9)
//                     per element, most significant digit first.
//   digits_length   - capacity of `digits`; must be at least 5.
//   decimalExponent - receives the decimal exponent that goes with the
//                     generated digits.
//
// Returns the number of digits written.  Returns 0 without writing to
// `digits` or `*decimalExponent` when `digits_length` is less than 5 or
// when the value is NaN or infinity.  Zero produces the single digit 0
// with a decimal exponent of 0.
//
// See swift_decompose_double for detailed comments on the algorithm here.
// This can almost certainly be simplified a great deal.  This
// first iteration just copies the code from float.
int swift_decompose_float16(const __fp16 *f,
    int8_t *digits, size_t digits_length, int *decimalExponent)
{
    static const int significandBitCount = 10;
    static const uint32_t significandMask
        = ((uint32_t)1 << significandBitCount) - 1;
    static const int exponentBitCount = 5;
    static const int exponentMask = (1 << exponentBitCount) - 1;
    // See comments in swift_decompose_double
    static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 14

    // Step 0: Deconstruct IEEE 754 binary16 format
    // Copy the bits out with memcpy instead of dereferencing a
    // `const uint16_t *` cast: reading a __fp16 object through an
    // unrelated integer lvalue violates the strict-aliasing rule.
    uint16_t rawBits;
    memcpy(&rawBits, f, sizeof(rawBits));
    uint32_t raw = rawBits;
    int exponentBitPattern = (raw >> significandBitCount) & exponentMask;
    uint32_t significandBitPattern = raw & significandMask;

    // Step 1: Handle the various input cases:
    int binaryExponent;
    uint32_t significand;
    if (digits_length < 5) {
        // Ensure we have space for 5 digits
        return 0;
    } else if (exponentBitPattern == exponentMask) { // NaN or Infinity
        // Return no digits
        return 0;
    } else if (exponentBitPattern == 0) {
        if (significandBitPattern == 0) { // Zero
            // Return one zero digit and decimalExponent = 0.
            digits[0] = 0;
            *decimalExponent = 0;
            return 1;
        } else { // Subnormal
            binaryExponent = 1 - exponentBias;
            significand = significandBitPattern << (32 - significandBitCount - 1);
        }
    } else { // normal
        binaryExponent = exponentBitPattern - exponentBias;
        uint32_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount;
        uint32_t fullSignificand = significandBitPattern + hiddenBit;
        significand = fullSignificand << (32 - significandBitCount - 1);
    }

    // These numbers will typically get printed as 4- or 5-digit
    // integers anyway, so we may as well provide that many digits,
    // even though that's technically more digits than necessary.
    if (binaryExponent >= 13) {
        uint16_t intval = significand >> (32 - binaryExponent);
        int8_t *digit_p = digits;
        if (intval > 9999) {
            *digit_p++ = intval / 10000;
        }
        digit_p[0] = (intval / 1000) % 10;
        digit_p[1] = (intval / 100) % 10;
        digit_p[2] = (intval / 10) % 10;
        digit_p[3] = intval % 10;
        int digit_count = digit_p + 4 - digits;
        // All digits are integer digits, so the exponent is the digit count.
        *decimalExponent = digit_count;
        return digit_count;
    }

    // Step 2: Determine the exact unscaled target interval
    static const uint32_t halfUlp = (uint32_t)1 << (32 - significandBitCount - 2);
    uint32_t upperMidpointExact = significand + halfUlp;

    // A zero significand field marks an exponent boundary, where the
    // lower neighbor is only a quarter ULP away.
    int isBoundary = significandBitPattern == 0;
    static const uint32_t quarterUlp = halfUlp >> 1;
    uint32_t lowerMidpointExact
        = significand - (isBoundary ? quarterUlp : halfUlp);

    // Step 3: Estimate the base 10 exponent
    int base10Exponent = decimalExponentFor2ToThe(binaryExponent);

    // Step 4: Compute a power-of-10 scale factor
    uint64_t powerOfTenRoundedDown = 0;
    uint64_t powerOfTenRoundedUp = 0;
    int powerOfTenExponent = 0;
    intervalContainingPowerOf10_Float(-base10Exponent,
                                      &powerOfTenRoundedDown,
                                      &powerOfTenRoundedUp,
                                      &powerOfTenExponent);
    const int extraBits = binaryExponent + powerOfTenExponent;

    // Step 5: Scale the interval (with rounding)
    static const int integerBits = 5;
    const int shift = integerBits - extraBits;
    const int roundUpBias = (1 << shift) - 1;
    static const int fractionBits = 64 - integerBits;
    uint64_t u, l;
    if (significandBitPattern & 1) {
        // Narrow the interval (odd significand)
        uint64_t u1 = multiply64x32RoundingDown(powerOfTenRoundedDown,
                                                upperMidpointExact);
        u = u1 >> shift; // Rounding down

        uint64_t l1 = multiply64x32RoundingUp(powerOfTenRoundedUp,
                                              lowerMidpointExact);
        l = (l1 + roundUpBias) >> shift; // Rounding Up
    } else {
        // Widen the interval (even significand)
        uint64_t u1 = multiply64x32RoundingUp(powerOfTenRoundedUp,
                                              upperMidpointExact);
        u = (u1 + roundUpBias) >> shift; // Rounding Up

        uint64_t l1 = multiply64x32RoundingDown(powerOfTenRoundedDown,
                                                lowerMidpointExact);
        l = l1 >> shift; // Rounding down
    }

    // Step 6: Align first digit, adjust exponent
    // In particular, this prunes leading zeros from subnormals
    static const uint64_t fixedPointOne = (uint64_t)1 << fractionBits;
    static const uint64_t fixedPointMask = fixedPointOne - 1;
    uint64_t t = u;
    uint64_t delta = u - l;
    int exponent = base10Exponent + 1;

    while (t < fixedPointOne) {
        exponent -= 1;
        delta *= 10;
        t *= 10;
    }

    // Step 7: Generate digits
    int8_t *digit_p = digits;
    int nextDigit = (int)(t >> fractionBits);
    t &= fixedPointMask;

    // Generate one digit at a time...
    while (t > delta) {
        *digit_p++ = nextDigit;
        delta *= 10;
        t *= 10;
        nextDigit = (int)(t >> fractionBits);
        t &= fixedPointMask;
    }

    // Adjust the final digit to be closer to the original value
    if (delta > t + fixedPointOne) {
        uint64_t skew;
        if (isBoundary) {
            skew = delta - delta / 3 - t;
        } else {
            skew = delta / 2 - t;
        }
        uint64_t one = (uint64_t)(1) << (64 - integerBits);
        uint64_t lastAccurateBit = 1ULL << 24;
        uint64_t fractionMask = (one - 1) & ~(lastAccurateBit - 1);
        uint64_t oneHalf = one >> 1;
        if (((skew + (lastAccurateBit >> 1)) & fractionMask) == oneHalf) {
            // If the skew is exactly integer + 1/2, round the last
            // digit even after adjustment
            int adjust = (int)(skew >> (64 - integerBits));
            nextDigit = (nextDigit - adjust) & ~1;
        } else {
            // Else round to nearest...
            int adjust = (int)((skew + oneHalf) >> (64 - integerBits));
            nextDigit = (nextDigit - adjust);
        }
    }
    *digit_p++ = nextDigit;

    *decimalExponent = exponent;
    return digit_p - digits;
}
#endif
799+
800+
627801
#if SWIFT_DTOA_FLOAT_SUPPORT
628802
// Return raw bits encoding the float
629-
static uint64_t bitPatternForFloat(float f) {
803+
static uint32_t bitPatternForFloat(float f) {
630804
union { float f; uint32_t u; } converter;
631805
converter.f = f;
632806
return converter.u;
@@ -982,7 +1156,7 @@ int swift_decompose_float80(long double d,
9821156
// These handle various exception cases (infinity, Nan, zero)
9831157
// before invoking the general base-10 conversion.
9841158

985-
#if SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
1159+
#if SWIFT_DTOA_FLOAT16_SUPPORT || SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
9861160
static size_t swift_format_constant(char *dest, size_t length, const char *s) {
9871161
const size_t l = strlen(s);
9881162
if (length <= l) {
@@ -993,6 +1167,57 @@ static size_t swift_format_constant(char *dest, size_t length, const char *s) {
9931167
}
9941168
#endif
9951169

1170+
#if SWIFT_DTOA_FLOAT16_SUPPORT
// Format an IEEE 754 binary16 half-precision value as text.
//
// Parameters:
//   d      - pointer to the half-precision value; its raw 16-bit pattern
//            is inspected directly.
//   dest   - output buffer.
//   length - capacity of `dest`.
//
// Infinities, NaNs and zeros are emitted as fixed strings through
// swift_format_constant.  Every other value is decomposed into decimal
// digits and rendered in exponential form when the decimal exponent is
// below -3, and in plain decimal form otherwise.  The return value is
// whatever the selected swift_format_* helper returns.
size_t swift_format_float16(const __fp16 *d, char *dest, size_t length)
{
    // Copy the bits out with memcpy instead of dereferencing a
    // `const uint16_t *` cast: reading a __fp16 object through an
    // unrelated integer lvalue violates the strict-aliasing rule.
    uint16_t raw;
    memcpy(&raw, d, sizeof(raw));
    if ((raw & 0x7c00) == 0x7c00) { // Infinite or NaN
        if (raw == 0x7c00) {
            return swift_format_constant(dest, length, "inf");
        } else if (raw == 0xfc00) {
            return swift_format_constant(dest, length, "-inf");
        } else {
            // NaN
            static const int significandBitCount = 10;
            const char *sign = (raw & 0x8000) ? "-" : "";
            // The top significand bit set means quiet; clear means the
            // NaN is reported as signaling.
            const char *signaling = ((raw >> (significandBitCount - 1)) & 1) ? "" : "s";
            // The payload is taken from the low 8 significand bits.
            uint32_t payload = raw & ((1L << (significandBitCount - 2)) - 1);
            char buff[32];
            if (payload != 0) {
                snprintf(buff, sizeof(buff), "%s%snan(0x%x)",
                         sign, signaling, payload);
            } else {
                snprintf(buff, sizeof(buff), "%s%snan",
                         sign, signaling);
            }
            return swift_format_constant(dest, length, buff);
        }
    }

    // zero
    if (raw == 0x8000) {
        return swift_format_constant(dest, length, "-0.0");
    }
    if (raw == 0x0000) {
        return swift_format_constant(dest, length, "0.0");
    }

    // Decimal numeric formatting
    int decimalExponent;
    int8_t digits[9];
    bool negative = raw & 0x8000;
    int digitCount =
        swift_decompose_float16(d, digits, sizeof(digits), &decimalExponent);
    if (decimalExponent < -3) {
        return swift_format_exponential(dest, length, negative,
                                        digits, digitCount, decimalExponent);
    } else {
        return swift_format_decimal(dest, length, negative,
                                    digits, digitCount, decimalExponent);
    }
}
#endif
1220+
9961221
#if SWIFT_DTOA_FLOAT_SUPPORT
9971222
size_t swift_format_float(float d, char *dest, size_t length)
9981223
{

stdlib/public/stubs/Stubs.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,8 @@ static uint64_t swift_floatingPointToString(char *Buffer, size_t BufferLength,
269269
// Formats a Float16 value into Buffer (capacity BufferLength) and returns
// the formatter's result.  Value is received as a float and is narrowed
// back to __fp16 below; Debug is not consulted in this body.
SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API
270270
__swift_ssize_t swift_float16ToString(char *Buffer, size_t BufferLength,
271271
float Value, bool Debug) {
272-
// Line removed by this commit: formatted the value through the float path.
return swift_format_float(Value, Buffer, BufferLength);
272+
// Lines added by this commit: narrow to half precision and hand the
// Float16 formatter a pointer to the 16-bit value.
__fp16 v = Value;
273+
return swift_format_float16(&v, Buffer, BufferLength);
273274
}
274275

275276
SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API
@@ -546,4 +547,3 @@ int swift::_swift_stdlib_putc_stderr(int C) {
546547
size_t swift::_swift_stdlib_getHardwareConcurrency() {
547548
return std::thread::hardware_concurrency();
548549
}
549-

0 commit comments

Comments
 (0)