Skip to content

Commit 2906653

Browse files
committed
Float16 optimal formatting
Extend SwiftDtoa to provide optimal formatting for Float16 and use that for `Float16.description` and `Float16.debugDescription`. Resolves rdar://61414101
1 parent 4264b39 commit 2906653

File tree

4 files changed

+382
-5
lines changed

4 files changed

+382
-5
lines changed

include/swift/Runtime/SwiftDtoa.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
// Essentially all modern platforms use IEEE 754 floating point
2525
// types now, so enable these by default:
26+
#define SWIFT_DTOA_FLOAT16_SUPPORT 1
2627
#define SWIFT_DTOA_FLOAT_SUPPORT 1
2728
#define SWIFT_DTOA_DOUBLE_SUPPORT 1
2829

@@ -102,6 +103,14 @@ int swift_decompose_double(double d,
102103
size_t swift_format_double(double, char *dest, size_t length);
103104
#endif
104105

106+
#if SWIFT_DTOA_FLOAT16_SUPPORT
107+
// See swift_decompose_double. `digits_length` must be at least 5.
108+
int swift_decompose_float16(const __fp16 *f,
109+
int8_t *digits, size_t digits_length, int *decimalExponent);
110+
// See swift_format_double.
111+
size_t swift_format_float16(const __fp16 *, char *dest, size_t length);
112+
#endif
113+
105114
#if SWIFT_DTOA_FLOAT_SUPPORT
106115
// See swift_decompose_double. `digits_length` must be at least 9.
107116
int swift_decompose_float(float f,

stdlib/public/runtime/SwiftDtoa.cpp

Lines changed: 231 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -624,9 +624,184 @@ int swift_decompose_double(double d,
624624
}
625625
#endif
626626

627+
#if SWIFT_DTOA_FLOAT16_SUPPORT
628+
// Decompose an IEEE 754 binary16 half-precision float
629+
// into decimal digits and a corresponding decimal exponent.
630+
631+
// See swift_decompose_double for detailed comments on the algorithm here.
632+
// This can almost certainly be simplified a great deal. This
633+
// first iteration just copies the code from float.
634+
// Produce decimal digits and a decimal exponent for the binary16 value at `f`.
//
// Parameters:
//   f               - pointer to the value; only its raw 16 bits are read.
//                     The sign bit is never examined, so the result
//                     describes the magnitude only.
//   digits          - output buffer.  Digits are stored as small integers
//                     (e.g. `digits[0] = 0` for zero), not as characters.
//   digits_length   - capacity of `digits`; anything below 5 is rejected.
//   decimalExponent - receives the decimal exponent.  In the integer fast
//                     path it is set equal to the digit count, i.e. the
//                     digits read as 0.DDDD x 10^decimalExponent.
//
// Returns the number of digits written.  Returns 0 when `digits_length` is
// below 5 or when the input is NaN/Infinity; in both of those cases
// `*decimalExponent` is left unwritten.
//
// decimalExponentFor2ToThe, intervalContainingPowerOf10_Float and the
// multiply64x32Rounding* helpers are defined outside this hunk.
int swift_decompose_float16(const __fp16 *f,
635+
int8_t *digits, size_t digits_length, int *decimalExponent)
636+
{
637+
static const int significandBitCount = 10;
638+
static const uint32_t significandMask
639+
= ((uint32_t)1 << significandBitCount) - 1;
640+
static const int exponentBitCount = 5;
641+
static const int exponentMask = (1 << exponentBitCount) - 1;
642+
// See comments in swift_decompose_double
643+
static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 14
644+
645+
// Step 0: Deconstruct the target number
646+
// Note: this strongly assumes IEEE 754 binary16 format
647+
// Bits 0-9 are the significand field, bits 10-14 the exponent field;
// bit 15 (sign) is not extracted.
uint32_t raw = *(const uint16_t *)f;
648+
int exponentBitPattern = (raw >> significandBitCount) & exponentMask;
649+
uint32_t significandBitPattern = raw & significandMask;
650+
651+
// Step 1: Handle the various input cases:
652+
int binaryExponent;
653+
uint32_t significand;
654+
if (digits_length < 5) {
655+
// Ensure we have space for 5 digits
656+
return 0;
657+
} else if (exponentBitPattern == exponentMask) { // NaN or Infinity
658+
// Return no digits
659+
return 0;
660+
} else if (exponentBitPattern == 0) {
661+
if (significandBitPattern == 0) { // Zero
662+
// Return one zero digit and decimalExponent = 0.
663+
digits[0] = 0;
664+
*decimalExponent = 0;
665+
return 1;
666+
} else { // Subnormal
667+
binaryExponent = 1 - exponentBias;
668+
// No hidden bit here: the 10 field bits are shifted left by 21, so
// bit 31 stays clear.
significand = significandBitPattern << (32 - significandBitCount - 1);
669+
}
670+
} else { // normal
671+
binaryExponent = exponentBitPattern - exponentBias;
672+
uint32_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount;
673+
uint32_t fullSignificand = significandBitPattern + hiddenBit;
674+
// The 11-bit significand (hidden bit included) is shifted left by 21,
// which puts the hidden bit in bit 31 of the 32-bit word.
significand = fullSignificand << (32 - significandBitCount - 1);
675+
}
676+
677+
// These numbers will typically get printed as 4- or 5-digit
678+
// integers anyway, so we may as well provide that many digits,
679+
// even though that's technically more digits than necessary.
680+
if (binaryExponent >= 13) {
681+
// For finite inputs binaryExponent is at most 30 - 14 = 16, so the shift
// count is in 16..19.  The low 21 bits of `significand` are zero, so the
// shift discards nothing and the result fits in 16 bits.
uint16_t intval = significand >> (32 - binaryExponent);
682+
int8_t *digit_p = digits;
683+
// Emit a leading fifth digit only when the value needs one.
if (intval > 9999) {
684+
*digit_p++ = intval / 10000;
685+
}
686+
digit_p[0] = (intval / 1000) % 10;
687+
digit_p[1] = (intval / 100) % 10;
688+
digit_p[2] = (intval / 10) % 10;
689+
digit_p[3] = intval % 10;
690+
int digit_count = digit_p + 4 - digits;
691+
*decimalExponent = digit_count;
692+
return digit_count;
693+
}
694+
695+
// Step 2: Determine the exact unscaled target interval
696+
// In this alignment adjacent significands differ by 1 << 21, so halfUlp
// is 1 << 20.
static const uint32_t halfUlp = (uint32_t)1 << (32 - significandBitCount - 2);
697+
uint32_t upperMidpointExact = significand + halfUlp;
698+
699+
// A zero significand field selects the quarter-ulp lower margin below --
// presumably because the spacing just below a power of two is half the
// spacing above it (see swift_decompose_double).
int isBoundary = significandBitPattern == 0;
700+
static const uint32_t quarterUlp = halfUlp >> 1;
701+
uint32_t lowerMidpointExact
702+
= significand - (isBoundary ? quarterUlp : halfUlp);
703+
704+
// Step 3: Estimate the base 10 exponent
705+
int base10Exponent = decimalExponentFor2ToThe(binaryExponent);
706+
707+
// Step 4: Compute a power-of-10 scale factor
708+
uint64_t powerOfTenRoundedDown = 0;
709+
uint64_t powerOfTenRoundedUp = 0;
710+
int powerOfTenExponent = 0;
711+
intervalContainingPowerOf10_Float(-base10Exponent,
712+
&powerOfTenRoundedDown,
713+
&powerOfTenRoundedUp,
714+
&powerOfTenExponent);
715+
const int extraBits = binaryExponent + powerOfTenExponent;
716+
717+
// Step 5: Scale the interval (with rounding)
718+
// The scaled bounds are 64-bit fixed-point values with 5 integer bits and
// 59 fraction bits.
static const int integerBits = 5;
719+
const int shift = integerBits - extraBits;
720+
const int roundUpBias = (1 << shift) - 1;
721+
static const int fractionBits = 64 - integerBits;
722+
uint64_t u, l;
723+
if (significandBitPattern & 1) {
724+
// Narrow the interval (odd significand)
725+
uint64_t u1 = multiply64x32RoundingDown(powerOfTenRoundedDown,
726+
upperMidpointExact);
727+
u = u1 >> shift; // Rounding down
728+
729+
uint64_t l1 = multiply64x32RoundingUp(powerOfTenRoundedUp,
730+
lowerMidpointExact);
731+
l = (l1 + roundUpBias) >> shift; // Rounding Up
732+
} else {
733+
// Widen the interval (even significand)
734+
uint64_t u1 = multiply64x32RoundingUp(powerOfTenRoundedUp,
735+
upperMidpointExact);
736+
u = (u1 + roundUpBias) >> shift; // Rounding Up
737+
738+
uint64_t l1 = multiply64x32RoundingDown(powerOfTenRoundedDown,
739+
lowerMidpointExact);
740+
l = l1 >> shift; // Rounding down
741+
}
742+
743+
// Step 6: Align first digit, adjust exponent
744+
// In particular, this prunes leading zeros from subnormals
745+
static const uint64_t fixedPointOne = (uint64_t)1 << fractionBits;
746+
static const uint64_t fixedPointMask = fixedPointOne - 1;
747+
// t starts at the upper bound; delta is the width of the interval.
uint64_t t = u;
748+
uint64_t delta = u - l;
749+
int exponent = base10Exponent + 1;
750+
751+
// Multiply by ten (lowering the exponent each time) until t has a
// nonzero integer part.
while (t < fixedPointOne) {
752+
exponent -= 1;
753+
delta *= 10;
754+
t *= 10;
755+
}
756+
757+
// Step 7: Generate digits
758+
int8_t *digit_p = digits;
759+
int nextDigit = (int)(t >> fractionBits);
760+
t &= fixedPointMask;
761+
762+
// Generate one digit at a time...
763+
// ...for as long as the remaining fraction still exceeds the (scaled)
// interval width.  The digit in hand is stored, then the next one is
// peeled off the integer bits.
while (t > delta) {
764+
*digit_p++ = nextDigit;
765+
delta *= 10;
766+
t *= 10;
767+
nextDigit = (int)(t >> fractionBits);
768+
t &= fixedPointMask;
769+
}
770+
771+
// Adjust the final digit to be closer to the original value
772+
// Only attempted when the interval is wide enough that more than one
// candidate for the last digit fits inside it.
if (delta > t + fixedPointOne) {
773+
uint64_t skew;
774+
if (isBoundary) {
775+
skew = delta - delta / 3 - t;
776+
} else {
777+
skew = delta / 2 - t;
778+
}
779+
uint64_t one = (uint64_t)(1) << (64 - integerBits);
780+
uint64_t lastAccurateBit = 1ULL << 24;
781+
uint64_t fractionMask = (one - 1) & ~(lastAccurateBit - 1);
782+
uint64_t oneHalf = one >> 1;
783+
if (((skew + (lastAccurateBit >> 1)) & fractionMask) == oneHalf) {
784+
// If the skew is exactly integer + 1/2, round the last
785+
// digit even after adjustment
786+
int adjust = (int)(skew >> (64 - integerBits));
787+
nextDigit = (nextDigit - adjust) & ~1;
788+
} else {
789+
// Else round to nearest...
790+
int adjust = (int)((skew + oneHalf) >> (64 - integerBits));
791+
nextDigit = (nextDigit - adjust);
792+
}
793+
}
794+
// The last digit is always stored, adjusted or not.
*digit_p++ = nextDigit;
795+
796+
*decimalExponent = exponent;
797+
return digit_p - digits;
798+
}
799+
#endif
800+
801+
627802
#if SWIFT_DTOA_FLOAT_SUPPORT
628803
// Return raw bits encoding the float
629-
static uint64_t bitPatternForFloat(float f) {
804+
static uint32_t bitPatternForFloat(float f) {
630805
union { float f; uint32_t u; } converter;
631806
converter.f = f;
632807
return converter.u;
@@ -982,7 +1157,7 @@ int swift_decompose_float80(long double d,
9821157
// These handle various exception cases (infinity, Nan, zero)
9831158
// before invoking the general base-10 conversion.
9841159

985-
#if SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
1160+
#if SWIFT_DTOA_FLOAT16_SUPPORT || SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
9861161
static size_t swift_format_constant(char *dest, size_t length, const char *s) {
9871162
const size_t l = strlen(s);
9881163
if (length <= l) {
@@ -993,6 +1168,60 @@ static size_t swift_format_constant(char *dest, size_t length, const char *s) {
9931168
}
9941169
#endif
9951170

1171+
#if SWIFT_DTOA_FLOAT16_SUPPORT
1172+
size_t swift_format_float16(const __fp16 *d, char *dest, size_t length)
1173+
{
1174+
uint16_t raw = *(const uint16_t *)d;
1175+
if ((raw & 0x7c00) == 0x7c00) { // Infinite or NaN
1176+
if (raw == 0x7c00) {
1177+
return swift_format_constant(dest, length, "inf");
1178+
} else if (raw == 0xfc00) {
1179+
return swift_format_constant(dest, length, "-inf");
1180+
} else {
1181+
// NaN
1182+
static const int significandBitCount = 10;
1183+
const char *sign = (raw & 0x8000) ? "-" : "";
1184+
// XXX FIXME: This is different from Float/Double because
1185+
// Swift's Float16 NaN encoding gets altered before it gets
1186+
// here, probably because it gets converted to Float and back.
1187+
const char *signaling = ((raw >> (significandBitCount - 2)) & 1) ? "s" : "";
1188+
uint32_t payload = raw & ((1L << (significandBitCount - 2)) - 1);
1189+
char buff[32];
1190+
if (payload != 0) {
1191+
snprintf(buff, sizeof(buff), "%s%snan(0x%x)",
1192+
sign, signaling, payload);
1193+
} else {
1194+
snprintf(buff, sizeof(buff), "%s%snan",
1195+
sign, signaling);
1196+
}
1197+
return swift_format_constant(dest, length, buff);
1198+
}
1199+
}
1200+
1201+
// zero
1202+
if (raw == 0x8000) {
1203+
return swift_format_constant(dest, length, "-0.0");
1204+
}
1205+
if (raw == 0x0000) {
1206+
return swift_format_constant(dest, length, "0.0");
1207+
}
1208+
1209+
// Decimal numeric formatting
1210+
int decimalExponent;
1211+
int8_t digits[9];
1212+
bool negative = raw & 0x8000;
1213+
int digitCount =
1214+
swift_decompose_float16(d, digits, sizeof(digits), &decimalExponent);
1215+
if (decimalExponent < -3) {
1216+
return swift_format_exponential(dest, length, negative,
1217+
digits, digitCount, decimalExponent);
1218+
} else {
1219+
return swift_format_decimal(dest, length, negative,
1220+
digits, digitCount, decimalExponent);
1221+
}
1222+
}
1223+
#endif
1224+
9961225
#if SWIFT_DTOA_FLOAT_SUPPORT
9971226
size_t swift_format_float(float d, char *dest, size_t length)
9981227
{

stdlib/public/stubs/Stubs.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,8 @@ static uint64_t swift_floatingPointToString(char *Buffer, size_t BufferLength,
269269
// Runtime stub that formats a Float16, received here as `float Value`,
// into `Buffer` (capacity `BufferLength`) and returns the formatter's result.
//
// This hunk shows both sides of the change: the line under gutter `272-` is
// the removed body, which formatted the value with swift_format_float; the
// lines under `272+`/`273+` are the new body, which narrows `Value` to
// `__fp16` and passes its address to swift_format_float16.
// `Debug` is not referenced by any line shown here.
SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API
270270
__swift_ssize_t swift_float16ToString(char *Buffer, size_t BufferLength,
271271
float Value, bool Debug) {
272-
return swift_format_float(Value, Buffer, BufferLength);
272+
// Narrow to half precision so the formatter sees the binary16 bit pattern.
__fp16 v = Value;
273+
return swift_format_float16(&v, Buffer, BufferLength);
273274
}
274275

275276
SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API
@@ -546,4 +547,3 @@ int swift::_swift_stdlib_putc_stderr(int C) {
546547
size_t swift::_swift_stdlib_getHardwareConcurrency() {
547548
return std::thread::hardware_concurrency();
548549
}
549-

0 commit comments

Comments
 (0)