@@ -624,9 +624,184 @@ int swift_decompose_double(double d,
624
624
}
625
625
#endif
626
626
627
+ #if SWIFT_DTOA_FLOAT16_SUPPORT
628
+ // Decompose an IEEE 754 binary16 half-precision float
629
+ // into decimal digits and a corresponding decimal exponent.
630
+
631
+ // See swift_decompose_double for detailed comments on the algorithm here
632
+ // This can almost certainly be simplified a great deal. This
633
+ // first iteration just copies the code from float.
634
+ int swift_decompose_float16 (const __fp16 * f ,
635
+ int8_t * digits , size_t digits_length , int * decimalExponent )
636
+ {
637
+ static const int significandBitCount = 10 ;
638
+ static const uint32_t significandMask
639
+ = ((uint32_t )1 << significandBitCount ) - 1 ;
640
+ static const int exponentBitCount = 5 ;
641
+ static const int exponentMask = (1 << exponentBitCount ) - 1 ;
642
+ // See comments in swift_decompose_double
643
+ static const int64_t exponentBias = (1 << (exponentBitCount - 1 )) - 2 ; // 14
644
+
645
+ // Step 0: Deconstruct the target number
646
+ // Note: this strongly assumes IEEE 754 binary16 format
647
+ uint32_t raw = * (const uint16_t * )f ;
648
+ int exponentBitPattern = (raw >> significandBitCount ) & exponentMask ;
649
+ uint32_t significandBitPattern = raw & significandMask ;
650
+
651
+ // Step 1: Handle the various input cases:
652
+ int binaryExponent ;
653
+ uint32_t significand ;
654
+ if (digits_length < 5 ) {
655
+ // Ensure we have space for 5 digits
656
+ return 0 ;
657
+ } else if (exponentBitPattern == exponentMask ) { // NaN or Infinity
658
+ // Return no digits
659
+ return 0 ;
660
+ } else if (exponentBitPattern == 0 ) {
661
+ if (significandBitPattern == 0 ) { // Zero
662
+ // Return one zero digit and decimalExponent = 0.
663
+ digits [0 ] = 0 ;
664
+ * decimalExponent = 0 ;
665
+ return 1 ;
666
+ } else { // Subnormal
667
+ binaryExponent = 1 - exponentBias ;
668
+ significand = significandBitPattern << (32 - significandBitCount - 1 );
669
+ }
670
+ } else { // normal
671
+ binaryExponent = exponentBitPattern - exponentBias ;
672
+ uint32_t hiddenBit = (uint32_t )1 << (uint32_t )significandBitCount ;
673
+ uint32_t fullSignificand = significandBitPattern + hiddenBit ;
674
+ significand = fullSignificand << (32 - significandBitCount - 1 );
675
+ }
676
+
677
+ // These numbers will typically get printed as 4- or 5-digit
678
+ // integers anyway, so we may as well provide that many digits,
679
+ // even though that's technically more digits than necessary.
680
+ if (binaryExponent >= 13 ) {
681
+ uint16_t intval = significand >> (32 - binaryExponent );
682
+ int8_t * digit_p = digits ;
683
+ if (intval > 9999 ) {
684
+ * digit_p ++ = intval / 10000 ;
685
+ }
686
+ digit_p [0 ] = (intval / 1000 ) % 10 ;
687
+ digit_p [1 ] = (intval / 100 ) % 10 ;
688
+ digit_p [2 ] = (intval / 10 ) % 10 ;
689
+ digit_p [3 ] = intval % 10 ;
690
+ int digit_count = digit_p + 4 - digits ;
691
+ * decimalExponent = digit_count ;
692
+ return digit_count ;
693
+ }
694
+
695
+ // Step 2: Determine the exact unscaled target interval
696
+ static const uint32_t halfUlp = (uint32_t )1 << (32 - significandBitCount - 2 );
697
+ uint32_t upperMidpointExact = significand + halfUlp ;
698
+
699
+ int isBoundary = significandBitPattern == 0 ;
700
+ static const uint32_t quarterUlp = halfUlp >> 1 ;
701
+ uint32_t lowerMidpointExact
702
+ = significand - (isBoundary ? quarterUlp : halfUlp );
703
+
704
+ // Step 3: Estimate the base 10 exponent
705
+ int base10Exponent = decimalExponentFor2ToThe (binaryExponent );
706
+
707
+ // Step 4: Compute a power-of-10 scale factor
708
+ uint64_t powerOfTenRoundedDown = 0 ;
709
+ uint64_t powerOfTenRoundedUp = 0 ;
710
+ int powerOfTenExponent = 0 ;
711
+ intervalContainingPowerOf10_Float (- base10Exponent ,
712
+ & powerOfTenRoundedDown ,
713
+ & powerOfTenRoundedUp ,
714
+ & powerOfTenExponent );
715
+ const int extraBits = binaryExponent + powerOfTenExponent ;
716
+
717
+ // Step 5: Scale the interval (with rounding)
718
+ static const int integerBits = 5 ;
719
+ const int shift = integerBits - extraBits ;
720
+ const int roundUpBias = (1 << shift ) - 1 ;
721
+ static const int fractionBits = 64 - integerBits ;
722
+ uint64_t u , l ;
723
+ if (significandBitPattern & 1 ) {
724
+ // Narrow the interval (odd significand)
725
+ uint64_t u1 = multiply64x32RoundingDown (powerOfTenRoundedDown ,
726
+ upperMidpointExact );
727
+ u = u1 >> shift ; // Rounding down
728
+
729
+ uint64_t l1 = multiply64x32RoundingUp (powerOfTenRoundedUp ,
730
+ lowerMidpointExact );
731
+ l = (l1 + roundUpBias ) >> shift ; // Rounding Up
732
+ } else {
733
+ // Widen the interval (even significand)
734
+ uint64_t u1 = multiply64x32RoundingUp (powerOfTenRoundedUp ,
735
+ upperMidpointExact );
736
+ u = (u1 + roundUpBias ) >> shift ; // Rounding Up
737
+
738
+ uint64_t l1 = multiply64x32RoundingDown (powerOfTenRoundedDown ,
739
+ lowerMidpointExact );
740
+ l = l1 >> shift ; // Rounding down
741
+ }
742
+
743
+ // Step 6: Align first digit, adjust exponent
744
+ // In particular, this prunes leading zeros from subnormals
745
+ static const uint64_t fixedPointOne = (uint64_t )1 << fractionBits ;
746
+ static const uint64_t fixedPointMask = fixedPointOne - 1 ;
747
+ uint64_t t = u ;
748
+ uint64_t delta = u - l ;
749
+ int exponent = base10Exponent + 1 ;
750
+
751
+ while (t < fixedPointOne ) {
752
+ exponent -= 1 ;
753
+ delta *= 10 ;
754
+ t *= 10 ;
755
+ }
756
+
757
+ // Step 7: Generate digits
758
+ int8_t * digit_p = digits ;
759
+ int nextDigit = (int )(t >> fractionBits );
760
+ t &= fixedPointMask ;
761
+
762
+ // Generate one digit at a time...
763
+ while (t > delta ) {
764
+ * digit_p ++ = nextDigit ;
765
+ delta *= 10 ;
766
+ t *= 10 ;
767
+ nextDigit = (int )(t >> fractionBits );
768
+ t &= fixedPointMask ;
769
+ }
770
+
771
+ // Adjust the final digit to be closer to the original value
772
+ if (delta > t + fixedPointOne ) {
773
+ uint64_t skew ;
774
+ if (isBoundary ) {
775
+ skew = delta - delta / 3 - t ;
776
+ } else {
777
+ skew = delta / 2 - t ;
778
+ }
779
+ uint64_t one = (uint64_t )(1 ) << (64 - integerBits );
780
+ uint64_t lastAccurateBit = 1ULL << 24 ;
781
+ uint64_t fractionMask = (one - 1 ) & ~(lastAccurateBit - 1 );
782
+ uint64_t oneHalf = one >> 1 ;
783
+ if (((skew + (lastAccurateBit >> 1 )) & fractionMask ) == oneHalf ) {
784
+ // If the skew is exactly integer + 1/2, round the last
785
+ // digit even after adjustment
786
+ int adjust = (int )(skew >> (64 - integerBits ));
787
+ nextDigit = (nextDigit - adjust ) & ~1 ;
788
+ } else {
789
+ // Else round to nearest...
790
+ int adjust = (int )((skew + oneHalf ) >> (64 - integerBits ));
791
+ nextDigit = (nextDigit - adjust );
792
+ }
793
+ }
794
+ * digit_p ++ = nextDigit ;
795
+
796
+ * decimalExponent = exponent ;
797
+ return digit_p - digits ;
798
+ }
799
+ #endif
800
+
801
+
627
802
#if SWIFT_DTOA_FLOAT_SUPPORT
628
803
// Return raw bits encoding the float
629
- static uint64_t bitPatternForFloat (float f ) {
804
+ static uint32_t bitPatternForFloat (float f ) {
630
805
union { float f ; uint32_t u ; } converter ;
631
806
converter .f = f ;
632
807
return converter .u ;
@@ -982,7 +1157,7 @@ int swift_decompose_float80(long double d,
982
1157
// These handle various exception cases (infinity, NaN, zero)
983
1158
// before invoking the general base-10 conversion.
984
1159
985
- #if SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
1160
+ #if SWIFT_DTOA_FLOAT16_SUPPORT || SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
986
1161
static size_t swift_format_constant (char * dest , size_t length , const char * s ) {
987
1162
const size_t l = strlen (s );
988
1163
if (length <= l ) {
@@ -993,6 +1168,60 @@ static size_t swift_format_constant(char *dest, size_t length, const char *s) {
993
1168
}
994
1169
#endif
995
1170
1171
+ #if SWIFT_DTOA_FLOAT16_SUPPORT
1172
+ size_t swift_format_float16 (const __fp16 * d , char * dest , size_t length )
1173
+ {
1174
+ uint16_t raw = * (const uint16_t * )d ;
1175
+ if ((raw & 0x7c00 ) == 0x7c00 ) { // Infinite or NaN
1176
+ if (raw == 0x7c00 ) {
1177
+ return swift_format_constant (dest , length , "inf" );
1178
+ } else if (raw == 0xfc00 ) {
1179
+ return swift_format_constant (dest , length , "-inf" );
1180
+ } else {
1181
+ // NaN
1182
+ static const int significandBitCount = 10 ;
1183
+ const char * sign = (raw & 0x8000 ) ? "-" : "" ;
1184
+ // XXX FIXME: This is different from Float/Double because
1185
+ // Swift's Float16 NaN encoding gets altered before it gets
1186
+ // here, probably because it gets converted to Float and back.
1187
+ const char * signaling = ((raw >> (significandBitCount - 2 )) & 1 ) ? "s" : "" ;
1188
+ uint32_t payload = raw & ((1L << (significandBitCount - 2 )) - 1 );
1189
+ char buff [32 ];
1190
+ if (payload != 0 ) {
1191
+ snprintf (buff , sizeof (buff ), "%s%snan(0x%x)" ,
1192
+ sign , signaling , payload );
1193
+ } else {
1194
+ snprintf (buff , sizeof (buff ), "%s%snan" ,
1195
+ sign , signaling );
1196
+ }
1197
+ return swift_format_constant (dest , length , buff );
1198
+ }
1199
+ }
1200
+
1201
+ // zero
1202
+ if (raw == 0x8000 ) {
1203
+ return swift_format_constant (dest , length , "-0.0" );
1204
+ }
1205
+ if (raw == 0x0000 ) {
1206
+ return swift_format_constant (dest , length , "0.0" );
1207
+ }
1208
+
1209
+ // Decimal numeric formatting
1210
+ int decimalExponent ;
1211
+ int8_t digits [9 ];
1212
+ bool negative = raw & 0x8000 ;
1213
+ int digitCount =
1214
+ swift_decompose_float16 (d , digits , sizeof (digits ), & decimalExponent );
1215
+ if (decimalExponent < -3 ) {
1216
+ return swift_format_exponential (dest , length , negative ,
1217
+ digits , digitCount , decimalExponent );
1218
+ } else {
1219
+ return swift_format_decimal (dest , length , negative ,
1220
+ digits , digitCount , decimalExponent );
1221
+ }
1222
+ }
1223
+ #endif
1224
+
996
1225
#if SWIFT_DTOA_FLOAT_SUPPORT
997
1226
size_t swift_format_float (float d , char * dest , size_t length )
998
1227
{
0 commit comments