@@ -624,9 +624,183 @@ int swift_decompose_double(double d,
624
624
}
625
625
#endif
626
626
627
+ #if SWIFT_DTOA_FLOAT16_SUPPORT
628
+ // Decompose an IEEE 754 binary16 half-precision float
629
+ // into decimal digits and a corresponding decimal exponent.
630
+
631
+ // See swift_decompose_double for detailed comments on the algorithm here
632
+ // This can almost certainly be simplified a great deal. This
633
+ // first iteration just copies the code from float.
634
+ int swift_decompose_float16 (const __fp16 * f ,
635
+ int8_t * digits , size_t digits_length , int * decimalExponent )
636
+ {
637
+ static const int significandBitCount = 10 ;
638
+ static const uint32_t significandMask
639
+ = ((uint32_t )1 << significandBitCount ) - 1 ;
640
+ static const int exponentBitCount = 5 ;
641
+ static const int exponentMask = (1 << exponentBitCount ) - 1 ;
642
+ // See comments in swift_decompose_double
643
+ static const int64_t exponentBias = (1 << (exponentBitCount - 1 )) - 2 ; // 14
644
+
645
+ // Step 0: Deconstruct IEEE 754 binary16 format
646
+ uint32_t raw = * (const uint16_t * )f ;
647
+ int exponentBitPattern = (raw >> significandBitCount ) & exponentMask ;
648
+ uint32_t significandBitPattern = raw & significandMask ;
649
+
650
+ // Step 1: Handle the various input cases:
651
+ int binaryExponent ;
652
+ uint32_t significand ;
653
+ if (digits_length < 5 ) {
654
+ // Ensure we have space for 5 digits
655
+ return 0 ;
656
+ } else if (exponentBitPattern == exponentMask ) { // NaN or Infinity
657
+ // Return no digits
658
+ return 0 ;
659
+ } else if (exponentBitPattern == 0 ) {
660
+ if (significandBitPattern == 0 ) { // Zero
661
+ // Return one zero digit and decimalExponent = 0.
662
+ digits [0 ] = 0 ;
663
+ * decimalExponent = 0 ;
664
+ return 1 ;
665
+ } else { // Subnormal
666
+ binaryExponent = 1 - exponentBias ;
667
+ significand = significandBitPattern << (32 - significandBitCount - 1 );
668
+ }
669
+ } else { // normal
670
+ binaryExponent = exponentBitPattern - exponentBias ;
671
+ uint32_t hiddenBit = (uint32_t )1 << (uint32_t )significandBitCount ;
672
+ uint32_t fullSignificand = significandBitPattern + hiddenBit ;
673
+ significand = fullSignificand << (32 - significandBitCount - 1 );
674
+ }
675
+
676
+ // These numbers will typically get printed as 4- or 5-digit
677
+ // integers anyway, so we may as well provide that many digits,
678
+ // even though that's technically more digits than necessary.
679
+ if (binaryExponent >= 13 ) {
680
+ uint16_t intval = significand >> (32 - binaryExponent );
681
+ int8_t * digit_p = digits ;
682
+ if (intval > 9999 ) {
683
+ * digit_p ++ = intval / 10000 ;
684
+ }
685
+ digit_p [0 ] = (intval / 1000 ) % 10 ;
686
+ digit_p [1 ] = (intval / 100 ) % 10 ;
687
+ digit_p [2 ] = (intval / 10 ) % 10 ;
688
+ digit_p [3 ] = intval % 10 ;
689
+ int digit_count = digit_p + 4 - digits ;
690
+ * decimalExponent = digit_count ;
691
+ return digit_count ;
692
+ }
693
+
694
+ // Step 2: Determine the exact unscaled target interval
695
+ static const uint32_t halfUlp = (uint32_t )1 << (32 - significandBitCount - 2 );
696
+ uint32_t upperMidpointExact = significand + halfUlp ;
697
+
698
+ int isBoundary = significandBitPattern == 0 ;
699
+ static const uint32_t quarterUlp = halfUlp >> 1 ;
700
+ uint32_t lowerMidpointExact
701
+ = significand - (isBoundary ? quarterUlp : halfUlp );
702
+
703
+ // Step 3: Estimate the base 10 exponent
704
+ int base10Exponent = decimalExponentFor2ToThe (binaryExponent );
705
+
706
+ // Step 4: Compute a power-of-10 scale factor
707
+ uint64_t powerOfTenRoundedDown = 0 ;
708
+ uint64_t powerOfTenRoundedUp = 0 ;
709
+ int powerOfTenExponent = 0 ;
710
+ intervalContainingPowerOf10_Float (- base10Exponent ,
711
+ & powerOfTenRoundedDown ,
712
+ & powerOfTenRoundedUp ,
713
+ & powerOfTenExponent );
714
+ const int extraBits = binaryExponent + powerOfTenExponent ;
715
+
716
+ // Step 5: Scale the interval (with rounding)
717
+ static const int integerBits = 5 ;
718
+ const int shift = integerBits - extraBits ;
719
+ const int roundUpBias = (1 << shift ) - 1 ;
720
+ static const int fractionBits = 64 - integerBits ;
721
+ uint64_t u , l ;
722
+ if (significandBitPattern & 1 ) {
723
+ // Narrow the interval (odd significand)
724
+ uint64_t u1 = multiply64x32RoundingDown (powerOfTenRoundedDown ,
725
+ upperMidpointExact );
726
+ u = u1 >> shift ; // Rounding down
727
+
728
+ uint64_t l1 = multiply64x32RoundingUp (powerOfTenRoundedUp ,
729
+ lowerMidpointExact );
730
+ l = (l1 + roundUpBias ) >> shift ; // Rounding Up
731
+ } else {
732
+ // Widen the interval (even significand)
733
+ uint64_t u1 = multiply64x32RoundingUp (powerOfTenRoundedUp ,
734
+ upperMidpointExact );
735
+ u = (u1 + roundUpBias ) >> shift ; // Rounding Up
736
+
737
+ uint64_t l1 = multiply64x32RoundingDown (powerOfTenRoundedDown ,
738
+ lowerMidpointExact );
739
+ l = l1 >> shift ; // Rounding down
740
+ }
741
+
742
+ // Step 6: Align first digit, adjust exponent
743
+ // In particular, this prunes leading zeros from subnormals
744
+ static const uint64_t fixedPointOne = (uint64_t )1 << fractionBits ;
745
+ static const uint64_t fixedPointMask = fixedPointOne - 1 ;
746
+ uint64_t t = u ;
747
+ uint64_t delta = u - l ;
748
+ int exponent = base10Exponent + 1 ;
749
+
750
+ while (t < fixedPointOne ) {
751
+ exponent -= 1 ;
752
+ delta *= 10 ;
753
+ t *= 10 ;
754
+ }
755
+
756
+ // Step 7: Generate digits
757
+ int8_t * digit_p = digits ;
758
+ int nextDigit = (int )(t >> fractionBits );
759
+ t &= fixedPointMask ;
760
+
761
+ // Generate one digit at a time...
762
+ while (t > delta ) {
763
+ * digit_p ++ = nextDigit ;
764
+ delta *= 10 ;
765
+ t *= 10 ;
766
+ nextDigit = (int )(t >> fractionBits );
767
+ t &= fixedPointMask ;
768
+ }
769
+
770
+ // Adjust the final digit to be closer to the original value
771
+ if (delta > t + fixedPointOne ) {
772
+ uint64_t skew ;
773
+ if (isBoundary ) {
774
+ skew = delta - delta / 3 - t ;
775
+ } else {
776
+ skew = delta / 2 - t ;
777
+ }
778
+ uint64_t one = (uint64_t )(1 ) << (64 - integerBits );
779
+ uint64_t lastAccurateBit = 1ULL << 24 ;
780
+ uint64_t fractionMask = (one - 1 ) & ~(lastAccurateBit - 1 );
781
+ uint64_t oneHalf = one >> 1 ;
782
+ if (((skew + (lastAccurateBit >> 1 )) & fractionMask ) == oneHalf ) {
783
+ // If the skew is exactly integer + 1/2, round the last
784
+ // digit even after adjustment
785
+ int adjust = (int )(skew >> (64 - integerBits ));
786
+ nextDigit = (nextDigit - adjust ) & ~1 ;
787
+ } else {
788
+ // Else round to nearest...
789
+ int adjust = (int )((skew + oneHalf ) >> (64 - integerBits ));
790
+ nextDigit = (nextDigit - adjust );
791
+ }
792
+ }
793
+ * digit_p ++ = nextDigit ;
794
+
795
+ * decimalExponent = exponent ;
796
+ return digit_p - digits ;
797
+ }
798
+ #endif
799
+
800
+
627
801
#if SWIFT_DTOA_FLOAT_SUPPORT
628
802
// Return raw bits encoding the float
629
- static uint64_t bitPatternForFloat (float f ) {
803
+ static uint32_t bitPatternForFloat (float f ) {
630
804
union { float f ; uint32_t u ; } converter ;
631
805
converter .f = f ;
632
806
return converter .u ;
@@ -982,7 +1156,7 @@ int swift_decompose_float80(long double d,
982
1156
// These handle various exception cases (infinity, NaN, zero)
983
1157
// before invoking the general base-10 conversion.
984
1158
985
- #if SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
1159
+ #if SWIFT_DTOA_FLOAT16_SUPPORT || SWIFT_DTOA_FLOAT_SUPPORT || SWIFT_DTOA_DOUBLE_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT
986
1160
static size_t swift_format_constant (char * dest , size_t length , const char * s ) {
987
1161
const size_t l = strlen (s );
988
1162
if (length <= l ) {
@@ -993,6 +1167,57 @@ static size_t swift_format_constant(char *dest, size_t length, const char *s) {
993
1167
}
994
1168
#endif
995
1169
1170
+ #if SWIFT_DTOA_FLOAT16_SUPPORT
1171
+ size_t swift_format_float16 (const __fp16 * d , char * dest , size_t length )
1172
+ {
1173
+ uint16_t raw = * (const uint16_t * )d ;
1174
+ if ((raw & 0x7c00 ) == 0x7c00 ) { // Infinite or NaN
1175
+ if (raw == 0x7c00 ) {
1176
+ return swift_format_constant (dest , length , "inf" );
1177
+ } else if (raw == 0xfc00 ) {
1178
+ return swift_format_constant (dest , length , "-inf" );
1179
+ } else {
1180
+ // NaN
1181
+ static const int significandBitCount = 10 ;
1182
+ const char * sign = (raw & 0x8000 ) ? "-" : "" ;
1183
+ const char * signaling = ((raw >> (significandBitCount - 1 )) & 1 ) ? "" : "s" ;
1184
+ uint32_t payload = raw & ((1L << (significandBitCount - 2 )) - 1 );
1185
+ char buff [32 ];
1186
+ if (payload != 0 ) {
1187
+ snprintf (buff , sizeof (buff ), "%s%snan(0x%x)" ,
1188
+ sign , signaling , payload );
1189
+ } else {
1190
+ snprintf (buff , sizeof (buff ), "%s%snan" ,
1191
+ sign , signaling );
1192
+ }
1193
+ return swift_format_constant (dest , length , buff );
1194
+ }
1195
+ }
1196
+
1197
+ // zero
1198
+ if (raw == 0x8000 ) {
1199
+ return swift_format_constant (dest , length , "-0.0" );
1200
+ }
1201
+ if (raw == 0x0000 ) {
1202
+ return swift_format_constant (dest , length , "0.0" );
1203
+ }
1204
+
1205
+ // Decimal numeric formatting
1206
+ int decimalExponent ;
1207
+ int8_t digits [9 ];
1208
+ bool negative = raw & 0x8000 ;
1209
+ int digitCount =
1210
+ swift_decompose_float16 (d , digits , sizeof (digits ), & decimalExponent );
1211
+ if (decimalExponent < -3 ) {
1212
+ return swift_format_exponential (dest , length , negative ,
1213
+ digits , digitCount , decimalExponent );
1214
+ } else {
1215
+ return swift_format_decimal (dest , length , negative ,
1216
+ digits , digitCount , decimalExponent );
1217
+ }
1218
+ }
1219
+ #endif
1220
+
996
1221
#if SWIFT_DTOA_FLOAT_SUPPORT
997
1222
size_t swift_format_float (float d , char * dest , size_t length )
998
1223
{
0 commit comments