@@ -145,6 +145,9 @@ static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
145
145
4 , -10 , 4 , 8 , fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
146
146
static constexpr fltSemantics semFloat8E3M4 = {3 , -2 , 5 , 8 };
147
147
static constexpr fltSemantics semFloatTF32 = {127 , -126 , 11 , 19 };
148
+ static constexpr fltSemantics semFloat8E8M0FN = {
149
+ 127 , -127 , 1 , 8 , fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
150
+
148
151
static constexpr fltSemantics semFloat6E3M2FN = {
149
152
4 , -2 , 3 , 6 , fltNonfiniteBehavior::FiniteOnly};
150
153
static constexpr fltSemantics semFloat6E2M3FN = {
@@ -222,6 +225,8 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
222
225
return Float8E3M4 ();
223
226
case S_FloatTF32:
224
227
return FloatTF32 ();
228
+ case S_Float8E8M0FN:
229
+ return Float8E8M0FN ();
225
230
case S_Float6E3M2FN:
226
231
return Float6E3M2FN ();
227
232
case S_Float6E2M3FN:
@@ -264,6 +269,8 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
264
269
return S_Float8E3M4;
265
270
else if (&Sem == &llvm::APFloat::FloatTF32 ())
266
271
return S_FloatTF32;
272
+ else if (&Sem == &llvm::APFloat::Float8E8M0FN ())
273
+ return S_Float8E8M0FN;
267
274
else if (&Sem == &llvm::APFloat::Float6E3M2FN ())
268
275
return S_Float6E3M2FN;
269
276
else if (&Sem == &llvm::APFloat::Float6E2M3FN ())
@@ -294,6 +301,7 @@ const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
294
301
}
295
302
const fltSemantics &APFloatBase::Float8E3M4 () { return semFloat8E3M4; }
296
303
const fltSemantics &APFloatBase::FloatTF32 () { return semFloatTF32; }
304
+ const fltSemantics &APFloatBase::Float8E8M0FN () { return semFloat8E8M0FN; }
297
305
const fltSemantics &APFloatBase::Float6E3M2FN () { return semFloat6E3M2FN; }
298
306
const fltSemantics &APFloatBase::Float6E2M3FN () { return semFloat6E2M3FN; }
299
307
const fltSemantics &APFloatBase::Float4E2M1FN () { return semFloat4E2M1FN; }
@@ -396,6 +404,8 @@ static inline Error createError(const Twine &Err) {
396
404
}
397
405
398
406
static constexpr inline unsigned int partCountForBits (unsigned int bits) {
407
+ if (bits == 0 )
408
+ return 1 ;
399
409
return ((bits) + APFloatBase::integerPartWidth - 1 ) / APFloatBase::integerPartWidth;
400
410
}
401
411
@@ -955,6 +965,12 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
955
965
significand[part] = 0 ;
956
966
}
957
967
968
+ // For the E8M0 types, precision is just 1 and the
969
+ // NaNBit handling below is not relevant.
970
+ // So, exit early.
971
+ if (semantics == &semFloat8E8M0FN)
972
+ return ;
973
+
958
974
unsigned QNaNBit = semantics->precision - 2 ;
959
975
960
976
if (SNaN) {
@@ -1007,6 +1023,10 @@ IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
1007
1023
}
1008
1024
1009
1025
bool IEEEFloat::isDenormal () const {
1026
+ // No denormals in Float8E8M0FN
1027
+ if (semantics == &semFloat8E8M0FN)
1028
+ return false ;
1029
+
1010
1030
return isFiniteNonZero () && (exponent == semantics->minExponent ) &&
1011
1031
(APInt::tcExtractBit (significandParts (),
1012
1032
semantics->precision - 1 ) == 0 );
@@ -1028,6 +1048,10 @@ bool IEEEFloat::isSmallestNormalized() const {
1028
1048
bool IEEEFloat::isSignificandAllOnes () const {
1029
1049
// Test if the significand excluding the integral bit is all ones. This allows
1030
1050
// us to test for binade boundaries.
1051
+ // For the E8M0 format, this is always false since there are no
1052
+ // actual significand bits.
1053
+ if (semantics == &semFloat8E8M0FN)
1054
+ return false ;
1031
1055
const integerPart *Parts = significandParts ();
1032
1056
const unsigned PartCount = partCountForBits (semantics->precision );
1033
1057
for (unsigned i = 0 ; i < PartCount - 1 ; i++)
@@ -1075,6 +1099,11 @@ bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1075
1099
}
1076
1100
1077
1101
bool IEEEFloat::isSignificandAllZeros () const {
1102
+ // For the E8M0 format, this is always true since there are no
1103
+ // actual significand bits.
1104
+ if (semantics == &semFloat8E8M0FN)
1105
+ return true ;
1106
+
1078
1107
// Test if the significand excluding the integral bit is all zeros. This
1079
1108
// allows us to test for binade boundaries.
1080
1109
const integerPart *Parts = significandParts ();
@@ -1113,6 +1142,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1113
1142
}
1114
1143
1115
1144
bool IEEEFloat::isLargest () const {
1145
+ if (semantics == &semFloat8E8M0FN)
1146
+ return isFiniteNonZero () && exponent == semantics->maxExponent ;
1116
1147
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1117
1148
semantics->nanEncoding == fltNanEncoding::AllOnes) {
1118
1149
// The largest number by magnitude in our format will be the floating point
@@ -1165,6 +1196,12 @@ IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1165
1196
1166
1197
/// Construct a value with the given semantics, set to that format's default
/// starting value.
///
/// Most formats start out as positive zero. The E8M0 type cannot represent
/// zero, so for it the closest representation -- the smallest normalized
/// value -- is used instead.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
  initialize(&ourSemantics);

  const bool CanRepresentZero = semantics != &semFloat8E8M0FN;
  if (CanRepresentZero)
    makeZero(false);
  else
    makeSmallestNormalized(false);
}
1170
1207
@@ -1727,6 +1764,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1727
1764
/* Canonicalize zeroes. */
1728
1765
if (omsb == 0 ) {
1729
1766
category = fcZero;
1767
+ // The E8M0 type cannot represent the value zero and
1768
+ // thus the category cannot be fcZero. So, get the
1769
+ // closest representation to fcZero instead.
1770
+ if (semantics == &semFloat8E8M0FN)
1771
+ makeSmallestNormalized (false );
1730
1772
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1731
1773
sign = false ;
1732
1774
}
@@ -2606,6 +2648,11 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2606
2648
fs = opOK;
2607
2649
}
2608
2650
2651
+ // The E8M0 type cannot represent the value zero and
2652
+ // thus the category cannot be fcZero. So, get the
2653
+ // closest representation to fcZero instead.
2654
+ if (category == fcZero && semantics == &semFloat8E8M0FN)
2655
+ makeSmallestNormalized (false );
2609
2656
return fs;
2610
2657
}
2611
2658
@@ -3070,6 +3117,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3070
3117
fs = opOK;
3071
3118
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3072
3119
sign = false ;
3120
+ // The E8M0 type cannot represent the value zero and
3121
+ // thus the category cannot be fcZero. So, get the
3122
+ // closest representation to fcZero instead.
3123
+ if (semantics == &semFloat8E8M0FN)
3124
+ makeSmallestNormalized (false );
3073
3125
3074
3126
/* Check whether the normalized exponent is high enough to overflow
3075
3127
max during the log-rebasing in the max-exponent check below. */
@@ -3533,15 +3585,16 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3533
3585
template <const fltSemantics &S>
3534
3586
APInt IEEEFloat::convertIEEEFloatToAPInt () const {
3535
3587
assert (semantics == &S);
3536
-
3537
- constexpr int bias = -(S.minExponent - 1 );
3588
+ const int bias =
3589
+ (semantics == &semFloat8E8M0FN) ? -S. minExponent : -(S.minExponent - 1 );
3538
3590
constexpr unsigned int trailing_significand_bits = S.precision - 1 ;
3539
3591
constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3540
3592
constexpr integerPart integer_bit =
3541
3593
integerPart{1 } << (trailing_significand_bits % integerPartWidth);
3542
3594
constexpr uint64_t significand_mask = integer_bit - 1 ;
3543
3595
constexpr unsigned int exponent_bits =
3544
- S.sizeInBits - 1 - trailing_significand_bits;
3596
+ trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3597
+ : S.sizeInBits ;
3545
3598
static_assert (exponent_bits < 64 );
3546
3599
constexpr uint64_t exponent_mask = (uint64_t {1 } << exponent_bits) - 1 ;
3547
3600
@@ -3557,6 +3610,8 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3557
3610
!(significandParts ()[integer_bit_part] & integer_bit))
3558
3611
myexponent = 0 ; // denormal
3559
3612
} else if (category == fcZero) {
3613
+ if (semantics == &semFloat8E8M0FN)
3614
+ llvm_unreachable (" semantics does not support zero!" );
3560
3615
myexponent = ::exponentZero (S) + bias;
3561
3616
mysignificand.fill (0 );
3562
3617
} else if (category == fcInfinity) {
@@ -3659,6 +3714,11 @@ APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3659
3714
return convertIEEEFloatToAPInt<semFloatTF32>();
3660
3715
}
3661
3716
3717
+ APInt IEEEFloat::convertFloat8E8M0FNAPFloatToAPInt () const {
3718
+ assert (partCount () == 1 );
3719
+ return convertIEEEFloatToAPInt<semFloat8E8M0FN>();
3720
+ }
3721
+
3662
3722
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt () const {
3663
3723
assert (partCount () == 1 );
3664
3724
return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
@@ -3721,6 +3781,9 @@ APInt IEEEFloat::bitcastToAPInt() const {
3721
3781
if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3722
3782
return convertFloatTF32APFloatToAPInt ();
3723
3783
3784
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FN)
3785
+ return convertFloat8E8M0FNAPFloatToAPInt ();
3786
+
3724
3787
if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3725
3788
return convertFloat6E3M2FNAPFloatToAPInt ();
3726
3789
@@ -3819,6 +3882,40 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3819
3882
}
3820
3883
}
3821
3884
3885
+ // The E8M0 format has the following characteristics:
3886
+ // It is an 8-bit unsigned format with only exponents (no actual significand)
3887
+ // No encodings for {zero, infinities or denorms}
3888
+ // NaN is represented by all 1's
3889
+ // Bias is 127
3890
+ void IEEEFloat::initFromFloat8E8M0FNAPInt (const APInt &api) {
3891
+ const uint64_t exponent_mask = 0xff ;
3892
+ uint64_t val = api.getRawData ()[0 ];
3893
+ uint64_t myexponent = (val & exponent_mask);
3894
+
3895
+ initialize (&semFloat8E8M0FN);
3896
+ assert (partCount () == 1 );
3897
+
3898
+ // This format has unsigned representation only
3899
+ sign = 0 ;
3900
+
3901
+ // Set the significand
3902
+ // This format does not have any significand but the 'Pth' precision bit is
3903
+ // always set to 1 for consistency in APFloat's internal representation.
3904
+ uint64_t mysignificand = 1 ;
3905
+ significandParts ()[0 ] = mysignificand;
3906
+
3907
+ // This format can either have a NaN or fcNormal
3908
+ // All 1's i.e. 255 is a NaN
3909
+ if (val == exponent_mask) {
3910
+ category = fcNaN;
3911
+ exponent = exponentNaN ();
3912
+ return ;
3913
+ }
3914
+ // Handle fcNormal...
3915
+ category = fcNormal;
3916
+ exponent = myexponent - 127 ; // 127 is bias
3917
+ return ;
3918
+ }
3822
3919
template <const fltSemantics &S>
3823
3920
void IEEEFloat::initFromIEEEAPInt (const APInt &api) {
3824
3921
assert (api.getBitWidth () == S.sizeInBits );
@@ -3999,6 +4096,8 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3999
4096
return initFromFloat8E3M4APInt (api);
4000
4097
if (Sem == &semFloatTF32)
4001
4098
return initFromFloatTF32APInt (api);
4099
+ if (Sem == &semFloat8E8M0FN)
4100
+ return initFromFloat8E8M0FNAPInt (api);
4002
4101
if (Sem == &semFloat6E3M2FN)
4003
4102
return initFromFloat6E3M2FNAPInt (api);
4004
4103
if (Sem == &semFloat6E2M3FN)
@@ -4032,6 +4131,13 @@ void IEEEFloat::makeLargest(bool Negative) {
4032
4131
significand[PartCount - 1 ] = (NumUnusedHighBits < integerPartWidth)
4033
4132
? (~integerPart (0 ) >> NumUnusedHighBits)
4034
4133
: 0 ;
4134
+ // For E8M0 format, we only have the 'internal' precision bit
4135
+ // (aka 'P' the precision bit) which is always set to 1.
4136
+ // Hence, the below logic of setting the LSB to 0 does not apply.
4137
+ // For other cases, the LSB is meant to be any bit other than
4138
+ // the Pth precision bit.
4139
+ if (semantics == &semFloat8E8M0FN)
4140
+ return ;
4035
4141
4036
4142
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4037
4143
semantics->nanEncoding == fltNanEncoding::AllOnes)
@@ -4509,6 +4615,11 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4509
4615
exponent = 0 ;
4510
4616
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4511
4617
sign = false ;
4618
+ // The E8M0 type cannot represent the value zero and
4619
+ // thus the category cannot be fcZero. So, get the
4620
+ // closest representation to fcZero instead.
4621
+ if (semantics == &semFloat8E8M0FN)
4622
+ makeSmallestNormalized (false );
4512
4623
break ;
4513
4624
}
4514
4625
@@ -4575,6 +4686,11 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4575
4686
// denormal always increment since moving denormals and the numbers in the
4576
4687
// smallest normal binade have the same exponent in our representation.
4577
4688
bool WillCrossBinadeBoundary = !isDenormal () && isSignificandAllOnes ();
4689
+ // The E8M0 format does not support Denorms.
4690
+ // Since there are only exponents, any increment always crosses the
4691
+ // 'BinadeBoundary'. So, make this true always.
4692
+ if (semantics == &semFloat8E8M0FN)
4693
+ WillCrossBinadeBoundary = true ;
4578
4694
4579
4695
if (WillCrossBinadeBoundary) {
4580
4696
integerPart *Parts = significandParts ();
@@ -4626,6 +4742,11 @@ void IEEEFloat::makeInf(bool Negative) {
4626
4742
}
4627
4743
4628
4744
void IEEEFloat::makeZero (bool Negative) {
4745
+ // The E8M0 type cannot represent the value zero.
4746
+ if (semantics == &semFloat8E8M0FN) {
4747
+ assert (false && " This floating point format does not support Zero\n " );
4748
+ return ;
4749
+ }
4629
4750
category = fcZero;
4630
4751
sign = Negative;
4631
4752
if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
0 commit comments