Skip to content

Commit 850fae2

Browse files
committed
[APFloat] Add APFloat support for E8M0 type
This patch adds an APFloat type for the unsigned E8M0 format. This format is used to represent the "scale-format" in the MX specification: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf

This format does not support infinities, denormals, or zeroes. Like FP32, its exponent is 8 bits wide (all of the bits of the format) and the bias value is 127. However, it differs from IEEE FP32 in that the minExponent is -127 (instead of -126). The APFloat utility functions are updated to handle these constraints for this format:

* The bias calculation is different, and the convertIEEE* APIs are updated to handle this.
* Since there are no significand bits, the isSignificandAll{Zeroes/Ones} methods are updated accordingly.
* Although the format does not have any precision, the precision in the fltSemantics is set to 1 for consistency with APFloat's internal representation.
* Many utility functions are updated to handle the fact that this format does not support Zero.
* A separate initFromAPInt() implementation is provided to handle the quirks of the format.
* Specific tests are added to verify the range of values for this format.

Signed-off-by: Durgadoss R <[email protected]>
1 parent b54be00 commit 850fae2

File tree

3 files changed

+445
-42
lines changed

3 files changed

+445
-42
lines changed

llvm/include/llvm/ADT/APFloat.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,12 @@ struct APFloatBase {
195195
// improved range compared to half (16-bit) formats, at (potentially)
196196
// greater throughput than single precision (32-bit) formats.
197197
S_FloatTF32,
198+
// 8-bit floating point number with (all the) 8 bits for the exponent
199+
// like in FP32. There are no zeroes, no infinities, and no denormal values.
200+
// NaN is represented with all bits set to 1. Bias is 127.
201+
// This represents the scale data type in the MX specification from
202+
// https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
203+
S_Float8E8M0FN,
198204
// 6-bit floating point number with bit layout S1E3M2. Unlike IEEE-754
199205
// types, there are no infinity or NaN values. The format is detailed in
200206
// https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
@@ -229,6 +235,7 @@ struct APFloatBase {
229235
static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE;
230236
static const fltSemantics &Float8E3M4() LLVM_READNONE;
231237
static const fltSemantics &FloatTF32() LLVM_READNONE;
238+
static const fltSemantics &Float8E8M0FN() LLVM_READNONE;
232239
static const fltSemantics &Float6E3M2FN() LLVM_READNONE;
233240
static const fltSemantics &Float6E2M3FN() LLVM_READNONE;
234241
static const fltSemantics &Float4E2M1FN() LLVM_READNONE;
@@ -591,6 +598,7 @@ class IEEEFloat final : public APFloatBase {
591598
unsigned int significandLSB() const;
592599
unsigned int significandMSB() const;
593600
void zeroSignificand();
601+
unsigned int getNumHighBits() const;
594602
/// Return true if the significand excluding the integral bit is all ones.
595603
bool isSignificandAllOnes() const;
596604
bool isSignificandAllOnesExceptLSB() const;
@@ -652,6 +660,7 @@ class IEEEFloat final : public APFloatBase {
652660
APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const;
653661
APInt convertFloat8E3M4APFloatToAPInt() const;
654662
APInt convertFloatTF32APFloatToAPInt() const;
663+
APInt convertFloat8E8M0FNAPFloatToAPInt() const;
655664
APInt convertFloat6E3M2FNAPFloatToAPInt() const;
656665
APInt convertFloat6E2M3FNAPFloatToAPInt() const;
657666
APInt convertFloat4E2M1FNAPFloatToAPInt() const;
@@ -672,6 +681,7 @@ class IEEEFloat final : public APFloatBase {
672681
void initFromFloat8E4M3B11FNUZAPInt(const APInt &api);
673682
void initFromFloat8E3M4APInt(const APInt &api);
674683
void initFromFloatTF32APInt(const APInt &api);
684+
void initFromFloat8E8M0FNAPInt(const APInt &api);
675685
void initFromFloat6E3M2FNAPInt(const APInt &api);
676686
void initFromFloat6E2M3FNAPInt(const APInt &api);
677687
void initFromFloat4E2M1FNAPInt(const APInt &api);
@@ -1079,6 +1089,9 @@ class APFloat : public APFloatBase {
10791089
/// \param Semantics - type float semantics
10801090
static APFloat getAllOnesValue(const fltSemantics &Semantics);
10811091

1092+
/// Returns true if the given semantics supports either NaN or Infinity.
1093+
///
1094+
/// \param Sem - type float semantics
10821095
static bool hasNanOrInf(const fltSemantics &Sem) {
10831096
switch (SemanticsToEnum(Sem)) {
10841097
default:
@@ -1091,6 +1104,28 @@ class APFloat : public APFloatBase {
10911104
}
10921105
}
10931106

1107+
/// Returns true if the given semantics can represent Zero.
1108+
///
/// Every semantics other than Float8E8M0FN is reported as having a zero.
///
1109+
/// \param Sem - type float semantics
1110+
static bool hasZero(const fltSemantics &Sem) {
1111+
// Compares by address against the Float8E8M0FN semantics object.
return &Sem != &Float8E8M0FN();
1112+
}
1113+
1114+
/// Returns true if the given semantics has an actual significand.
1115+
///
/// Every semantics other than Float8E8M0FN is reported as having one.
///
1116+
/// \param Sem - type float semantics
1117+
static bool hasSignificand(const fltSemantics &Sem) {
1118+
// Compares by address against the Float8E8M0FN semantics object.
return &Sem != &Float8E8M0FN();
1119+
}
1120+
1121+
/// Returns true if the given semantics has only an exponent
1122+
/// and no significand.
1123+
///
/// This is the exact negation of hasSignificand(Sem).
///
1124+
/// \param Sem - type float semantics
1125+
static bool hasExponentOnly(const fltSemantics &Sem) {
1126+
return !hasSignificand(Sem);
1127+
}
1128+
10941129
/// Used to insert APFloat objects, or objects that contain APFloat objects,
10951130
/// into FoldingSets.
10961131
void Profile(FoldingSetNodeID &NID) const;

0 commit comments

Comments
 (0)