Skip to content

Commit f22a65c

Browse files
authored
[libc][NFC] Reuse FloatProperties constants instead of creating new ones (#75187)
Also exposes a bit more of `FloatProperties` and adds documentation. This should have been several patches but I was lazy.
1 parent 300ac0a commit f22a65c

File tree

4 files changed

+55
-62
lines changed

4 files changed

+55
-62
lines changed

libc/src/__support/FPUtil/FloatProperties.h

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -87,49 +87,57 @@ template <FPType fp_type>
8787
struct FPProperties : public internal::FPBaseProperties<fp_type> {
8888
private:
8989
using UP = internal::FPBaseProperties<fp_type>;
90-
using UP::EXP_BITS;
91-
using UP::SIG_BITS;
92-
using UP::TOTAL_BITS;
90+
// The number of bits used to represent the sign. For documentation purposes, always 1.
91+
LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
92+
using UP::EXP_BITS; // The number of bits for the *exponent* part
93+
using UP::SIG_BITS; // The number of bits for the *significand* part
94+
using UP::TOTAL_BITS; // For convenience, the sum of `SIG_BITS`, `EXP_BITS`,
95+
// and `SIGN_BITS`.
96+
static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
9397

9498
public:
99+
// An unsigned integer that is wide enough to contain all of the floating
100+
// point bits.
95101
using UIntType = typename UP::UIntType;
96102

97-
private:
98-
LIBC_INLINE_VAR static constexpr int STORAGE_BITS =
103+
// The number of bits in UIntType.
104+
LIBC_INLINE_VAR static constexpr int UINTTYPE_BITS =
99105
sizeof(UIntType) * CHAR_BIT;
100-
static_assert(STORAGE_BITS >= TOTAL_BITS);
101-
102-
// The number of bits to represent sign.
103-
// For documentation purpose, always 1.
104-
LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
105-
static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
106+
static_assert(UINTTYPE_BITS >= TOTAL_BITS);
106107

108+
private:
107109
// The exponent bias. Always positive.
108110
LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS =
109111
(1U << (EXP_BITS - 1U)) - 1U;
110112
static_assert(EXP_BIAS > 0);
111113

112-
// Shifts
114+
// The shift amount to get the *significand* part to the least significant
115+
// bit. Always `0` but kept for consistency.
113116
LIBC_INLINE_VAR static constexpr int SIG_MASK_SHIFT = 0;
117+
// The shift amount to get the *exponent* part to the least significant bit.
114118
LIBC_INLINE_VAR static constexpr int EXP_MASK_SHIFT = SIG_BITS;
119+
// The shift amount to get the *sign* part to the least significant bit.
115120
LIBC_INLINE_VAR static constexpr int SIGN_MASK_SHIFT = SIG_BITS + EXP_BITS;
116121

117-
// Masks
122+
// The bit pattern that keeps only the *significand* part.
118123
LIBC_INLINE_VAR static constexpr UIntType SIG_MASK =
119124
mask_trailing_ones<UIntType, SIG_BITS>() << SIG_MASK_SHIFT;
125+
// The bit pattern that keeps only the *exponent* part.
120126
LIBC_INLINE_VAR static constexpr UIntType EXP_MASK =
121127
mask_trailing_ones<UIntType, EXP_BITS>() << EXP_MASK_SHIFT;
122128

123129
public:
130+
// The bit pattern that keeps only the *sign* part.
124131
LIBC_INLINE_VAR static constexpr UIntType SIGN_MASK =
125132
mask_trailing_ones<UIntType, SIGN_BITS>() << SIGN_MASK_SHIFT;
126-
127-
private:
133+
// The bit pattern that keeps only the *sign + exponent + significand* part.
128134
LIBC_INLINE_VAR static constexpr UIntType FP_MASK =
129135
mask_trailing_ones<UIntType, TOTAL_BITS>();
136+
130137
static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint");
131138
static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover");
132139

140+
private:
133141
LIBC_INLINE static constexpr UIntType bit_at(int position) {
134142
return UIntType(1) << position;
135143
}

libc/src/__support/FPUtil/x86_64/LongDoubleBits.h

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,6 @@
2626
namespace LIBC_NAMESPACE {
2727
namespace fputil {
2828

29-
template <unsigned Width> struct Padding;
30-
31-
// i386 padding.
32-
template <> struct Padding<4> {
33-
static constexpr unsigned VALUE = 16;
34-
};
35-
36-
// x86_64 padding.
37-
template <> struct Padding<8> {
38-
static constexpr unsigned VALUE = 48;
39-
};
40-
4129
template <> struct FPBits<long double> {
4230
using UIntType = UInt128;
4331

@@ -129,11 +117,7 @@ template <> struct FPBits<long double> {
129117

130118
LIBC_INLINE constexpr UIntType uintval() {
131119
// We zero the padding bits as they can contain garbage.
132-
constexpr UIntType MASK =
133-
(UIntType(1) << (sizeof(long double) * 8 -
134-
Padding<sizeof(uintptr_t)>::VALUE)) -
135-
1;
136-
return bits & MASK;
120+
return bits & FloatProp::FP_MASK;
137121
}
138122

139123
LIBC_INLINE constexpr long double get_val() const {

libc/src/__support/float_to_string.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ namespace LIBC_NAMESPACE {
105105
using BlockInt = uint32_t;
106106
constexpr uint32_t BLOCK_SIZE = 9;
107107

108-
using MantissaInt = fputil::FPBits<long double>::UIntType;
108+
using FloatProp = fputil::FloatProperties<long double>;
109109

110110
// Larger numbers prefer a slightly larger constant than is used for the smaller
111111
// numbers.
@@ -382,11 +382,10 @@ LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
382382
(1000000000 * shifted));
383383
}
384384

385-
LIBC_INLINE uint32_t mul_shift_mod_1e9(const MantissaInt mantissa,
385+
LIBC_INLINE uint32_t mul_shift_mod_1e9(const FloatProp::UIntType mantissa,
386386
const cpp::UInt<MID_INT_SIZE> &large,
387387
const int32_t shift_amount) {
388-
constexpr size_t MANT_INT_SIZE = sizeof(MantissaInt) * 8;
389-
cpp::UInt<MID_INT_SIZE + MANT_INT_SIZE> val(large);
388+
cpp::UInt<MID_INT_SIZE + FloatProp::UINTTYPE_BITS> val(large);
390389
val = (val * mantissa) >> shift_amount;
391390
return static_cast<uint32_t>(
392391
val.div_uint32_times_pow_2(1000000000, 0).value());
@@ -415,7 +414,7 @@ class FloatToString {
415414
fputil::FPBits<T> float_bits;
416415
bool is_negative;
417416
int exponent;
418-
MantissaInt mantissa;
417+
FloatProp::UIntType mantissa;
419418

420419
static constexpr int MANT_WIDTH = fputil::MantissaWidth<T>::VALUE;
421420
static constexpr int EXP_BIAS = fputil::FPBits<T>::EXPONENT_BIAS;

libc/src/__support/str_to_float.h

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ eisel_lemire(ExpandedFloat<T> init_num,
7777
UIntType mantissa = init_num.mantissa;
7878
int32_t exp10 = init_num.exponent;
7979

80-
constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
81-
8280
if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a
8381
// double, so we skip straight to the fallback.
8482
return cpp::nullopt;
@@ -94,8 +92,8 @@ eisel_lemire(ExpandedFloat<T> init_num,
9492
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
9593
mantissa <<= clz;
9694

97-
int32_t exp2 =
98-
exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
95+
int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
96+
FloatProp::EXPONENT_BIAS - clz;
9997

10098
// Multiplication
10199
const uint64_t *power_of_ten =
@@ -112,7 +110,9 @@ eisel_lemire(ExpandedFloat<T> init_num,
112110
// accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
113111
// it's 6 bits for floats in this case.
114112
const uint64_t halfway_constant =
115-
(uint64_t(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
113+
(uint64_t(1) << (FloatProp::UINTTYPE_BITS -
114+
(FloatProp::MANTISSA_WIDTH + 3))) -
115+
1;
116116
if ((high64(first_approx) & halfway_constant) == halfway_constant &&
117117
low64(first_approx) + mantissa < mantissa) {
118118
UInt128 low_bits =
@@ -131,11 +131,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
131131
}
132132

133133
// Shifting to 54 bits for doubles and 25 bits for floats
134-
UIntType msb =
135-
static_cast<UIntType>(high64(final_approx) >> (BITS_IN_MANTISSA - 1));
134+
UIntType msb = static_cast<UIntType>(high64(final_approx) >>
135+
(FloatProp::UINTTYPE_BITS - 1));
136136
UIntType final_mantissa = static_cast<UIntType>(
137137
high64(final_approx) >>
138-
(msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3)));
138+
(msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3)));
139139
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
140140

141141
if (round == RoundDirection::Nearest) {
@@ -190,8 +190,6 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
190190
UIntType mantissa = init_num.mantissa;
191191
int32_t exp10 = init_num.exponent;
192192

193-
constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
194-
195193
// Exp10 Range
196194
// This doesn't reach very far into the range for long doubles, since it's
197195
// sized for doubles and their 11 exponent bits, and not for long doubles and
@@ -211,8 +209,8 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
211209
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
212210
mantissa <<= clz;
213211

214-
int32_t exp2 =
215-
exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
212+
int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
213+
FloatProp::EXPONENT_BIAS - clz;
216214

217215
// Multiplication
218216
const uint64_t *power_of_ten =
@@ -249,19 +247,21 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
249247
// accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
250248
// it's 12 bits for 128 bit floats in this case.
251249
constexpr UInt128 HALFWAY_CONSTANT =
252-
(UInt128(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
250+
(UInt128(1) << (FloatProp::UINTTYPE_BITS -
251+
(FloatProp::MANTISSA_WIDTH + 3))) -
252+
1;
253253

254254
if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
255255
final_approx_lower + mantissa < mantissa) {
256256
return cpp::nullopt;
257257
}
258258

259259
// Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
260-
uint32_t msb =
261-
static_cast<uint32_t>(final_approx_upper >> (BITS_IN_MANTISSA - 1));
260+
uint32_t msb = static_cast<uint32_t>(final_approx_upper >>
261+
(FloatProp::UINTTYPE_BITS - 1));
262262
UIntType final_mantissa =
263263
final_approx_upper >>
264-
(msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3));
264+
(msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3));
265265
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
266266

267267
if (round == RoundDirection::Nearest) {
@@ -622,9 +622,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
622622
// other out, and subnormal numbers allow for the result to be at the very low
623623
// end of the final mantissa.
624624
template <typename T> constexpr int32_t get_lower_bound() {
625-
return -((fputil::FloatProperties<T>::EXPONENT_BIAS +
626-
static_cast<int32_t>(fputil::FloatProperties<T>::MANTISSA_WIDTH +
627-
(sizeof(T) * 8))) /
625+
using FloatProp = typename fputil::FloatProperties<T>;
626+
return -((FloatProp::EXPONENT_BIAS +
627+
static_cast<int32_t>(FloatProp::MANTISSA_WIDTH +
628+
FloatProp::UINTTYPE_BITS)) /
628629
3);
629630
}
630631

@@ -733,7 +734,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
733734

734735
// This is the number of leading zeroes a properly normalized float of type T
735736
// should have.
736-
constexpr int32_t NUMBITS = sizeof(UIntType) * 8;
737737
constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1;
738738

739739
// Normalization step 1: Bring the leading bit to the highest bit of UIntType.
@@ -743,8 +743,9 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
743743
// Keep exp2 representing the exponent of the lowest bit of UIntType.
744744
exp2 -= amount_to_shift_left;
745745

746-
// biasedExponent represents the biased exponent of the most significant bit.
747-
int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1;
746+
// biased_exponent represents the biased exponent of the most significant bit.
747+
int32_t biased_exponent =
748+
exp2 + FloatProp::UINTTYPE_BITS + FPBits::EXPONENT_BIAS - 1;
748749

749750
// Handle numbers that're too large and get squashed to inf
750751
if (biased_exponent >= INF_EXP) {
@@ -754,14 +755,15 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
754755
return output;
755756
}
756757

757-
uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1;
758+
uint32_t amount_to_shift_right =
759+
FloatProp::UINTTYPE_BITS - FloatProp::MANTISSA_WIDTH - 1;
758760

759761
// Handle subnormals.
760762
if (biased_exponent <= 0) {
761763
amount_to_shift_right += 1 - biased_exponent;
762764
biased_exponent = 0;
763765

764-
if (amount_to_shift_right > NUMBITS) {
766+
if (amount_to_shift_right > FloatProp::UINTTYPE_BITS) {
765767
// Return 0 if the exponent is too small.
766768
output.num = {0, 0};
767769
output.error = ERANGE;
@@ -774,7 +776,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
774776
bool round_bit = static_cast<bool>(mantissa & round_bit_mask);
775777
bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated;
776778

777-
if (amount_to_shift_right < NUMBITS) {
779+
if (amount_to_shift_right < FloatProp::UINTTYPE_BITS) {
778780
// Shift the mantissa and clear the implicit bit.
779781
mantissa >>= amount_to_shift_right;
780782
mantissa &= FloatProp::MANTISSA_MASK;

0 commit comments

Comments
 (0)