@@ -77,8 +77,6 @@ eisel_lemire(ExpandedFloat<T> init_num,
77
77
UIntType mantissa = init_num.mantissa ;
78
78
int32_t exp10 = init_num.exponent ;
79
79
80
- constexpr uint32_t BITS_IN_MANTISSA = sizeof (mantissa) * 8 ;
81
-
82
80
if (sizeof (T) > 8 ) { // This algorithm cannot handle anything longer than a
83
81
// double, so we skip straight to the fallback.
84
82
return cpp::nullopt;
@@ -94,8 +92,8 @@ eisel_lemire(ExpandedFloat<T> init_num,
94
92
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
95
93
mantissa <<= clz;
96
94
97
- int32_t exp2 =
98
- exp10_to_exp2 (exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
95
+ int32_t exp2 = exp10_to_exp2 (exp10) + FloatProp::UINTTYPE_BITS +
96
+ FloatProp::EXPONENT_BIAS - clz;
99
97
100
98
// Multiplication
101
99
const uint64_t *power_of_ten =
@@ -112,7 +110,9 @@ eisel_lemire(ExpandedFloat<T> init_num,
112
110
// accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
113
111
// it's 6 bits for floats in this case.
114
112
const uint64_t halfway_constant =
115
- (uint64_t (1 ) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ))) - 1 ;
113
+ (uint64_t (1 ) << (FloatProp::UINTTYPE_BITS -
114
+ (FloatProp::MANTISSA_WIDTH + 3 ))) -
115
+ 1 ;
116
116
if ((high64 (first_approx) & halfway_constant) == halfway_constant &&
117
117
low64 (first_approx) + mantissa < mantissa) {
118
118
UInt128 low_bits =
@@ -131,11 +131,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
131
131
}
132
132
133
133
// Shifting to 54 bits for doubles and 25 bits for floats
134
- UIntType msb =
135
- static_cast <UIntType>( high64 (final_approx) >> (BITS_IN_MANTISSA - 1 ));
134
+ UIntType msb = static_cast <UIntType>( high64 (final_approx) >>
135
+ (FloatProp::UINTTYPE_BITS - 1 ));
136
136
UIntType final_mantissa = static_cast <UIntType>(
137
137
high64 (final_approx) >>
138
- (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 )));
138
+ (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3 )));
139
139
exp2 -= static_cast <uint32_t >(1 ^ msb); // same as !msb
140
140
141
141
if (round == RoundDirection::Nearest) {
@@ -190,8 +190,6 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
190
190
UIntType mantissa = init_num.mantissa ;
191
191
int32_t exp10 = init_num.exponent ;
192
192
193
- constexpr uint32_t BITS_IN_MANTISSA = sizeof (mantissa) * 8 ;
194
-
195
193
// Exp10 Range
196
194
// This doesn't reach very far into the range for long doubles, since it's
197
195
// sized for doubles and their 11 exponent bits, and not for long doubles and
@@ -211,8 +209,8 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
211
209
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
212
210
mantissa <<= clz;
213
211
214
- int32_t exp2 =
215
- exp10_to_exp2 (exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
212
+ int32_t exp2 = exp10_to_exp2 (exp10) + FloatProp::UINTTYPE_BITS +
213
+ FloatProp::EXPONENT_BIAS - clz;
216
214
217
215
// Multiplication
218
216
const uint64_t *power_of_ten =
@@ -249,19 +247,21 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
249
247
// accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
250
248
// it's 12 bits for 128 bit floats in this case.
251
249
constexpr UInt128 HALFWAY_CONSTANT =
252
- (UInt128 (1 ) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ))) - 1 ;
250
+ (UInt128 (1 ) << (FloatProp::UINTTYPE_BITS -
251
+ (FloatProp::MANTISSA_WIDTH + 3 ))) -
252
+ 1 ;
253
253
254
254
if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
255
255
final_approx_lower + mantissa < mantissa) {
256
256
return cpp::nullopt;
257
257
}
258
258
259
259
// Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
260
- uint32_t msb =
261
- static_cast < uint32_t >(final_approx_upper >> (BITS_IN_MANTISSA - 1 ));
260
+ uint32_t msb = static_cast < uint32_t >(final_approx_upper >>
261
+ (FloatProp::UINTTYPE_BITS - 1 ));
262
262
UIntType final_mantissa =
263
263
final_approx_upper >>
264
- (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ));
264
+ (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3 ));
265
265
exp2 -= static_cast <uint32_t >(1 ^ msb); // same as !msb
266
266
267
267
if (round == RoundDirection::Nearest) {
@@ -622,9 +622,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
622
622
// other out, and subnormal numbers allow for the result to be at the very low
623
623
// end of the final mantissa.
624
624
template <typename T> constexpr int32_t get_lower_bound () {
625
- return -((fputil::FloatProperties<T>::EXPONENT_BIAS +
626
- static_cast <int32_t >(fputil::FloatProperties<T>::MANTISSA_WIDTH +
627
- (sizeof (T) * 8 ))) /
625
+ using FloatProp = typename fputil::FloatProperties<T>;
626
+ return -((FloatProp::EXPONENT_BIAS +
627
+ static_cast <int32_t >(FloatProp::MANTISSA_WIDTH +
628
+ FloatProp::UINTTYPE_BITS)) /
628
629
3 );
629
630
}
630
631
@@ -733,7 +734,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
733
734
734
735
// This is the number of leading zeroes a properly normalized float of type T
735
736
// should have.
736
- constexpr int32_t NUMBITS = sizeof (UIntType) * 8 ;
737
737
constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1 ;
738
738
739
739
// Normalization step 1: Bring the leading bit to the highest bit of UIntType.
@@ -743,8 +743,9 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
743
743
// Keep exp2 representing the exponent of the lowest bit of UIntType.
744
744
exp2 -= amount_to_shift_left;
745
745
746
- // biasedExponent represents the biased exponent of the most significant bit.
747
- int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1 ;
746
+ // biased_exponent represents the biased exponent of the most significant bit.
747
+ int32_t biased_exponent =
748
+ exp2 + FloatProp::UINTTYPE_BITS + FPBits::EXPONENT_BIAS - 1 ;
748
749
749
750
// Handle numbers that're too large and get squashed to inf
750
751
if (biased_exponent >= INF_EXP) {
@@ -754,14 +755,15 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
754
755
return output;
755
756
}
756
757
757
- uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1 ;
758
+ uint32_t amount_to_shift_right =
759
+ FloatProp::UINTTYPE_BITS - FloatProp::MANTISSA_WIDTH - 1 ;
758
760
759
761
// Handle subnormals.
760
762
if (biased_exponent <= 0 ) {
761
763
amount_to_shift_right += 1 - biased_exponent;
762
764
biased_exponent = 0 ;
763
765
764
- if (amount_to_shift_right > NUMBITS ) {
766
+ if (amount_to_shift_right > FloatProp::UINTTYPE_BITS ) {
765
767
// Return 0 if the exponent is too small.
766
768
output.num = {0 , 0 };
767
769
output.error = ERANGE;
@@ -774,7 +776,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
774
776
bool round_bit = static_cast <bool >(mantissa & round_bit_mask);
775
777
bool sticky_bit = static_cast <bool >(mantissa & sticky_mask) || truncated;
776
778
777
- if (amount_to_shift_right < NUMBITS ) {
779
+ if (amount_to_shift_right < FloatProp::UINTTYPE_BITS ) {
778
780
// Shift the mantissa and clear the implicit bit.
779
781
mantissa >>= amount_to_shift_right;
780
782
mantissa &= FloatProp::MANTISSA_MASK;
0 commit comments