@@ -57,21 +57,13 @@ struct SqrtConfig<unsigned accum> : SqrtConfig<unsigned long fract> {};
57
57
// fixed, absolute);
58
58
// print("{", coeff(P, 1), "uhr,", coeff(P, 0), "uhr},");
59
59
// };
60
- // Clang is having issues with this array:
61
- // static constexpr unsigned short fract SQRT_FIRST_APPROX[12][2] = {
62
- // {0x1.e8p-1uhr, 0x1.0cp-2uhr}, {0x1.bap-1uhr, 0x1.28p-2uhr},
63
- // {0x1.94p-1uhr, 0x1.44p-2uhr}, {0x1.74p-1uhr, 0x1.6p-2uhr},
64
- // {0x1.6p-1uhr, 0x1.74p-2uhr}, {0x1.4ep-1uhr, 0x1.88p-2uhr},
65
- // {0x1.3ep-1uhr, 0x1.9cp-2uhr}, {0x1.32p-1uhr, 0x1.acp-2uhr},
66
- // {0x1.22p-1uhr, 0x1.c4p-2uhr}, {0x1.18p-1uhr, 0x1.d4p-2uhr},
67
- // {0x1.08p-1uhr, 0x1.fp-2uhr}, {0x1.04p-1uhr, 0x1.f8p-2uhr},
68
- // };
69
- // We are using their storage type instead.
70
- // TODO(https://github.com/llvm/llvm-project/issues/83050): Use fixed point
71
- // array when the bug is fixed.
72
- static constexpr uint8_t SQRT_FIRST_APPROX[12 ][2 ] = {
73
- {244 , 67 }, {221 , 74 }, {202 , 81 }, {186 , 88 }, {176 , 93 }, {167 , 98 },
74
- {159 , 103 }, {153 , 107 }, {145 , 113 }, {140 , 117 }, {132 , 124 }, {130 , 126 },
60
+ static constexpr unsigned short fract SQRT_FIRST_APPROX[12 ][2 ] = {
61
+ {0x1 .e8p -1uhr, 0x1 .0cp-2uhr}, {0x1 .bap -1uhr, 0x1 .28p-2uhr},
62
+ {0x1 .94p-1uhr, 0x1 .44p-2uhr}, {0x1 .74p-1uhr, 0x1 .6p-2uhr},
63
+ {0x1 .6p-1uhr, 0x1 .74p-2uhr}, {0x1 .4ep-1uhr, 0x1 .88p-2uhr},
64
+ {0x1 .3ep-1uhr, 0x1 .9cp-2uhr}, {0x1 .32p-1uhr, 0x1 .acp -2uhr},
65
+ {0x1 .22p-1uhr, 0x1 .c4p -2uhr}, {0x1 .18p-1uhr, 0x1 .d4p -2uhr},
66
+ {0x1 .08p-1uhr, 0x1 .fp -2uhr}, {0x1 .04p-1uhr, 0x1 .f8p -2uhr},
75
67
};
76
68
77
69
} // namespace internal
@@ -100,11 +92,13 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
100
92
// For the initial values, we choose x_0
101
93
102
94
// Use the leading 4 bits to do look up for sqrt(x).
95
+ // After normalization, 0.25 <= x_frac < 1, so the leading 4 bits of x_frac
96
+ // are between 0b0100 and 0b1111. Hence the lookup table only needs 12
97
+ // entries, and we can get the index by subtracting the leading 4 bits of
98
+ // x_frac by 4 = 0b0100.
103
99
int index = (x_bit >> (STORAGE_LENGTH - 4 )) - 4 ;
104
- FracType a = static_cast <FracType>(cpp::bit_cast<unsigned short fract>(
105
- internal::SQRT_FIRST_APPROX[index][0 ]));
106
- FracType b = static_cast <FracType>(cpp::bit_cast<unsigned short fract>(
107
- internal::SQRT_FIRST_APPROX[index][1 ]));
100
+ FracType a = static_cast <FracType>(internal::SQRT_FIRST_APPROX[index][0 ]);
101
+ FracType b = static_cast <FracType>(internal::SQRT_FIRST_APPROX[index][1 ]);
108
102
109
103
// Initial approximation step.
110
104
// Estimated error bounds: | r - sqrt(x_frac) | < max(1.5 * 2^-11, eps).
@@ -118,15 +112,11 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
118
112
// Blanchard, J. D. and Chamberland, M., "Newton's Method Without Division",
119
113
// The American Mathematical Monthly (2023).
120
114
// https://chamberland.math.grinnell.edu/papers/newton.pdf
121
- for (int i = 0 ; i < internal::SqrtConfig<T>::EXTRA_STEPS; ++i) {
115
+ for (int i = 0 ; i < internal::SqrtConfig<T>::EXTRA_STEPS; ++i)
122
116
r = (r >> 1 ) + (x_frac >> 1 ) / r;
123
- }
124
-
125
- int r_exp = (x_exp >> 1 );
126
- int shift_back = EXP_ADJUSTMENT - r_exp;
127
117
128
118
// Re-scaling
129
- r >>= shift_back ;
119
+ r >>= EXP_ADJUSTMENT - (x_exp >> 1 ) ;
130
120
131
121
// Return result.
132
122
return cpp::bit_cast<T>(r);
0 commit comments