Address comments.

lntue · lntue · commit 5a604c196659 · 2024-02-26T23:31:49.000Z
diff --git a/libc/src/__support/fixed_point/sqrt.h b/libc/src/__support/fixed_point/sqrt.h
@@ -57,21 +57,13 @@ struct SqrtConfig<unsigned accum> : SqrtConfig<unsigned long fract> {};
 //                   fixed, absolute);
 //     print("{", coeff(P, 1), "uhr,", coeff(P, 0), "uhr},");
 //   };
-// Clang is having issues with this array:
-// static constexpr unsigned short fract SQRT_FIRST_APPROX[12][2] = {
-//     {0x1.e8p-1uhr, 0x1.0cp-2uhr}, {0x1.bap-1uhr, 0x1.28p-2uhr},
-//     {0x1.94p-1uhr, 0x1.44p-2uhr}, {0x1.74p-1uhr, 0x1.6p-2uhr},
-//     {0x1.6p-1uhr, 0x1.74p-2uhr},  {0x1.4ep-1uhr, 0x1.88p-2uhr},
-//     {0x1.3ep-1uhr, 0x1.9cp-2uhr}, {0x1.32p-1uhr, 0x1.acp-2uhr},
-//     {0x1.22p-1uhr, 0x1.c4p-2uhr}, {0x1.18p-1uhr, 0x1.d4p-2uhr},
-//     {0x1.08p-1uhr, 0x1.fp-2uhr},  {0x1.04p-1uhr, 0x1.f8p-2uhr},
-// };
-// We are using their storage type instead.
-// TODO(https://github.com/llvm/llvm-project/issues/83050): Use fixed point
-// array when the bug is fixed.
-static constexpr uint8_t SQRT_FIRST_APPROX[12][2] = {
-    {244, 67},  {221, 74},  {202, 81},  {186, 88},  {176, 93},  {167, 98},
-    {159, 103}, {153, 107}, {145, 113}, {140, 117}, {132, 124}, {130, 126},
+static constexpr unsigned short fract SQRT_FIRST_APPROX[12][2] = {
+    {0x1.e8p-1uhr, 0x1.0cp-2uhr}, {0x1.bap-1uhr, 0x1.28p-2uhr},
+    {0x1.94p-1uhr, 0x1.44p-2uhr}, {0x1.74p-1uhr, 0x1.6p-2uhr},
+    {0x1.6p-1uhr, 0x1.74p-2uhr},  {0x1.4ep-1uhr, 0x1.88p-2uhr},
+    {0x1.3ep-1uhr, 0x1.9cp-2uhr}, {0x1.32p-1uhr, 0x1.acp-2uhr},
+    {0x1.22p-1uhr, 0x1.c4p-2uhr}, {0x1.18p-1uhr, 0x1.d4p-2uhr},
+    {0x1.08p-1uhr, 0x1.fp-2uhr},  {0x1.04p-1uhr, 0x1.f8p-2uhr},
 };
 
 } // namespace internal
@@ -100,11 +92,13 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
   // For the initial values, we choose x_0
 
   // Use the leading 4 bits to do look up for sqrt(x).
+  // After normalization, 0.25 <= x_frac < 1, so the leading 4 bits of x_frac
+  // are between 0b0100 and 0b1111.  Hence the lookup table only needs 12
+  // entries, and we can get the index by subtracting the leading 4 bits of
+  // x_frac by 4 = 0b0100.
   int index = (x_bit >> (STORAGE_LENGTH - 4)) - 4;
-  FracType a = static_cast<FracType>(cpp::bit_cast<unsigned short fract>(
-      internal::SQRT_FIRST_APPROX[index][0]));
-  FracType b = static_cast<FracType>(cpp::bit_cast<unsigned short fract>(
-      internal::SQRT_FIRST_APPROX[index][1]));
+  FracType a = static_cast<FracType>(internal::SQRT_FIRST_APPROX[index][0]);
+  FracType b = static_cast<FracType>(internal::SQRT_FIRST_APPROX[index][1]);
 
   // Initial approximation step.
   // Estimated error bounds: | r - sqrt(x_frac) | < max(1.5 * 2^-11, eps).
@@ -118,15 +112,11 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
   //   Blanchard, J. D. and Chamberland, M., "Newton's Method Without Division",
   //   The American Mathematical Monthly (2023).
   //   https://chamberland.math.grinnell.edu/papers/newton.pdf
-  for (int i = 0; i < internal::SqrtConfig<T>::EXTRA_STEPS; ++i) {
+  for (int i = 0; i < internal::SqrtConfig<T>::EXTRA_STEPS; ++i)
     r = (r >> 1) + (x_frac >> 1) / r;
-  }
-
-  int r_exp = (x_exp >> 1);
-  int shift_back = EXP_ADJUSTMENT - r_exp;
 
   // Re-scaling
-  r >>= shift_back;
+  r >>= EXP_ADJUSTMENT - (x_exp >> 1);
 
   // Return result.
   return cpp::bit_cast<T>(r);