llvm · lntue · Jun 30, 2024 · Jun 25, 2024 · Jun 25, 2024 · Jun 25, 2024
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -510,6 +510,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.f16div
     libc.src.math.f16divf
     libc.src.math.f16fmaf
+    libc.src.math.f16sqrt
     libc.src.math.f16sqrtf
     libc.src.math.fabsf16
     libc.src.math.fdimf16

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -543,7 +543,9 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.f16fma
     libc.src.math.f16fmaf
     libc.src.math.f16fmal
+    libc.src.math.f16sqrt
     libc.src.math.f16sqrtf
+    libc.src.math.f16sqrtl
     libc.src.math.fabsf16
     libc.src.math.fdimf16
     libc.src.math.floorf16
@@ -596,8 +598,9 @@ if(LIBC_TYPES_HAS_FLOAT16)
   if(LIBC_TYPES_HAS_FLOAT128)
     list(APPEND TARGET_LIBM_ENTRYPOINTS
       # math.h C23 mixed _Float16 and _Float128 entrypoints
-      libc.src.math.f16fmaf128
       libc.src.math.f16divf128
+      libc.src.math.f16fmaf128
+      libc.src.math.f16sqrtf128
     )
   endif()
 endif()

diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
@@ -292,7 +292,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fma       | |check|          | |check|         |                        |                      |                        | 7.12.13.1              | F.10.10.1                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| f16sqrt   | |check|          |                 |                        | N/A                  |                        | 7.12.14.6              | F.10.11                    |
+| f16sqrt   | |check|\*        | |check|\*       | |check|\*              | N/A                  | |check|                | 7.12.14.6              | F.10.11                    |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fsqrt     | N/A              |                 |                        | N/A                  |                        | 7.12.14.6              | F.10.11                    |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td
@@ -60,6 +60,10 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> {
           GuardedFunctionSpec<"f16div", RetValSpec<Float16Type>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16divf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16divl", RetValSpec<Float16Type>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
+
+          GuardedFunctionSpec<"f16sqrt", RetValSpec<Float16Type>, [ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"f16sqrtf", RetValSpec<Float16Type>, [ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"f16sqrtl", RetValSpec<Float16Type>, [ArgSpec<LongDoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
       ]
   >;
 

diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
@@ -731,7 +731,7 @@ def StdC : StandardSpec<"stdc"> {
 
           GuardedFunctionSpec<"f16divf128", RetValSpec<Float16Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">,
 
-          GuardedFunctionSpec<"f16sqrtf", RetValSpec<Float16Type>, [ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"f16sqrtf128", RetValSpec<Float16Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">,
       ]
   >;
 

diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -8,6 +8,7 @@ add_header_library(
     libc.src.__support.common
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.type_traits
+    libc.src.__support.FPUtil.dyadic_float
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.rounding_mode

diff --git a/libc/src/__support/FPUtil/generic/sqrt.h b/libc/src/__support/FPUtil/generic/sqrt.h
@@ -14,7 +14,7 @@
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/dyadic_float.h"
 #include "src/__support/common.h"
 #include "src/__support/uint128.h"
 
@@ -78,16 +78,14 @@ sqrt(InType x) {
     return x86::sqrt(x);
   } else {
     // IEEE floating points formats.
-    using OutFPBits = typename fputil::FPBits<OutType>;
-    using OutStorageType = typename OutFPBits::StorageType;
-    using InFPBits = typename fputil::FPBits<InType>;
+    using OutFPBits = FPBits<OutType>;
+    using InFPBits = FPBits<InType>;
     using InStorageType = typename InFPBits::StorageType;
+    using DyadicFloat =
+        DyadicFloat<cpp::bit_ceil(static_cast<size_t>(InFPBits::STORAGE_LEN))>;
+
     constexpr InStorageType ONE = InStorageType(1) << InFPBits::FRACTION_LEN;
     constexpr auto FLT_NAN = OutFPBits::quiet_nan().get_val();
-    constexpr int EXTRA_FRACTION_LEN =
-        InFPBits::FRACTION_LEN - OutFPBits::FRACTION_LEN;
-    constexpr InStorageType EXTRA_FRACTION_MASK =
-        (InStorageType(1) << EXTRA_FRACTION_LEN) - 1;
 
     InFPBits bits(x);
 
@@ -135,6 +133,7 @@ sqrt(InType x) {
       InStorageType y = ONE;
       InStorageType r = x_mant - ONE;
 
+      // TODO: Reduce iteration count to OutFPBits::FRACTION_LEN + 2 or + 3.
       for (InStorageType current_bit = ONE >> 1; current_bit;
            current_bit >>= 1) {
         r <<= 1;
@@ -146,91 +145,19 @@ sqrt(InType x) {
       }
 
       // We compute one more iteration in order to round correctly.
-      bool lsb = (y & (InStorageType(1) << EXTRA_FRACTION_LEN)) !=
-                 0;    // Least significant bit
-      bool rb = false; // Round bit
       r <<= 2;
-      InStorageType tmp = (y << 2) + 1;
+      y <<= 2;
+      InStorageType tmp = y + 1;
       if (r >= tmp) {
         r -= tmp;
-        rb = true;
-      }
-
-      bool sticky = false;
-
-      if constexpr (EXTRA_FRACTION_LEN > 0) {
-        sticky = rb || (y & EXTRA_FRACTION_MASK) != 0;
-        rb = (y & (InStorageType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
-      }
-
-      // Remove hidden bit and append the exponent field.
-      x_exp = ((x_exp >> 1) + OutFPBits::EXP_BIAS);
-
-      OutStorageType y_out = static_cast<OutStorageType>(
-          ((y - ONE) >> EXTRA_FRACTION_LEN) |
-          (static_cast<OutStorageType>(x_exp) << OutFPBits::FRACTION_LEN));
-
-      if constexpr (EXTRA_FRACTION_LEN > 0) {
-        if (x_exp >= OutFPBits::MAX_BIASED_EXPONENT) {
-          switch (quick_get_round()) {
-          case FE_TONEAREST:
-          case FE_UPWARD:
-            return OutFPBits::inf().get_val();
-          default:
-            return OutFPBits::max_normal().get_val();
-          }
-        }
-
-        if (x_exp <
-            -OutFPBits::EXP_BIAS - OutFPBits::SIG_LEN + EXTRA_FRACTION_LEN) {
-          switch (quick_get_round()) {
-          case FE_UPWARD:
-            return OutFPBits::min_subnormal().get_val();
-          default:
-            return OutType(0.0);
-          }
-        }
-
-        if (x_exp <= 0) {
-          int underflow_extra_fraction_len = EXTRA_FRACTION_LEN - x_exp + 1;
-          InStorageType underflow_extra_fraction_mask =
-              (InStorageType(1) << underflow_extra_fraction_len) - 1;
-
-          rb = (y & (InStorageType(1) << (underflow_extra_fraction_len - 1))) !=
-               0;
-          OutStorageType subnormal_mant =
-              static_cast<OutStorageType>(y >> underflow_extra_fraction_len);
-          lsb = (subnormal_mant & 1) != 0;
-          sticky = sticky || (y & underflow_extra_fraction_mask) != 0;
-
-          switch (quick_get_round()) {
-          case FE_TONEAREST:
-            if (rb && (lsb || sticky))
-              ++subnormal_mant;
-            break;
-          case FE_UPWARD:
-            if (rb || sticky)
-              ++subnormal_mant;
-            break;
-          }
-
-          return cpp::bit_cast<OutType>(subnormal_mant);
-        }
-      }
-
-      switch (quick_get_round()) {
-      case FE_TONEAREST:
-        // Round to nearest, ties to even
-        if (rb && (lsb || (r != 0)))
-          ++y_out;
-        break;
-      case FE_UPWARD:
-        if (rb || (r != 0) || sticky)
-          ++y_out;
-        break;
+        // Rounding bit.
+        y |= 2;
       }
+      // Sticky bit.
+      y |= static_cast<unsigned int>(r != 0);
 
-      return cpp::bit_cast<OutType>(y_out);
+      DyadicFloat yd(Sign::POS, (x_exp >> 1) - 2 - InFPBits::FRACTION_LEN, y);
+      return yd.template as<OutType, /*ShouldSignalExceptions=*/true>();
     }
   }
 }

diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -109,7 +109,10 @@ add_math_entrypoint_object(f16fmaf)
 add_math_entrypoint_object(f16fmal)
 add_math_entrypoint_object(f16fmaf128)
 
+add_math_entrypoint_object(f16sqrt)
 add_math_entrypoint_object(f16sqrtf)
+add_math_entrypoint_object(f16sqrtl)
+add_math_entrypoint_object(f16sqrtf128)
 
 add_math_entrypoint_object(fabs)
 add_math_entrypoint_object(fabsf)

diff --git a/libc/src/math/f16sqrt.h b/libc/src/math/f16sqrt.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16sqrt -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16SQRT_H
+#define LLVM_LIBC_SRC_MATH_F16SQRT_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16sqrt(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16SQRT_H
diff --git a/libc/src/math/f16sqrtf128.h b/libc/src/math/f16sqrtf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16sqrtf128 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16SQRTF128_H
+#define LLVM_LIBC_SRC_MATH_F16SQRTF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16sqrtf128(float128 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16SQRTF128_H
diff --git a/libc/src/math/f16sqrtl.h b/libc/src/math/f16sqrtl.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16sqrtl ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16SQRTL_H
+#define LLVM_LIBC_SRC_MATH_F16SQRTL_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16sqrtl(long double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16SQRTL_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -3880,6 +3880,19 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  f16sqrt
+  SRCS
+    f16sqrt.cpp
+  HDRS
+    ../f16sqrt.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.sqrt
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   f16sqrtf
   SRCS
@@ -3892,3 +3905,29 @@ add_entrypoint_object(
   COMPILE_OPTIONS
     -O3
 )
+
+add_entrypoint_object(
+  f16sqrtl
+  SRCS
+    f16sqrtl.cpp
+  HDRS
+    ../f16sqrtl.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.sqrt
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  f16sqrtf128
+  SRCS
+    f16sqrtf128.cpp
+  HDRS
+    ../f16sqrtf128.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.sqrt
+  COMPILE_OPTIONS
+    -O3
+)
diff --git a/libc/src/math/generic/f16sqrt.cpp b/libc/src/math/generic/f16sqrt.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16sqrt function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16sqrt.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16sqrt, (double x)) {
+  return fputil::sqrt<float16>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/f16sqrtf128.cpp b/libc/src/math/generic/f16sqrtf128.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16sqrtf128 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16sqrtf128.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16sqrtf128, (float128 x)) {
+  return fputil::sqrt<float16>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/f16sqrtl.cpp b/libc/src/math/generic/f16sqrtl.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16sqrtl function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16sqrtl.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16sqrtl, (long double x)) {
+  return fputil::sqrt<float16>(x);
+}
+
+} // namespace LIBC_NAMESPACE
-Original file line number
+Diff line change
@@ Expand Up / @@ -731,7 +731,7 @@ def StdC : StandardSpec<"stdc"> { @@
               GuardedFunctionSpec<"f16divf128", RetValSpec<Float16Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">,
-              GuardedFunctionSpec<"f16sqrtf", RetValSpec<Float16Type>, [ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+              GuardedFunctionSpec<"f16sqrtf128", RetValSpec<Float16Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">,
           ]
       >;
@@ Expand Down @@