-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc][math] Add C23 ldexpf128 math function and fix DyadicFloat conversions for subnormal ranges and 80-bit floating points. #81780
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…conversions for subnormal ranges and 80-bit floating points.
@llvm/pr-subscribers-libc Author: None (lntue) Changes: Patch is 23.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81780.diff 20 Files Affected:
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index bc09f488122865..6e194682df4bfc 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -387,6 +387,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
+ libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 02412e7549a3d5..71ff4bcfc35195 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -396,6 +396,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
+ libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 57b4a1e0f93d4f..33f6e97af0e183 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -435,6 +435,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
+ libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index bd2af656d9eecd..c586fe6664e27f 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -191,6 +191,8 @@ Basic Operations
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| ldexpl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
+| ldexpf128 | |check| | |check| | | |check| | | | | | | | | |
++--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| llrint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| llrintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 9ed94638f522ca..79487cb697f320 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -413,6 +413,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
+ GuardedFunctionSpec<"ldexpf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<IntType>], "LIBC_COMPILER_HAS_FLOAT128">,
FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 3307d33434f0b3..0c932e8ffcd550 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -75,24 +75,6 @@ add_header_library(
libc.src.__support.common
)
-add_header_library(
- manipulation_functions
- HDRS
- ManipulationFunctions.h
- DEPENDS
- .fenv_impl
- .fp_bits
- .nearest_integer_operations
- .normal_float
- libc.src.__support.CPP.bit
- libc.src.__support.CPP.limits
- libc.src.__support.CPP.type_traits
- libc.src.__support.common
- libc.src.__support.macros.optimization
- libc.include.math
- libc.src.errno.errno
-)
-
add_header_library(
basic_operations
HDRS
@@ -221,4 +203,23 @@ add_header_library(
libc.src.__support.macros.optimization
)
+add_header_library(
+ manipulation_functions
+ HDRS
+ ManipulationFunctions.h
+ DEPENDS
+ .fenv_impl
+ .fp_bits
+ .dyadic_float
+ .nearest_integer_operations
+ .normal_float
+ libc.src.__support.CPP.bit
+ libc.src.__support.CPP.limits
+ libc.src.__support.CPP.type_traits
+ libc.src.__support.common
+ libc.src.__support.macros.optimization
+ libc.include.math
+ libc.src.errno.errno
+)
+
add_subdirectory(generic)
diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 6665c90845683b..92ffe0c5a1e749 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -633,13 +633,13 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
using typename UP::Significand;
using UP::FP_MASK;
- using UP::SIG_LEN;
public:
// Constants.
using UP::EXP_BIAS;
using UP::EXP_MASK;
using UP::FRACTION_MASK;
+ using UP::SIG_LEN;
using UP::SIGN_MASK;
LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
(1 << UP::EXP_LEN) - 1;
@@ -735,8 +735,8 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
// FIXME: Use an uint32_t for 'biased_exp'.
LIBC_INLINE static constexpr RetT
create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
- static_assert(fp_type != FPType::X86_Binary80,
- "This function is not tested for X86 Extended Precision");
+ // static_assert(fp_type != FPType::X86_Binary80,
+ // "This function is not tested for X86 Extended Precision");
return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
Significand(mantissa)));
}
diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h
index 9becbaa45eadeb..9e760a28f42d75 100644
--- a/libc/src/__support/FPUtil/ManipulationFunctions.h
+++ b/libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -12,6 +12,8 @@
#include "FPBits.h"
#include "NearestIntegerOperations.h"
#include "NormalFloat.h"
+#include "dyadic_float.h"
+#include "rounding_mode.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN
@@ -117,10 +119,8 @@ LIBC_INLINE T logb(T x) {
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
LIBC_INLINE T ldexp(T x, int exp) {
- if (LIBC_UNLIKELY(exp == 0))
- return x;
FPBits<T> bits(x);
- if (LIBC_UNLIKELY(bits.is_zero() || bits.is_inf_or_nan()))
+ if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan()))
return x;
// NormalFloat uses int32_t to store the true exponent value. We should ensure
@@ -129,18 +129,40 @@ LIBC_INLINE T ldexp(T x, int exp) {
// early. Because the result of the ldexp operation can be a subnormal number,
// we need to accommodate the (mantissaWidth + 1) worth of shift in
// calculating the limit.
- int exp_limit = FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
- if (exp > exp_limit)
- return FPBits<T>::inf(bits.sign()).get_val();
+ constexpr int EXP_LIMIT =
+ FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
+ if (LIBC_UNLIKELY(exp > EXP_LIMIT)) {
+ int rounding_mode = quick_get_round();
+ Sign sign = bits.sign();
+
+ if ((sign == Sign::POS && rounding_mode == FE_DOWNWARD) ||
+ (sign == Sign::NEG && rounding_mode == FE_UPWARD) ||
+ (rounding_mode == FE_TOWARDZERO))
+ return FPBits<T>::max_normal(sign).get_val();
+
+ set_errno_if_required(ERANGE);
+ raise_except_if_required(FE_OVERFLOW);
+ return FPBits<T>::inf(sign).get_val();
+ }
// Similarly on the negative side we return zero early if |exp| is too small.
- if (exp < -exp_limit)
- return FPBits<T>::zero(bits.sign()).get_val();
+ if (LIBC_UNLIKELY(exp < -EXP_LIMIT)) {
+ int rounding_mode = quick_get_round();
+ Sign sign = bits.sign();
+
+ if ((sign == Sign::POS && rounding_mode == FE_UPWARD) ||
+ (sign == Sign::NEG && rounding_mode == FE_DOWNWARD))
+ return FPBits<T>::min_subnormal(sign).get_val();
+
+ set_errno_if_required(ERANGE);
+ raise_except_if_required(FE_UNDERFLOW);
+ return FPBits<T>::zero(sign).get_val();
+ }
// For all other values, NormalFloat to T conversion handles it the right way.
- NormalFloat<T> normal(bits);
+ DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
normal.exponent += exp;
- return normal;
+ return static_cast<T>(normal);
}
template <typename T, typename U,
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index a8b3ad7a16d3bb..382904cf13bddb 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -44,7 +44,7 @@ template <size_t Bits> struct DyadicFloat {
static_assert(FPBits<T>::FRACTION_LEN < Bits);
FPBits<T> x_bits(x);
sign = x_bits.sign();
- exponent = x_bits.get_exponent() - FPBits<T>::FRACTION_LEN;
+ exponent = x_bits.get_explicit_exponent() - FPBits<T>::FRACTION_LEN;
mantissa = MantissaType(x_bits.get_explicit_mantissa());
normalize();
}
@@ -79,25 +79,32 @@ template <size_t Bits> struct DyadicFloat {
return *this;
}
- // Assume that it is already normalized and output is not underflow.
+ // Assume that it is already normalized.
// Output is rounded correctly with respect to the current rounding mode.
- // TODO(lntue): Add support for underflow.
- // TODO(lntue): Test or add specialization for x86 long double.
template <typename T,
typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
(FPBits<T>::FRACTION_LEN < Bits),
void>>
explicit operator T() const {
- // TODO(lntue): Do we need to treat signed zeros properly?
- if (mantissa.is_zero())
- return 0.0;
+ if (LIBC_UNLIKELY(mantissa.is_zero()))
+ return FPBits<T>::zero(sign).get_val();
// Assume that it is normalized, and output is also normal.
constexpr uint32_t PRECISION = FPBits<T>::FRACTION_LEN + 1;
using output_bits_t = typename FPBits<T>::StorageType;
+ constexpr output_bits_t IMPLICIT_MASK =
+ FPBits<T>::SIG_MASK - FPBits<T>::FRACTION_MASK;
int exp_hi = exponent + static_cast<int>((Bits - 1) + FPBits<T>::EXP_BIAS);
+ if (LIBC_UNLIKELY(exp_hi > 2 * FPBits<T>::EXP_BIAS)) {
+ // Results overflow.
+ T d_hi =
+ FPBits<T>::create_value(sign, 2 * FPBits<T>::EXP_BIAS, IMPLICIT_MASK)
+ .get_val();
+ return T(2) * d_hi;
+ }
+
bool denorm = false;
uint32_t shift = Bits - PRECISION;
if (LIBC_UNLIKELY(exp_hi <= 0)) {
@@ -112,49 +119,57 @@ template <size_t Bits> struct DyadicFloat {
MantissaType m_hi(mantissa >> shift);
- T d_hi = FPBits<T>::create_value(sign, exp_hi,
- static_cast<output_bits_t>(m_hi) &
- FPBits<T>::FRACTION_MASK)
+ T d_hi = FPBits<T>::create_value(
+ sign, exp_hi,
+ (static_cast<output_bits_t>(m_hi) & FPBits<T>::SIG_MASK) |
+ IMPLICIT_MASK)
.get_val();
- const MantissaType round_mask = MantissaType(1) << (shift - 1);
- const MantissaType sticky_mask = round_mask - MantissaType(1);
+ MantissaType round_mask = MantissaType(1) << (shift - 1);
+ MantissaType sticky_mask = round_mask - MantissaType(1);
bool round_bit = !(mantissa & round_mask).is_zero();
bool sticky_bit = !(mantissa & sticky_mask).is_zero();
int round_and_sticky = int(round_bit) * 2 + int(sticky_bit);
T d_lo;
+
if (LIBC_UNLIKELY(exp_lo <= 0)) {
// d_lo is denormal, but the output is normal.
int scale_up_exponent = 2 * PRECISION;
T scale_up_factor =
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
- output_bits_t(0))
+ IMPLICIT_MASK)
.get_val();
T scale_down_factor =
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
- output_bits_t(0))
+ IMPLICIT_MASK)
.get_val();
d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
- output_bits_t(0))
+ IMPLICIT_MASK)
.get_val();
return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
scale_down_factor;
}
- d_lo = FPBits<T>::create_value(sign, exp_lo, output_bits_t(0)).get_val();
+ d_lo = FPBits<T>::create_value(sign, exp_lo, IMPLICIT_MASK).get_val();
// Still correct without FMA instructions if `d_lo` is not underflow.
T r = multiply_add(d_lo, T(round_and_sticky), d_hi);
if (LIBC_UNLIKELY(denorm)) {
- // Output is denormal, simply clear the exponent field.
- output_bits_t clear_exp = output_bits_t(exp_hi)
- << FPBits<T>::FRACTION_LEN;
+ // Exponent before rounding is in denormal range, simply clear the
+ // exponent field.
+ output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
+ if (!(r_bits & FPBits<T>::EXP_MASK)) {
+ // Output is denormal after rounding, clear the implicit bit for 80-bit
+ // long double.
+ r_bits -= IMPLICIT_MASK;
+ }
+
return FPBits<T>(r_bits).get_val();
}
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 985585cbfb8902..05ce51e8fc6503 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -149,6 +149,7 @@ add_math_entrypoint_object(ilogbl)
add_math_entrypoint_object(ldexp)
add_math_entrypoint_object(ldexpf)
add_math_entrypoint_object(ldexpl)
+add_math_entrypoint_object(ldexpf128)
add_math_entrypoint_object(log10)
add_math_entrypoint_object(log10f)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index fdf383f070697e..259ae1c2793439 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1001,10 +1001,10 @@ add_entrypoint_object(
ldexp.cpp
HDRS
../ldexp.h
+ COMPILE_OPTIONS
+ -O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
- COMPILE_OPTIONS
- -O2
)
add_entrypoint_object(
@@ -1013,10 +1013,10 @@ add_entrypoint_object(
ldexpf.cpp
HDRS
../ldexpf.h
+ COMPILE_OPTIONS
+ -O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
- COMPILE_OPTIONS
- -O2
)
add_entrypoint_object(
@@ -1025,10 +1025,23 @@ add_entrypoint_object(
ldexpl.cpp
HDRS
../ldexpl.h
+ COMPILE_OPTIONS
+ -O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_entrypoint_object(
+ ldexpf128
+ SRCS
+ ldexpf128.cpp
+ HDRS
+ ../ldexpf128.h
COMPILE_OPTIONS
- -O2
+ -O3
+ DEPENDS
+ libc.src.__support.macros.properties.float
+ libc.src.__support.FPUtil.manipulation_functions
)
add_object_library(
diff --git a/libc/src/math/generic/ldexpf128.cpp b/libc/src/math/generic/ldexpf128.cpp
new file mode 100644
index 00000000000000..ed2ebd38dfae75
--- /dev/null
+++ b/libc/src/math/generic/ldexpf128.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of ldexpf128 function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/ldexpf128.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float128, ldexpf128, (float128 x, int exp)) {
+ return fputil::ldexp(x, exp);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/ldexpf128.h b/libc/src/math/ldexpf128.h
new file mode 100644
index 00000000000000..adf9d8f56b3566
--- /dev/null
+++ b/libc/src/math/ldexpf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for ldexpf128 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LDEXPF128_H
+#define LLVM_LIBC_SRC_MATH_LDEXPF128_H
+
+#include "src/__support/macros/properties/float.h"
+
+namespace LIBC_NAMESPACE {
+
+float128 ldexpf128(float128 x, int exp);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_LDEXPF128_H
diff --git a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp
index a9f9842c503057..625aa70973b9f1 100644
--- a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp
+++ b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp
@@ -56,3 +56,37 @@ TEST(LlvmLibcDyadicFloatTest, QuickMul) {
Float256 z = quick_mul(x, y);
EXPECT_FP_EQ_ALL_ROUNDING(double(x) * double(y), double(z));
}
+
+#define TEST_EDGE_RANGES(Name, Type) \
+ TEST(LlvmLibcDyadicFloatTest, EdgeRanges##Name) { \
+ using Bits = LIBC_NAMESPACE::fputil::FPBits<Type>; \
+ using DFType = LIBC_NAMESPACE::fputil::DyadicFloat<Bits::STORAGE_LEN>; \
+ Type max_normal = Bits::max_normal().get_val(); \
+ Type min_normal = Bits::min_normal().get_val(); \
+ Type min_subnormal = Bits::min_subnormal().get_val(); \
+ Type two(2); \
+ \
+ DFType x(min_normal); \
+ EXPECT_FP_EQ_ALL_ROUNDING(min_normal, static_cast<Type>(x)); \
+ --x.exponent; \
+ EXPECT_FP_EQ(min_normal / two, static_cast<Type>(x)); \
+ \
+ DFType y(two *min_normal - min_subnormal); \
+ --y.exponent; \
+ EXPECT_FP_EQ(min_normal, static_cast<Type>(y)); \
+ \
+ DFType z(min_subnormal); \
+ EXPECT_FP_EQ_ALL_ROUNDING(min_subnormal, static_cast<Type>(z)); \
+ --z.exponent; \
+ EXPECT_FP_EQ(Bits::zero().get_val(), static_cast<Type>(z)); \
+ \
+ DFType t(max_normal); \
+ EXPECT_FP_EQ_ALL_ROUNDING(max_normal, static_cast<Type>(t)); \
+ ++t.exponent; \
+ EXPECT_FP_EQ(Bits::inf().get_val(), static_cast<Type>(t)); \
+ } \
+ static_assert(true, "Require semicolon.")
+
+TEST_EDGE_RANGES(Float, float);
+TEST_EDGE_RANGES(Double, double);
+TEST_EDGE_RANGES(LongDouble, long double);
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLis...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
libc/src/__support/FPUtil/FPBits.h
Outdated
@@ -735,8 +735,8 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> { | |||
// FIXME: Use an uint32_t for 'biased_exp'. | |||
LIBC_INLINE static constexpr RetT | |||
create_value(Sign sign, StorageType biased_exp, StorageType mantissa) { | |||
static_assert(fp_type != FPType::X86_Binary80, | |||
"This function is not tested for X86 Extended Precision"); | |||
// static_assert(fp_type != FPType::X86_Binary80, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this still be commented out?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed this static_assert and added comments about X86 extended precision for this function.
No description provided.