Skip to content

[libc][math] Add C23 ldexpf128 math function and fix DyadicFloat conversions for subnormal ranges and 80-bit floating points. #81780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
Expand Down
2 changes: 2 additions & 0 deletions libc/docs/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ Basic Operations
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| ldexpl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| ldexpf128 | |check| | |check| | | |check| | | | | | | | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| llrint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| llrintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
Expand Down
1 change: 1 addition & 0 deletions libc/spec/stdc.td
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
GuardedFunctionSpec<"ldexpf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<IntType>], "LIBC_COMPILER_HAS_FLOAT128">,

FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
Expand Down
37 changes: 19 additions & 18 deletions libc/src/__support/FPUtil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,24 +75,6 @@ add_header_library(
libc.src.__support.common
)

add_header_library(
manipulation_functions
HDRS
ManipulationFunctions.h
DEPENDS
.fenv_impl
.fp_bits
.nearest_integer_operations
.normal_float
libc.src.__support.CPP.bit
libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
libc.src.__support.common
libc.src.__support.macros.optimization
libc.include.math
libc.src.errno.errno
)

add_header_library(
basic_operations
HDRS
Expand Down Expand Up @@ -221,4 +203,23 @@ add_header_library(
libc.src.__support.macros.optimization
)

add_header_library(
manipulation_functions
HDRS
ManipulationFunctions.h
DEPENDS
.fenv_impl
.fp_bits
.dyadic_float
.nearest_integer_operations
.normal_float
libc.src.__support.CPP.bit
libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
libc.src.__support.common
libc.src.__support.macros.optimization
libc.include.math
libc.src.errno.errno
)

add_subdirectory(generic)
9 changes: 6 additions & 3 deletions libc/src/__support/FPUtil/FPBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -633,13 +633,13 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
using typename UP::Significand;

using UP::FP_MASK;
using UP::SIG_LEN;

public:
// Constants.
using UP::EXP_BIAS;
using UP::EXP_MASK;
using UP::FRACTION_MASK;
using UP::SIG_LEN;
using UP::SIGN_MASK;
LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
(1 << UP::EXP_LEN) - 1;
Expand Down Expand Up @@ -732,11 +732,14 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
// Unsafe function to create a floating point representation.
// It simply packs the sign, biased exponent and mantissa values without
// checking bound nor normalization.
//
// WARNING: For X86 Extended Precision, implicit bit needs to be set correctly
// in the 'mantissa' by the caller. This function will not check for its
// validity.
//
// FIXME: Use an uint32_t for 'biased_exp'.
LIBC_INLINE static constexpr RetT
create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
static_assert(fp_type != FPType::X86_Binary80,
"This function is not tested for X86 Extended Precision");
return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
Significand(mantissa)));
}
Expand Down
42 changes: 32 additions & 10 deletions libc/src/__support/FPUtil/ManipulationFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include "FPBits.h"
#include "NearestIntegerOperations.h"
#include "NormalFloat.h"
#include "dyadic_float.h"
#include "rounding_mode.h"

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN
Expand Down Expand Up @@ -117,10 +119,8 @@ LIBC_INLINE T logb(T x) {

template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
LIBC_INLINE T ldexp(T x, int exp) {
if (LIBC_UNLIKELY(exp == 0))
return x;
FPBits<T> bits(x);
if (LIBC_UNLIKELY(bits.is_zero() || bits.is_inf_or_nan()))
if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan()))
return x;

// NormalFloat uses int32_t to store the true exponent value. We should ensure
Expand All @@ -129,18 +129,40 @@ LIBC_INLINE T ldexp(T x, int exp) {
// early. Because the result of the ldexp operation can be a subnormal number,
// we need to accommodate the (mantissaWidth + 1) worth of shift in
// calculating the limit.
int exp_limit = FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
if (exp > exp_limit)
return FPBits<T>::inf(bits.sign()).get_val();
constexpr int EXP_LIMIT =
FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
if (LIBC_UNLIKELY(exp > EXP_LIMIT)) {
int rounding_mode = quick_get_round();
Sign sign = bits.sign();

if ((sign == Sign::POS && rounding_mode == FE_DOWNWARD) ||
(sign == Sign::NEG && rounding_mode == FE_UPWARD) ||
(rounding_mode == FE_TOWARDZERO))
return FPBits<T>::max_normal(sign).get_val();

set_errno_if_required(ERANGE);
raise_except_if_required(FE_OVERFLOW);
return FPBits<T>::inf(sign).get_val();
}

// Similarly on the negative side we return zero early if |exp| is too small.
if (exp < -exp_limit)
return FPBits<T>::zero(bits.sign()).get_val();
if (LIBC_UNLIKELY(exp < -EXP_LIMIT)) {
int rounding_mode = quick_get_round();
Sign sign = bits.sign();

if ((sign == Sign::POS && rounding_mode == FE_UPWARD) ||
(sign == Sign::NEG && rounding_mode == FE_DOWNWARD))
return FPBits<T>::min_subnormal(sign).get_val();

set_errno_if_required(ERANGE);
raise_except_if_required(FE_UNDERFLOW);
return FPBits<T>::zero(sign).get_val();
}

// For all other values, NormalFloat to T conversion handles it the right way.
NormalFloat<T> normal(bits);
DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
normal.exponent += exp;
return normal;
return static_cast<T>(normal);
}

template <typename T, typename U,
Expand Down
53 changes: 34 additions & 19 deletions libc/src/__support/FPUtil/dyadic_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ template <size_t Bits> struct DyadicFloat {
static_assert(FPBits<T>::FRACTION_LEN < Bits);
FPBits<T> x_bits(x);
sign = x_bits.sign();
exponent = x_bits.get_exponent() - FPBits<T>::FRACTION_LEN;
exponent = x_bits.get_explicit_exponent() - FPBits<T>::FRACTION_LEN;
mantissa = MantissaType(x_bits.get_explicit_mantissa());
normalize();
}
Expand Down Expand Up @@ -79,25 +79,32 @@ template <size_t Bits> struct DyadicFloat {
return *this;
}

// Assume that it is already normalized and output is not underflow.
// Assume that it is already normalized.
// Output is rounded correctly with respect to the current rounding mode.
// TODO(lntue): Add support for underflow.
// TODO(lntue): Test or add specialization for x86 long double.
template <typename T,
typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
(FPBits<T>::FRACTION_LEN < Bits),
void>>
explicit operator T() const {
// TODO(lntue): Do we need to treat signed zeros properly?
if (mantissa.is_zero())
return 0.0;
if (LIBC_UNLIKELY(mantissa.is_zero()))
return FPBits<T>::zero(sign).get_val();

// Assume that it is normalized, and output is also normal.
constexpr uint32_t PRECISION = FPBits<T>::FRACTION_LEN + 1;
using output_bits_t = typename FPBits<T>::StorageType;
constexpr output_bits_t IMPLICIT_MASK =
FPBits<T>::SIG_MASK - FPBits<T>::FRACTION_MASK;

int exp_hi = exponent + static_cast<int>((Bits - 1) + FPBits<T>::EXP_BIAS);

if (LIBC_UNLIKELY(exp_hi > 2 * FPBits<T>::EXP_BIAS)) {
// Results overflow.
T d_hi =
FPBits<T>::create_value(sign, 2 * FPBits<T>::EXP_BIAS, IMPLICIT_MASK)
.get_val();
return T(2) * d_hi;
}

bool denorm = false;
uint32_t shift = Bits - PRECISION;
if (LIBC_UNLIKELY(exp_hi <= 0)) {
Expand All @@ -112,49 +119,57 @@ template <size_t Bits> struct DyadicFloat {

MantissaType m_hi(mantissa >> shift);

T d_hi = FPBits<T>::create_value(sign, exp_hi,
static_cast<output_bits_t>(m_hi) &
FPBits<T>::FRACTION_MASK)
T d_hi = FPBits<T>::create_value(
sign, exp_hi,
(static_cast<output_bits_t>(m_hi) & FPBits<T>::SIG_MASK) |
IMPLICIT_MASK)
.get_val();

const MantissaType round_mask = MantissaType(1) << (shift - 1);
const MantissaType sticky_mask = round_mask - MantissaType(1);
MantissaType round_mask = MantissaType(1) << (shift - 1);
MantissaType sticky_mask = round_mask - MantissaType(1);

bool round_bit = !(mantissa & round_mask).is_zero();
bool sticky_bit = !(mantissa & sticky_mask).is_zero();
int round_and_sticky = int(round_bit) * 2 + int(sticky_bit);

T d_lo;

if (LIBC_UNLIKELY(exp_lo <= 0)) {
// d_lo is denormal, but the output is normal.
int scale_up_exponent = 2 * PRECISION;
T scale_up_factor =
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
output_bits_t(0))
IMPLICIT_MASK)
.get_val();
T scale_down_factor =
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
output_bits_t(0))
IMPLICIT_MASK)
.get_val();

d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
output_bits_t(0))
IMPLICIT_MASK)
.get_val();

return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
scale_down_factor;
}

d_lo = FPBits<T>::create_value(sign, exp_lo, output_bits_t(0)).get_val();
d_lo = FPBits<T>::create_value(sign, exp_lo, IMPLICIT_MASK).get_val();

// Still correct without FMA instructions if `d_lo` is not underflow.
T r = multiply_add(d_lo, T(round_and_sticky), d_hi);

if (LIBC_UNLIKELY(denorm)) {
// Output is denormal, simply clear the exponent field.
output_bits_t clear_exp = output_bits_t(exp_hi)
<< FPBits<T>::FRACTION_LEN;
// Exponent before rounding is in denormal range, simply clear the
// exponent field.
output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
if (!(r_bits & FPBits<T>::EXP_MASK)) {
// Output is denormal after rounding, clear the implicit bit for 80-bit
// long double.
r_bits -= IMPLICIT_MASK;
}

return FPBits<T>(r_bits).get_val();
}

Expand Down
1 change: 1 addition & 0 deletions libc/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ add_math_entrypoint_object(ilogbl)
add_math_entrypoint_object(ldexp)
add_math_entrypoint_object(ldexpf)
add_math_entrypoint_object(ldexpl)
add_math_entrypoint_object(ldexpf128)

add_math_entrypoint_object(log10)
add_math_entrypoint_object(log10f)
Expand Down
23 changes: 18 additions & 5 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1001,10 +1001,10 @@ add_entrypoint_object(
ldexp.cpp
HDRS
../ldexp.h
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O2
)

add_entrypoint_object(
Expand All @@ -1013,10 +1013,10 @@ add_entrypoint_object(
ldexpf.cpp
HDRS
../ldexpf.h
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O2
)

add_entrypoint_object(
Expand All @@ -1025,10 +1025,23 @@ add_entrypoint_object(
ldexpl.cpp
HDRS
../ldexpl.h
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
)

add_entrypoint_object(
ldexpf128
SRCS
ldexpf128.cpp
HDRS
../ldexpf128.h
COMPILE_OPTIONS
-O2
-O3
DEPENDS
libc.src.__support.macros.properties.float
libc.src.__support.FPUtil.manipulation_functions
)

add_object_library(
Expand Down
19 changes: 19 additions & 0 deletions libc/src/math/generic/ldexpf128.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//===-- Implementation of ldexpf128 function ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/ldexpf128.h"
#include "src/__support/FPUtil/ManipulationFunctions.h"
#include "src/__support/common.h"

namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(float128, ldexpf128, (float128 x, int exp)) {
return fputil::ldexp(x, exp);
}

} // namespace LIBC_NAMESPACE
Loading