Skip to content

Commit ff409d3

Browse files
authored
[libc][math] Add C23 ldexpf128 math function and fix DyadicFloat conversions for subnormal ranges and 80-bit floating points. (#81780)
1 parent 8ce1448 commit ff409d3

File tree

20 files changed

+222
-63
lines changed

20 files changed

+222
-63
lines changed

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
387387
libc.src.math.fmaxf128
388388
libc.src.math.fminf128
389389
libc.src.math.frexpf128
390+
libc.src.math.ldexpf128
390391
libc.src.math.roundf128
391392
libc.src.math.sqrtf128
392393
libc.src.math.truncf128

libc/config/linux/riscv/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
396396
libc.src.math.fmaxf128
397397
libc.src.math.fminf128
398398
libc.src.math.frexpf128
399+
libc.src.math.ldexpf128
399400
libc.src.math.roundf128
400401
libc.src.math.sqrtf128
401402
libc.src.math.truncf128

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
435435
libc.src.math.fmaxf128
436436
libc.src.math.fminf128
437437
libc.src.math.frexpf128
438+
libc.src.math.ldexpf128
438439
libc.src.math.roundf128
439440
libc.src.math.sqrtf128
440441
libc.src.math.truncf128

libc/docs/math/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ Basic Operations
191191
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
192192
| ldexpl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
193193
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
194+
| ldexpf128 | |check| | |check| | | |check| | | | | | | | | |
195+
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
194196
| llrint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
195197
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
196198
| llrintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |

libc/spec/stdc.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,7 @@ def StdC : StandardSpec<"stdc"> {
413413
FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
414414
FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
415415
FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
416+
GuardedFunctionSpec<"ldexpf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<IntType>], "LIBC_COMPILER_HAS_FLOAT128">,
416417

417418
FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
418419
FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

libc/src/__support/FPUtil/CMakeLists.txt

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -75,24 +75,6 @@ add_header_library(
7575
libc.src.__support.common
7676
)
7777

78-
add_header_library(
79-
manipulation_functions
80-
HDRS
81-
ManipulationFunctions.h
82-
DEPENDS
83-
.fenv_impl
84-
.fp_bits
85-
.nearest_integer_operations
86-
.normal_float
87-
libc.src.__support.CPP.bit
88-
libc.src.__support.CPP.limits
89-
libc.src.__support.CPP.type_traits
90-
libc.src.__support.common
91-
libc.src.__support.macros.optimization
92-
libc.include.math
93-
libc.src.errno.errno
94-
)
95-
9678
add_header_library(
9779
basic_operations
9880
HDRS
@@ -221,4 +203,23 @@ add_header_library(
221203
libc.src.__support.macros.optimization
222204
)
223205

206+
add_header_library(
207+
manipulation_functions
208+
HDRS
209+
ManipulationFunctions.h
210+
DEPENDS
211+
.fenv_impl
212+
.fp_bits
213+
.dyadic_float
214+
.nearest_integer_operations
215+
.normal_float
216+
libc.src.__support.CPP.bit
217+
libc.src.__support.CPP.limits
218+
libc.src.__support.CPP.type_traits
219+
libc.src.__support.common
220+
libc.src.__support.macros.optimization
221+
libc.include.math
222+
libc.src.errno.errno
223+
)
224+
224225
add_subdirectory(generic)

libc/src/__support/FPUtil/FPBits.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -633,13 +633,13 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
633633
using typename UP::Significand;
634634

635635
using UP::FP_MASK;
636-
using UP::SIG_LEN;
637636

638637
public:
639638
// Constants.
640639
using UP::EXP_BIAS;
641640
using UP::EXP_MASK;
642641
using UP::FRACTION_MASK;
642+
using UP::SIG_LEN;
643643
using UP::SIGN_MASK;
644644
LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
645645
(1 << UP::EXP_LEN) - 1;
@@ -732,11 +732,14 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
732732
// Unsafe function to create a floating point representation.
733733
// It simply packs the sign, biased exponent and mantissa values without
734734
// checking bound nor normalization.
735+
//
736+
// WARNING: For X86 Extended Precision, implicit bit needs to be set correctly
737+
// in the 'mantissa' by the caller. This function will not check for its
738+
// validity.
739+
//
735740
// FIXME: Use an uint32_t for 'biased_exp'.
736741
LIBC_INLINE static constexpr RetT
737742
create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
738-
static_assert(fp_type != FPType::X86_Binary80,
739-
"This function is not tested for X86 Extended Precision");
740743
return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
741744
Significand(mantissa)));
742745
}

libc/src/__support/FPUtil/ManipulationFunctions.h

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#include "FPBits.h"
1313
#include "NearestIntegerOperations.h"
1414
#include "NormalFloat.h"
15+
#include "dyadic_float.h"
16+
#include "rounding_mode.h"
1517

1618
#include "src/__support/CPP/bit.h"
1719
#include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN
@@ -117,10 +119,8 @@ LIBC_INLINE T logb(T x) {
117119

118120
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
119121
LIBC_INLINE T ldexp(T x, int exp) {
120-
if (LIBC_UNLIKELY(exp == 0))
121-
return x;
122122
FPBits<T> bits(x);
123-
if (LIBC_UNLIKELY(bits.is_zero() || bits.is_inf_or_nan()))
123+
if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan()))
124124
return x;
125125

126126
// NormalFloat uses int32_t to store the true exponent value. We should ensure
@@ -129,18 +129,40 @@ LIBC_INLINE T ldexp(T x, int exp) {
129129
// early. Because the result of the ldexp operation can be a subnormal number,
130130
// we need to accommodate the (mantissaWidth + 1) worth of shift in
131131
// calculating the limit.
132-
int exp_limit = FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
133-
if (exp > exp_limit)
134-
return FPBits<T>::inf(bits.sign()).get_val();
132+
constexpr int EXP_LIMIT =
133+
FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
134+
if (LIBC_UNLIKELY(exp > EXP_LIMIT)) {
135+
int rounding_mode = quick_get_round();
136+
Sign sign = bits.sign();
137+
138+
if ((sign == Sign::POS && rounding_mode == FE_DOWNWARD) ||
139+
(sign == Sign::NEG && rounding_mode == FE_UPWARD) ||
140+
(rounding_mode == FE_TOWARDZERO))
141+
return FPBits<T>::max_normal(sign).get_val();
142+
143+
set_errno_if_required(ERANGE);
144+
raise_except_if_required(FE_OVERFLOW);
145+
return FPBits<T>::inf(sign).get_val();
146+
}
135147

136148
// Similarly on the negative side we return zero early if |exp| is too small.
137-
if (exp < -exp_limit)
138-
return FPBits<T>::zero(bits.sign()).get_val();
149+
if (LIBC_UNLIKELY(exp < -EXP_LIMIT)) {
150+
int rounding_mode = quick_get_round();
151+
Sign sign = bits.sign();
152+
153+
if ((sign == Sign::POS && rounding_mode == FE_UPWARD) ||
154+
(sign == Sign::NEG && rounding_mode == FE_DOWNWARD))
155+
return FPBits<T>::min_subnormal(sign).get_val();
156+
157+
set_errno_if_required(ERANGE);
158+
raise_except_if_required(FE_UNDERFLOW);
159+
return FPBits<T>::zero(sign).get_val();
160+
}
139161

140162
// For all other values, NormalFloat to T conversion handles it the right way.
141-
NormalFloat<T> normal(bits);
163+
DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
142164
normal.exponent += exp;
143-
return normal;
165+
return static_cast<T>(normal);
144166
}
145167

146168
template <typename T, typename U,

libc/src/__support/FPUtil/dyadic_float.h

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ template <size_t Bits> struct DyadicFloat {
4444
static_assert(FPBits<T>::FRACTION_LEN < Bits);
4545
FPBits<T> x_bits(x);
4646
sign = x_bits.sign();
47-
exponent = x_bits.get_exponent() - FPBits<T>::FRACTION_LEN;
47+
exponent = x_bits.get_explicit_exponent() - FPBits<T>::FRACTION_LEN;
4848
mantissa = MantissaType(x_bits.get_explicit_mantissa());
4949
normalize();
5050
}
@@ -79,25 +79,32 @@ template <size_t Bits> struct DyadicFloat {
7979
return *this;
8080
}
8181

82-
// Assume that it is already normalized and output is not underflow.
82+
// Assume that it is already normalized.
8383
// Output is rounded correctly with respect to the current rounding mode.
84-
// TODO(lntue): Add support for underflow.
85-
// TODO(lntue): Test or add specialization for x86 long double.
8684
template <typename T,
8785
typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
8886
(FPBits<T>::FRACTION_LEN < Bits),
8987
void>>
9088
explicit operator T() const {
91-
// TODO(lntue): Do we need to treat signed zeros properly?
92-
if (mantissa.is_zero())
93-
return 0.0;
89+
if (LIBC_UNLIKELY(mantissa.is_zero()))
90+
return FPBits<T>::zero(sign).get_val();
9491

9592
// Assume that it is normalized, and output is also normal.
9693
constexpr uint32_t PRECISION = FPBits<T>::FRACTION_LEN + 1;
9794
using output_bits_t = typename FPBits<T>::StorageType;
95+
constexpr output_bits_t IMPLICIT_MASK =
96+
FPBits<T>::SIG_MASK - FPBits<T>::FRACTION_MASK;
9897

9998
int exp_hi = exponent + static_cast<int>((Bits - 1) + FPBits<T>::EXP_BIAS);
10099

100+
if (LIBC_UNLIKELY(exp_hi > 2 * FPBits<T>::EXP_BIAS)) {
101+
// Results overflow.
102+
T d_hi =
103+
FPBits<T>::create_value(sign, 2 * FPBits<T>::EXP_BIAS, IMPLICIT_MASK)
104+
.get_val();
105+
return T(2) * d_hi;
106+
}
107+
101108
bool denorm = false;
102109
uint32_t shift = Bits - PRECISION;
103110
if (LIBC_UNLIKELY(exp_hi <= 0)) {
@@ -112,49 +119,57 @@ template <size_t Bits> struct DyadicFloat {
112119

113120
MantissaType m_hi(mantissa >> shift);
114121

115-
T d_hi = FPBits<T>::create_value(sign, exp_hi,
116-
static_cast<output_bits_t>(m_hi) &
117-
FPBits<T>::FRACTION_MASK)
122+
T d_hi = FPBits<T>::create_value(
123+
sign, exp_hi,
124+
(static_cast<output_bits_t>(m_hi) & FPBits<T>::SIG_MASK) |
125+
IMPLICIT_MASK)
118126
.get_val();
119127

120-
const MantissaType round_mask = MantissaType(1) << (shift - 1);
121-
const MantissaType sticky_mask = round_mask - MantissaType(1);
128+
MantissaType round_mask = MantissaType(1) << (shift - 1);
129+
MantissaType sticky_mask = round_mask - MantissaType(1);
122130

123131
bool round_bit = !(mantissa & round_mask).is_zero();
124132
bool sticky_bit = !(mantissa & sticky_mask).is_zero();
125133
int round_and_sticky = int(round_bit) * 2 + int(sticky_bit);
126134

127135
T d_lo;
136+
128137
if (LIBC_UNLIKELY(exp_lo <= 0)) {
129138
// d_lo is denormal, but the output is normal.
130139
int scale_up_exponent = 2 * PRECISION;
131140
T scale_up_factor =
132141
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
133-
output_bits_t(0))
142+
IMPLICIT_MASK)
134143
.get_val();
135144
T scale_down_factor =
136145
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
137-
output_bits_t(0))
146+
IMPLICIT_MASK)
138147
.get_val();
139148

140149
d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
141-
output_bits_t(0))
150+
IMPLICIT_MASK)
142151
.get_val();
143152

144153
return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
145154
scale_down_factor;
146155
}
147156

148-
d_lo = FPBits<T>::create_value(sign, exp_lo, output_bits_t(0)).get_val();
157+
d_lo = FPBits<T>::create_value(sign, exp_lo, IMPLICIT_MASK).get_val();
149158

150159
// Still correct without FMA instructions if `d_lo` is not underflow.
151160
T r = multiply_add(d_lo, T(round_and_sticky), d_hi);
152161

153162
if (LIBC_UNLIKELY(denorm)) {
154-
// Output is denormal, simply clear the exponent field.
155-
output_bits_t clear_exp = output_bits_t(exp_hi)
156-
<< FPBits<T>::FRACTION_LEN;
163+
// Exponent before rounding is in denormal range, simply clear the
164+
// exponent field.
165+
output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
157166
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
167+
if (!(r_bits & FPBits<T>::EXP_MASK)) {
168+
// Output is denormal after rounding, clear the implicit bit for 80-bit
169+
// long double.
170+
r_bits -= IMPLICIT_MASK;
171+
}
172+
158173
return FPBits<T>(r_bits).get_val();
159174
}
160175

libc/src/math/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ add_math_entrypoint_object(ilogbl)
149149
add_math_entrypoint_object(ldexp)
150150
add_math_entrypoint_object(ldexpf)
151151
add_math_entrypoint_object(ldexpl)
152+
add_math_entrypoint_object(ldexpf128)
152153

153154
add_math_entrypoint_object(log10)
154155
add_math_entrypoint_object(log10f)

libc/src/math/generic/CMakeLists.txt

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,10 +1001,10 @@ add_entrypoint_object(
10011001
ldexp.cpp
10021002
HDRS
10031003
../ldexp.h
1004+
COMPILE_OPTIONS
1005+
-O3
10041006
DEPENDS
10051007
libc.src.__support.FPUtil.manipulation_functions
1006-
COMPILE_OPTIONS
1007-
-O2
10081008
)
10091009

10101010
add_entrypoint_object(
@@ -1013,10 +1013,10 @@ add_entrypoint_object(
10131013
ldexpf.cpp
10141014
HDRS
10151015
../ldexpf.h
1016+
COMPILE_OPTIONS
1017+
-O3
10161018
DEPENDS
10171019
libc.src.__support.FPUtil.manipulation_functions
1018-
COMPILE_OPTIONS
1019-
-O2
10201020
)
10211021

10221022
add_entrypoint_object(
@@ -1025,10 +1025,23 @@ add_entrypoint_object(
10251025
ldexpl.cpp
10261026
HDRS
10271027
../ldexpl.h
1028+
COMPILE_OPTIONS
1029+
-O3
10281030
DEPENDS
10291031
libc.src.__support.FPUtil.manipulation_functions
1032+
)
1033+
1034+
add_entrypoint_object(
1035+
ldexpf128
1036+
SRCS
1037+
ldexpf128.cpp
1038+
HDRS
1039+
../ldexpf128.h
10301040
COMPILE_OPTIONS
1031-
-O2
1041+
-O3
1042+
DEPENDS
1043+
libc.src.__support.macros.properties.float
1044+
libc.src.__support.FPUtil.manipulation_functions
10321045
)
10331046

10341047
add_object_library(

libc/src/math/generic/ldexpf128.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//===-- Implementation of ldexpf128 function ------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/math/ldexpf128.h"
10+
#include "src/__support/FPUtil/ManipulationFunctions.h"
11+
#include "src/__support/common.h"
12+
13+
namespace LIBC_NAMESPACE {
14+
15+
LLVM_LIBC_FUNCTION(float128, ldexpf128, (float128 x, int exp)) {
16+
return fputil::ldexp(x, exp);
17+
}
18+
19+
} // namespace LIBC_NAMESPACE

0 commit comments

Comments
 (0)