Skip to content

Commit c9ee6b1

Browse files
authored
[libc][math] Implement cbrtf function correctly rounded to all rounding modes. (#97936)
Fixes #92874 Algorithm: Let `x = (-1)^s * 2^e * (1 + m)`. - Step 1: Range reduction: reduce the exponent with: ``` y = cbrt(x) = (-1)^s * 2^(floor(e/3)) * 2^((e % 3)/3) * (1 + m)^(1/3) ``` - Step 2: Use the first 4 bit fractional bits of `m` to look up for a degree-7 polynomial approximation to: ``` (1 + m)^(1/3) ~ 1 + m * P(m). ``` - Step 3: Perform the multiplication: ``` 2^((e % 3)/3) * (1 + m)^(1/3). ``` - Step 4: Check for exact cases to prevent rounding and clear `FE_INEXACT` floating point exception. - Step 5: Combine with the exponent and sign before converting down to `float` and return.
1 parent 7e054c3 commit c9ee6b1

File tree

24 files changed

+374
-8
lines changed

24 files changed

+374
-8
lines changed

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ set(TARGET_LIBM_ENTRYPOINTS
123123
libc.src.math.atan2f
124124
libc.src.math.atanf
125125
libc.src.math.atanhf
126+
libc.src.math.cbrtf
126127
libc.src.math.copysign
127128
libc.src.math.copysignf
128129
libc.src.math.copysignl

libc/config/gpu/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ set(TARGET_LIBM_ENTRYPOINTS
242242
libc.src.math.atanf
243243
libc.src.math.atanh
244244
libc.src.math.atanhf
245+
libc.src.math.cbrtf
245246
libc.src.math.ceil
246247
libc.src.math.ceilf
247248
libc.src.math.copysign

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ set(TARGET_LIBM_ENTRYPOINTS
345345
libc.src.math.atan2f
346346
libc.src.math.atanf
347347
libc.src.math.atanhf
348+
libc.src.math.cbrtf
348349
libc.src.math.ceil
349350
libc.src.math.ceilf
350351
libc.src.math.ceill

libc/config/linux/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ set(TARGET_LIBM_ENTRYPOINTS
216216
libc.src.math.atan2f
217217
libc.src.math.atanf
218218
libc.src.math.atanhf
219+
libc.src.math.cbrtf
219220
libc.src.math.ceil
220221
libc.src.math.ceilf
221222
libc.src.math.ceill

libc/config/linux/riscv/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ set(TARGET_LIBM_ENTRYPOINTS
347347
libc.src.math.atan2f
348348
libc.src.math.atanf
349349
libc.src.math.atanhf
350+
libc.src.math.cbrtf
350351
libc.src.math.ceil
351352
libc.src.math.ceilf
352353
libc.src.math.ceill

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ set(TARGET_LIBM_ENTRYPOINTS
370370
libc.src.math.canonicalize
371371
libc.src.math.canonicalizef
372372
libc.src.math.canonicalizel
373+
libc.src.math.cbrtf
373374
libc.src.math.ceil
374375
libc.src.math.ceilf
375376
libc.src.math.ceill

libc/config/windows/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ set(TARGET_LIBM_ENTRYPOINTS
121121
libc.src.math.atan2f
122122
libc.src.math.atanf
123123
libc.src.math.atanhf
124+
libc.src.math.cbrtf
124125
libc.src.math.copysign
125126
libc.src.math.copysignf
126127
libc.src.math.copysignl

libc/docs/math/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ Higher Math Functions
266266
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
267267
| atanpi | | | | | | 7.12.4.10 | F.10.1.10 |
268268
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
269-
| cbrt | | | | | | 7.12.7.1 | F.10.4.1 |
269+
| cbrt | |check| | | | | | 7.12.7.1 | F.10.4.1 |
270270
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
271271
| compoundn | | | | | | 7.12.7.2 | F.10.4.2 |
272272
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

libc/spec/stdc.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,8 @@ def StdC : StandardSpec<"stdc"> {
382382
],
383383
[], // Enumerations
384384
[
385+
FunctionSpec<"cbrtf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
386+
385387
FunctionSpec<"copysign", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
386388
FunctionSpec<"copysignf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
387389
FunctionSpec<"copysignl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,

libc/src/__support/FPUtil/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ add_header_library(
155155
multiply_add.h
156156
DEPENDS
157157
libc.src.__support.common
158+
FLAGS
159+
FMA_OPT
158160
)
159161

160162
add_header_library(

libc/src/__support/FPUtil/FEnvImpl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; }
6767

6868
namespace LIBC_NAMESPACE::fputil {
6969

70+
LIBC_INLINE int clear_except_if_required(int excepts) {
71+
if (math_errhandling & MATH_ERREXCEPT)
72+
return clear_except(excepts);
73+
return 0;
74+
}
75+
7076
LIBC_INLINE int set_except_if_required(int excepts) {
7177
if (math_errhandling & MATH_ERREXCEPT)
7278
return set_except(excepts);

libc/src/math/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ add_math_entrypoint_object(canonicalizel)
6565
add_math_entrypoint_object(canonicalizef16)
6666
add_math_entrypoint_object(canonicalizef128)
6767

68+
add_math_entrypoint_object(cbrtf)
69+
6870
add_math_entrypoint_object(ceil)
6971
add_math_entrypoint_object(ceilf)
7072
add_math_entrypoint_object(ceill)

libc/src/math/cbrtf.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//===-- Implementation header for cbrtf -------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_MATH_CBRTF_H
10+
#define LLVM_LIBC_SRC_MATH_CBRTF_H
11+
12+
namespace LIBC_NAMESPACE {
13+
14+
float cbrtf(float x);
15+
16+
} // namespace LIBC_NAMESPACE
17+
18+
#endif // LLVM_LIBC_SRC_MATH_CBRTF_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4092,3 +4092,19 @@ add_entrypoint_object(
40924092
COMPILE_OPTIONS
40934093
-O3
40944094
)
4095+
4096+
add_entrypoint_object(
4097+
cbrtf
4098+
SRCS
4099+
cbrtf.cpp
4100+
HDRS
4101+
../cbrtf.h
4102+
COMPILE_OPTIONS
4103+
-O3
4104+
DEPENDS
4105+
libc.hdr.fenv_macros
4106+
libc.src.__support.FPUtil.fenv_impl
4107+
libc.src.__support.FPUtil.fp_bits
4108+
libc.src.__support.FPUtil.multiply_add
4109+
libc.src.__support.macros.optimization
4110+
)

libc/src/math/generic/cbrtf.cpp

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
//===-- Implementation of cbrtf function ----------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/math/cbrtf.h"
10+
#include "hdr/fenv_macros.h"
11+
#include "src/__support/FPUtil/FEnvImpl.h"
12+
#include "src/__support/FPUtil/FPBits.h"
13+
#include "src/__support/FPUtil/multiply_add.h"
14+
#include "src/__support/common.h"
15+
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
16+
17+
namespace LIBC_NAMESPACE {
18+
19+
namespace {
20+
21+
// Look up table for 2^(i/3) for i = 0, 1, 2.
22+
constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0};
23+
24+
// Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1
25+
// generated by Sollya with:
26+
// > for i from 0 to 15 do {
27+
// P = fpminimax((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]);
28+
// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
29+
// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",",
30+
// coeff(P, 6), "},");
31+
// };
32+
// Then (1 + x)^(1/3) ~ 1 + x * P(x).
33+
constexpr double COEFFS[16][7] = {
34+
{0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5,
35+
-0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6,
36+
0x1.047f75e0aff14p-6},
37+
{0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5,
38+
-0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6,
39+
0x1.716eca1d6e3bcp-7},
40+
{0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5,
41+
-0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6,
42+
0x1.0aeca34893785p-7},
43+
{0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5,
44+
-0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6,
45+
0x1.88410674c6a5dp-8},
46+
{0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5,
47+
-0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7,
48+
0x1.249bb51a1c498p-8},
49+
{0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5,
50+
-0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7,
51+
0x1.ba9230918fa2ep-9},
52+
{0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5,
53+
-0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7,
54+
0x1.52e3e17c71069p-9},
55+
{0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5,
56+
-0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7,
57+
0x1.0677d9af6aad4p-9},
58+
{0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5,
59+
-0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7,
60+
0x1.9ad325dc7adcbp-10},
61+
{0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5,
62+
-0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8,
63+
0x1.449f5ee175c69p-10},
64+
{0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5,
65+
-0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8,
66+
0x1.02cb36389bd79p-10},
67+
{0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5,
68+
-0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8,
69+
0x1.a008ae91f5936p-11},
70+
{0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5,
71+
-0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8,
72+
0x1.50f8174cdb6e9p-11},
73+
{0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5,
74+
-0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9,
75+
0x1.12e0e94e8586dp-11},
76+
{0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5,
77+
-0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9,
78+
0x1.c3726535f1fc6p-12},
79+
{0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5,
80+
-0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9,
81+
0x1.75077abf18d84p-12},
82+
};
83+
84+
} // anonymous namespace
85+
86+
LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) {
87+
using FloatBits = typename fputil::FPBits<float>;
88+
using DoubleBits = typename fputil::FPBits<double>;
89+
90+
FloatBits x_bits(x);
91+
92+
uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff;
93+
uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN;
94+
95+
if (LIBC_UNLIKELY(x_abs == 0 || x_abs >= 0x7f80'0000)) {
96+
// x is 0, Inf, or NaN.
97+
return x;
98+
}
99+
100+
double xd = static_cast<double>(x);
101+
DoubleBits xd_bits(xd);
102+
103+
// When using biased exponent of x in double precision,
104+
// x_e = real_exponent_of_x + 1023
105+
// Then:
106+
// x_e / 3 = real_exponent_of_x / 3 + 1023/3
107+
// = real_exponent_of_x / 3 + 341
108+
// So to make it the correct biased exponent of x^(1/3), we add
109+
// 1023 - 341 = 682
110+
// to the quotient x_e / 3.
111+
unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent());
112+
unsigned out_e = (x_e / 3 + 682) | sign_bit;
113+
unsigned shift_e = x_e % 3;
114+
115+
// Set x_m = 2^(x_e % 3) * (1.mantissa)
116+
uint64_t x_m = xd_bits.get_mantissa();
117+
// Use the leading 4 bits for look up table
118+
unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4));
119+
120+
x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS)
121+
<< DoubleBits::FRACTION_LEN;
122+
123+
double x_reduced = DoubleBits(x_m).get_val();
124+
double dx = x_reduced - 1.0;
125+
126+
double dx_sq = dx * dx;
127+
double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0);
128+
double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]);
129+
double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]);
130+
double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]);
131+
132+
double dx_4 = dx_sq * dx_sq;
133+
double p0 = fputil::multiply_add(dx_sq, c1, c0);
134+
double p1 = fputil::multiply_add(dx_sq, c3, c2);
135+
136+
double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e];
137+
138+
uint64_t r_m = DoubleBits(r).get_mantissa();
139+
// Check if the output is exact. To be exact, the smallest 1-bit of the
140+
// output has to be at least 2^-7 or higher. So we check the lowest 44 bits
141+
// to see if they are within 2^(-52 + 3) errors from all zeros, then the
142+
// result cube root is exact.
143+
if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) {
144+
if ((r_m & 0xfffffffffff) <= 8)
145+
r_m &= 0xffff'ffff'ffff'ffe0;
146+
else
147+
r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20;
148+
fputil::clear_except_if_required(FE_INEXACT);
149+
}
150+
// Adjust exponent and sign.
151+
uint64_t r_bits =
152+
r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN);
153+
154+
return static_cast<float>(DoubleBits(r_bits).get_val());
155+
}
156+
157+
} // namespace LIBC_NAMESPACE

libc/test/src/math/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,6 +2213,18 @@ add_fp_unittest(
22132213
libc.src.math.f16sqrtl
22142214
)
22152215

2216+
add_fp_unittest(
2217+
cbrtf_test
2218+
NEED_MPFR
2219+
SUITE
2220+
libc-math-unittests
2221+
SRCS
2222+
cbrtf_test.cpp
2223+
DEPENDS
2224+
libc.src.math.cbrtf
2225+
libc.src.__support.FPUtil.fp_bits
2226+
)
2227+
22162228
add_subdirectory(generic)
22172229
add_subdirectory(smoke)
22182230

libc/test/src/math/cbrtf_test.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//===-- Unittests for cbrtf -----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/math_macros.h"
10+
#include "src/__support/FPUtil/FPBits.h"
11+
#include "src/math/cbrtf.h"
12+
#include "test/UnitTest/FPMatcher.h"
13+
#include "test/UnitTest/Test.h"
14+
#include "utils/MPFRWrapper/MPFRUtils.h"
15+
16+
using LlvmLibcCbrtfTest = LIBC_NAMESPACE::testing::FPTest<float>;
17+
18+
namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
19+
20+
TEST_F(LlvmLibcCbrtfTest, InFloatRange) {
21+
constexpr uint32_t COUNT = 100'000;
22+
const uint32_t STEP = FPBits(inf).uintval() / COUNT;
23+
for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
24+
float x = FPBits(v).get_val();
25+
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, x,
26+
LIBC_NAMESPACE::cbrtf(x), 0.5);
27+
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, -x,
28+
LIBC_NAMESPACE::cbrtf(-x), 0.5);
29+
}
30+
}
31+
32+
TEST_F(LlvmLibcCbrtfTest, SpecialValues) {
33+
constexpr float INPUTS[] = {
34+
0x1.60451p2f, 0x1.31304cp1f, 0x1.d17cp2f, 0x1.bp-143f, 0x1.338cp2f,
35+
};
36+
for (float v : INPUTS) {
37+
float x = FPBits(v).get_val();
38+
mpfr::ForceRoundingMode r(mpfr::RoundingMode::Upward);
39+
EXPECT_MPFR_MATCH(mpfr::Operation::Cbrt, x, LIBC_NAMESPACE::cbrtf(x), 0.5,
40+
mpfr::RoundingMode::Upward);
41+
}
42+
}

libc/test/src/math/exhaustive/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,3 +486,19 @@ add_fp_unittest(
486486
LINK_LIBRARIES
487487
-lpthread
488488
)
489+
490+
add_fp_unittest(
491+
cbrtf_test
492+
NO_RUN_POSTBUILD
493+
NEED_MPFR
494+
SUITE
495+
libc_math_exhaustive_tests
496+
SRCS
497+
cbrtf_test.cpp
498+
DEPENDS
499+
.exhaustive_test
500+
libc.src.math.cbrtf
501+
libc.src.__support.FPUtil.fp_bits
502+
LINK_LIBRARIES
503+
-lpthread
504+
)

0 commit comments

Comments
 (0)