Skip to content

Commit 5e20d89

Browse files
committed
[libc][math] Implement double precision sin correctly rounded to all rounding modes.
1 parent e4e350e commit 5e20d89

File tree

18 files changed

+1259
-49
lines changed

18 files changed

+1259
-49
lines changed

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ set(TARGET_LIBM_ENTRYPOINTS
226226
libc.src.math.scalbnl
227227
libc.src.math.sincosf
228228
libc.src.math.sinhf
229+
libc.src.math.sin
229230
libc.src.math.sinf
230231
libc.src.math.sqrt
231232
libc.src.math.sqrtf

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ set(TARGET_LIBM_ENTRYPOINTS
481481
libc.src.math.scalbnl
482482
libc.src.math.sincosf
483483
libc.src.math.sinhf
484+
libc.src.math.sin
484485
libc.src.math.sinf
485486
libc.src.math.sqrt
486487
libc.src.math.sqrtf

libc/config/linux/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ set(TARGET_LIBM_ENTRYPOINTS
345345
libc.src.math.scalbnf
346346
libc.src.math.scalbnl
347347
libc.src.math.sincosf
348+
libc.src.math.sin
348349
libc.src.math.sinf
349350
libc.src.math.sinhf
350351
libc.src.math.sqrt

libc/config/linux/riscv/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ set(TARGET_LIBM_ENTRYPOINTS
489489
libc.src.math.scalbnl
490490
libc.src.math.sincosf
491491
libc.src.math.sinhf
492+
libc.src.math.sin
492493
libc.src.math.sinf
493494
libc.src.math.sqrt
494495
libc.src.math.sqrtf

libc/docs/math/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ Higher Math Functions
314314
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
315315
| rsqrt | | | | | | 7.12.7.9 | F.10.4.9 |
316316
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
317-
| sin | |check| | large | | | | 7.12.4.6 | F.10.1.6 |
317+
| sin | |check| | |check| | | | | 7.12.4.6 | F.10.1.6 |
318318
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
319319
| sincos | |check| | large | | | | | |
320320
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

libc/src/__support/FPUtil/double_double.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,12 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
4444
return exact_add(r.hi, r.lo + a.lo);
4545
}
4646

47-
// Velkamp's Splitting for double precision.
47+
// Veltkamp's Splitting for double precision.
48+
// Note: This is the original version of Veltkamp's Splitting, which is only
49+
// correct for round-to-nearest mode. See:
50+
// Graillat, S., Lafevre, V., and Muller, J.-M., "Alternative Split Functions
51+
// and Dekker's Product," ARITH'2020.
52+
// http://arith2020.arithsymposium.org/resources/paper_31.pdf
4853
LIBC_INLINE constexpr DoubleDouble split(double a) {
4954
DoubleDouble r{0.0, 0.0};
5055
// Splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
@@ -56,6 +61,9 @@ LIBC_INLINE constexpr DoubleDouble split(double a) {
5661
return r;
5762
}
5863

64+
// Note: When FMA instruction is not available, the `exact_mult` function relies
65+
// on Veltkamp's Splitting algorithm, and is only correct for round-to-nearest
66+
// mode.
5967
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
6068
DoubleDouble r{0.0, 0.0};
6169

libc/src/__support/FPUtil/dyadic_float.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,11 +270,11 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
270270
// don't need to normalize the inputs again in this function. If the inputs are
271271
// not normalized, the results might lose precision significantly.
272272
template <size_t Bits>
273-
LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
274-
DyadicFloat<Bits> b) {
273+
LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
274+
const DyadicFloat<Bits> &b) {
275275
DyadicFloat<Bits> result;
276276
result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
277-
result.exponent = a.exponent + b.exponent + int(Bits);
277+
result.exponent = a.exponent + b.exponent + static_cast<int>(Bits);
278278

279279
if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
280280
result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
@@ -301,7 +301,7 @@ multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
301301
// Simple exponentiation implementation for printf. Only handles positive
302302
// exponents, since division isn't implemented.
303303
template <size_t Bits>
304-
LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
304+
LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(const DyadicFloat<Bits> &a,
305305
uint32_t power) {
306306
DyadicFloat<Bits> result = 1.0;
307307
DyadicFloat<Bits> cur_power = a;
@@ -317,7 +317,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
317317
}
318318

319319
template <size_t Bits>
320-
LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
320+
LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(const DyadicFloat<Bits> &a,
321321
int32_t pow_2) {
322322
DyadicFloat<Bits> result = a;
323323
result.exponent += pow_2;

libc/src/__support/macros/optimization.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,18 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
3333
#error "Unhandled compiler"
3434
#endif
3535

36+
// Defining optimization options for math functions.
37+
// TODO: Exporting this to public generated headers?
38+
#define LIBC_MATH_SKIP_ACCURATE_PASS 0x01
39+
#define LIBC_MATH_SMALL_TABLES 0x02
40+
#define LIBC_MATH_NO_ERRNO 0x04
41+
#define LIBC_MATH_NO_EXCEPT 0x08
42+
#define LIBC_MATH_FAST \
43+
(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | \
44+
LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
45+
46+
#ifndef LIBC_MATH
47+
#define LIBC_MATH 0
48+
#endif // LIBC_MATH
49+
3650
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,22 @@ add_header_library(
135135
libc.src.__support.common
136136
)
137137

138+
add_header_library(
139+
range_reduction_double
140+
HDRS
141+
range_reduction_double.h
142+
range_reduction_double_fma.h
143+
DEPENDS
144+
libc.src.__support.FPUtil.double_double
145+
libc.src.__support.FPUtil.dyadic_float
146+
libc.src.__support.FPUtil.fp_bits
147+
libc.src.__support.FPUtil.fma
148+
libc.src.__support.FPUtil.multiply_add
149+
libc.src.__support.FPUtil.nearest_integer
150+
libc.src.__support.common
151+
libc.src.__support.integer_literals
152+
)
153+
138154
add_header_library(
139155
sincosf_utils
140156
HDRS
@@ -146,6 +162,15 @@ add_header_library(
146162
libc.src.__support.common
147163
)
148164

165+
add_header_library(
166+
sincos_eval
167+
HDRS
168+
sincos_eval.h
169+
DEPENDS
170+
libc.src.__support.FPUtil.double_double
171+
libc.src.__support.FPUtil.multiply_add
172+
)
173+
149174
add_entrypoint_object(
150175
cosf
151176
SRCS
@@ -167,6 +192,29 @@ add_entrypoint_object(
167192
-O3
168193
)
169194

195+
add_entrypoint_object(
196+
sin
197+
SRCS
198+
sin.cpp
199+
HDRS
200+
../sin.h
201+
DEPENDS
202+
libc.hdr.errno_macros
203+
libc.src.errno.errno
204+
libc.src.__support.FPUtil.double_double
205+
libc.src.__support.FPUtil.dyadic_float
206+
libc.src.__support.FPUtil.fenv_impl
207+
libc.src.__support.FPUtil.fp_bits
208+
libc.src.__support.FPUtil.fma
209+
libc.src.__support.FPUtil.multiply_add
210+
libc.src.__support.FPUtil.nearest_integer
211+
libc.src.__support.FPUtil.polyeval
212+
libc.src.__support.FPUtil.rounding_mode
213+
libc.src.__support.macros.optimization
214+
COMPILE_OPTIONS
215+
-O3
216+
)
217+
170218
add_entrypoint_object(
171219
sinf
172220
SRCS
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
//===-- Range reduction for double precision sin/cos/tan --------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_H
10+
#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_H
11+
12+
#include "src/__support/FPUtil/FPBits.h"
13+
#include "src/__support/FPUtil/double_double.h"
14+
#include "src/__support/FPUtil/multiply_add.h"
15+
#include "src/__support/FPUtil/nearest_integer.h"
16+
#include "src/__support/common.h"
17+
18+
namespace LIBC_NAMESPACE {
19+
20+
using fputil::DoubleDouble;
21+
22+
LIBC_INLINE constexpr int FAST_PASS_EXPONENT = 23;
23+
24+
// Digits of pi/128, generated by Sollya with:
25+
// > a = round(pi/128, D, RN);
26+
// > b = round(pi/128 - a, D, RN);
27+
LIBC_INLINE constexpr DoubleDouble PI_OVER_128 = {0x1.1a62633145c07p-60,
28+
0x1.921fb54442d18p-6};
29+
30+
// Digits of -pi/128, generated by Sollya with:
31+
// > a = round(pi/128, 25, RN);
32+
// > b = round(pi/128 - a, 23, RN);
33+
// > c = round(pi/128 - a - b, 25, RN);
34+
// > d = round(pi/128 - a - b - c, D, RN);
35+
// The precisions of the parts are chosen so that:
36+
// 1) k * a, k * b, k * c are exact in double precision
37+
// 2) k * b + fractional part of (k * a) is exact in double precsion
38+
LIBC_INLINE constexpr double MPI_OVER_128[4] = {
39+
-0x1.921fb5p-6, -0x1.110b48p-32, +0x1.ee59dap-56, -0x1.98a2e03707345p-83};
40+
41+
LIBC_INLINE constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5;
42+
43+
namespace generic {
44+
45+
LIBC_INLINE int range_reduction_small(double x, DoubleDouble &u) {
46+
double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D;
47+
double kd = fputil::nearest_integer(prod_hi);
48+
int k = static_cast<int>(kd);
49+
50+
// x - k * (pi/128)
51+
double c = fputil::multiply_add(kd, MPI_OVER_128[0], x); // Exact
52+
double y_hi = fputil::multiply_add(kd, MPI_OVER_128[1], c); // Exact
53+
double y_lo = fputil::multiply_add(kd, MPI_OVER_128[2], kd * MPI_OVER_128[3]);
54+
u = fputil::exact_add(y_hi, y_lo);
55+
56+
return k;
57+
}
58+
59+
// TODO: Implement generic's range_reduction_large correctly rounded for all
60+
// rounding modes. The current fma's range_reduction_large only works for
61+
// round-to-nearest without FMA instruction.
62+
63+
} // namespace generic
64+
65+
} // namespace LIBC_NAMESPACE
66+
67+
#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_H

0 commit comments

Comments
 (0)