Skip to content

Commit 7d3fc1f

Browse files
committed
[libc][math] Add use float-only option for atan2f.
1 parent 72225ca commit 7d3fc1f

File tree

7 files changed

+318
-55
lines changed

7 files changed

+318
-55
lines changed

libc/src/__support/FPUtil/double_double.h

Lines changed: 50 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,48 @@ namespace fputil {
2020

2121
#define DEFAULT_DOUBLE_SPLIT 27
2222

23-
using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
23+
template <typename T> struct DefaultSplit;
24+
template <> struct DefaultSplit<float> { static constexpr size_t VALUE = 12; };
25+
template <> struct DefaultSplit<double> { static constexpr size_t VALUE = 27; };
26+
27+
using DoubleDouble = NumberPair<double>;
28+
using FloatFloat = NumberPair<float>;
2429

2530
// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
2631
// r.hi + r.lo = a + b exactly
2732
// and |r.lo| < eps(r.lo)
2833
// Assumption: |a| >= |b|, or a = 0.
29-
template <bool FAST2SUM = true>
30-
LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
31-
DoubleDouble r{0.0, 0.0};
34+
template <bool FAST2SUM = true, typename T = double>
35+
LIBC_INLINE constexpr NumberPair<T> exact_add(T a, T b) {
36+
NumberPair<T> r{0.0, 0.0};
3237
if constexpr (FAST2SUM) {
3338
r.hi = a + b;
34-
double t = r.hi - a;
39+
T t = r.hi - a;
3540
r.lo = b - t;
3641
} else {
3742
r.hi = a + b;
38-
double t1 = r.hi - a;
39-
double t2 = r.hi - t1;
40-
double t3 = b - t1;
41-
double t4 = a - t2;
43+
T t1 = r.hi - a;
44+
T t2 = r.hi - t1;
45+
T t3 = b - t1;
46+
T t4 = a - t2;
4247
r.lo = t3 + t4;
4348
}
4449
return r;
4550
}
4651

4752
// Assumption: |a.hi| >= |b.hi|
48-
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
49-
const DoubleDouble &b) {
50-
DoubleDouble r = exact_add(a.hi, b.hi);
51-
double lo = a.lo + b.lo;
53+
template <typename T>
54+
LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a,
55+
const NumberPair<T> &b) {
56+
NumberPair<T> r = exact_add(a.hi, b.hi);
57+
T lo = a.lo + b.lo;
5258
return exact_add(r.hi, r.lo + lo);
5359
}
5460

5561
// Assumption: |a.hi| >= |b|
56-
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
57-
DoubleDouble r = exact_add<false>(a.hi, b);
62+
template <typename T>
63+
LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a, T b) {
64+
NumberPair<T> r = exact_add<false>(a.hi, b);
5865
return exact_add(r.hi, r.lo + a.lo);
5966
}
6067

@@ -63,12 +70,12 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
6370
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
6471
// Roundings," https://inria.hal.science/hal-04480440.
6572
// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
66-
template <size_t N = DEFAULT_DOUBLE_SPLIT>
67-
LIBC_INLINE constexpr DoubleDouble split(double a) {
68-
DoubleDouble r{0.0, 0.0};
73+
template <typename T = double, size_t N = DefaultSplit<T>::VALUE>
74+
LIBC_INLINE constexpr NumberPair<T> split(T a) {
75+
NumberPair<T> r{0.0, 0.0};
6976
// CN = 2^N.
70-
constexpr double CN = static_cast<double>(1 << N);
71-
constexpr double C = CN + 1.0;
77+
constexpr T CN = static_cast<T>(1 << N);
78+
constexpr T C = CN + 1.0;
7279
double t1 = C * a;
7380
double t2 = a - t1;
7481
r.hi = t1 + t2;
@@ -77,16 +84,15 @@ LIBC_INLINE constexpr DoubleDouble split(double a) {
7784
}
7885

7986
// Helper for non-fma exact mult where the first number is already split.
80-
template <size_t SPLIT_B = DEFAULT_DOUBLE_SPLIT>
81-
LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
82-
double b) {
83-
DoubleDouble bs = split<SPLIT_B>(b);
84-
DoubleDouble r{0.0, 0.0};
87+
template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
88+
LIBC_INLINE NumberPair<T> exact_mult(const NumberPair<T> &as, T a, T b) {
89+
NumberPair<T> bs = split<T, SPLIT_B>(b);
90+
NumberPair<T> r{0.0, 0.0};
8591

8692
r.hi = a * b;
87-
double t1 = as.hi * bs.hi - r.hi;
88-
double t2 = as.hi * bs.lo + t1;
89-
double t3 = as.lo * bs.hi + t2;
93+
T t1 = as.hi * bs.hi - r.hi;
94+
T t2 = as.hi * bs.lo + t1;
95+
T t3 = as.lo * bs.hi + t2;
9096
r.lo = as.lo * bs.lo + t3;
9197

9298
return r;
@@ -99,18 +105,18 @@ LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
99105
// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
100106
// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
101107
// then a * b = r.hi + r.lo is exact for all rounding modes.
102-
template <size_t SPLIT_B = 27>
103-
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
104-
DoubleDouble r{0.0, 0.0};
108+
template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
109+
LIBC_INLINE NumberPair<T> exact_mult(T a, T b) {
110+
NumberPair<T> r{0.0, 0.0};
105111

106112
#ifdef LIBC_TARGET_CPU_HAS_FMA
107113
r.hi = a * b;
108114
r.lo = fputil::multiply_add(a, b, -r.hi);
109115
#else
110116
// Dekker's Product.
111-
DoubleDouble as = split(a);
117+
NumberPair<T> as = split(a);
112118

113-
r = exact_mult<SPLIT_B>(as, a, b);
119+
r = exact_mult<T, SPLIT_B>(as, a, b);
114120
#endif // LIBC_TARGET_CPU_HAS_FMA
115121

116122
return r;
@@ -125,7 +131,7 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
125131
template <size_t SPLIT_B = 27>
126132
LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
127133
const DoubleDouble &b) {
128-
DoubleDouble r = exact_mult<SPLIT_B>(a.hi, b.hi);
134+
DoubleDouble r = exact_mult<double, SPLIT_B>(a.hi, b.hi);
129135
double t1 = multiply_add(a.hi, b.lo, r.lo);
130136
double t2 = multiply_add(a.lo, b.hi, t1);
131137
r.lo = t2;
@@ -157,19 +163,20 @@ LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a,
157163
// rl = q * (ah - bh * rh) + q * (al - bl * rh)
158164
// as accurate as possible, then the error is bounded by:
159165
// |(ah + al) / (bh + bl) - (rh + rl)| < O(bl/bh) * (2^-52 + al/ah + bl/bh)
160-
LIBC_INLINE DoubleDouble div(const DoubleDouble &a, const DoubleDouble &b) {
161-
DoubleDouble r;
162-
double q = 1.0 / b.hi;
166+
template <typename T>
167+
LIBC_INLINE NumberPair<T> div(const NumberPair<T> &a, const NumberPair<T> &b) {
168+
NumberPair<T> r;
169+
T q = T(1) / b.hi;
163170
r.hi = a.hi * q;
164171

165172
#ifdef LIBC_TARGET_CPU_HAS_FMA
166-
double e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi);
167-
double e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo);
173+
T e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi);
174+
T e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo);
168175
#else
169-
DoubleDouble b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi);
170-
DoubleDouble b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi);
171-
double e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
172-
double e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
176+
NumberPair<T> b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi);
177+
NumberPair<T> b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi);
178+
T e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
179+
T e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
173180
#endif // LIBC_TARGET_CPU_HAS_FMA
174181

175182
r.lo = q * (e_hi + e_lo);

libc/src/__support/macros/optimization.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
4545
#define LIBC_MATH_FAST \
4646
(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | \
4747
LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
48+
#define LIBC_MATH_INTERMEDIATE_COMP_IN_FLOAT 0x10
4849

4950
#ifndef LIBC_MATH
5051
#define LIBC_MATH 0
@@ -58,4 +59,8 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
5859
#define LIBC_MATH_HAS_SMALL_TABLES
5960
#endif
6061

62+
#if (LIBC_MATH & LIBC_MATH_INTERMEDIATE_COMP_IN_FLOAT)
63+
#define LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT
64+
#endif
65+
6166
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4641,10 +4641,12 @@ add_entrypoint_object(
46414641
atan2f.cpp
46424642
HDRS
46434643
../atan2f.h
4644+
atan2f_float.h
46444645
COMPILE_OPTIONS
46454646
${libc_opt_high_flag}
46464647
DEPENDS
46474648
.inv_trigf_utils
4649+
libc.src.__support.FPUtil.double_double
46484650
libc.src.__support.FPUtil.fp_bits
46494651
libc.src.__support.FPUtil.multiply_add
46504652
libc.src.__support.FPUtil.nearest_integer

libc/src/math/generic/atan2f.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@
1717
#include "src/__support/macros/config.h"
1818
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
1919

20+
#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
21+
defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
22+
23+
// We use float-float implementation to reduce size.
24+
#include "src/math/generic/atan2f_float.h"
25+
26+
#else
27+
2028
namespace LIBC_NAMESPACE_DECL {
2129

2230
namespace {
@@ -324,3 +332,5 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
324332
}
325333

326334
} // namespace LIBC_NAMESPACE_DECL
335+
336+
#endif

0 commit comments

Comments
 (0)