Skip to content

Commit b91e78d

Browse files
committed
[libc][math] Implement double precision log1p correctly rounded to all rounding modes.
Implement double precision log1p function correctly rounded to all rounding modes.

**Performance**

- For `0.5 <= x <= 2`, the fast-pass hit rate is about 99.93%.
- Benchmarked with the `./perf.sh` tool from the CORE-MATH project; the unit is CPU clocks per call (clc/call).
- Reciprocal throughput from CORE-MATH's perf tool on Ryzen 5900X:

```
$ ./perf.sh log1p
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 39.792 + 1.011 clc/call; Median-Min = 0.940 clc/call; Max = 41.373 clc/call;
-- CORE-MATH reciprocal throughput -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 87.285 + 1.135 clc/call; Median-Min = 1.299 clc/call; Max = 89.715 clc/call;
-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 20.666 + 0.123 clc/call; Median-Min = 0.125 clc/call; Max = 20.828 clc/call;
-- LIBC reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 20.928 + 0.771 clc/call; Median-Min = 0.725 clc/call; Max = 22.767 clc/call;
-- LIBC reciprocal throughput -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 31.461 + 0.528 clc/call; Median-Min = 0.602 clc/call; Max = 36.809 clc/call;
```

- Latency from CORE-MATH's perf tool on Ryzen 5900X:

```
$ ./perf.sh log1p --latency
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 77.875 + 0.062 clc/call; Median-Min = 0.051 clc/call; Max = 78.003 clc/call;
-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 101.958 + 1.202 clc/call; Median-Min = 1.325 clc/call; Max = 104.452 clc/call;
-- System LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 60.581 + 1.443 clc/call; Median-Min = 1.611 clc/call; Max = 62.285 clc/call;
-- LIBC latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 48.817 + 1.108 clc/call; Median-Min = 1.300 clc/call; Max = 50.282 clc/call;
-- LIBC latency -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 61.121 + 0.599 clc/call; Median-Min = 0.761 clc/call; Max = 62.020 clc/call;
```

- Accurate pass latency:

```
$ ./perf.sh log1p --latency --simple_stat
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH latency -- with FMA
760.444
-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
827.880
-- LIBC latency -- with FMA
711.837
-- LIBC latency -- without FMA
764.317
```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D151049
1 parent 18a5bd7 commit b91e78d

File tree

11 files changed

+1262
-0
lines changed

11 files changed

+1262
-0
lines changed

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ set(TARGET_LIBM_ENTRYPOINTS
176176
libc.src.math.ldexpl
177177
libc.src.math.log10
178178
libc.src.math.log10f
179+
libc.src.math.log1p
179180
libc.src.math.log1pf
180181
libc.src.math.log2
181182
libc.src.math.log2f

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ set(TARGET_LIBM_ENTRYPOINTS
287287
libc.src.math.ldexpl
288288
libc.src.math.log10
289289
libc.src.math.log10f
290+
libc.src.math.log1p
290291
libc.src.math.log1pf
291292
libc.src.math.log2
292293
libc.src.math.log2f

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ set(TARGET_LIBM_ENTRYPOINTS
292292
libc.src.math.llroundl
293293
libc.src.math.log10
294294
libc.src.math.log10f
295+
libc.src.math.log1p
295296
libc.src.math.log1pf
296297
libc.src.math.log2
297298
libc.src.math.log2f

libc/config/windows/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ set(TARGET_LIBM_ENTRYPOINTS
169169
libc.src.math.llroundl
170170
libc.src.math.log10
171171
libc.src.math.log10f
172+
libc.src.math.log1p
172173
libc.src.math.log1pf
173174
libc.src.math.log2
174175
libc.src.math.log2f

libc/spec/stdc.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ def StdC : StandardSpec<"stdc"> {
408408
FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
409409
FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
410410

411+
FunctionSpec<"log1p", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
411412
FunctionSpec<"log1pf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
412413

413414
FunctionSpec<"log2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,

libc/src/math/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ add_math_entrypoint_object(ldexpl)
114114
add_math_entrypoint_object(log10)
115115
add_math_entrypoint_object(log10f)
116116

117+
add_math_entrypoint_object(log1p)
117118
add_math_entrypoint_object(log1pf)
118119

119120
add_math_entrypoint_object(log2)

libc/src/math/generic/CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,26 @@ add_entrypoint_object(
814814
-O3
815815
)
816816

817+
# Entrypoint object for the double-precision log1p: built from log1p.cpp,
# exposed through ../log1p.h, compiled with -O3, and depending on the
# targets listed under DEPENDS.
add_entrypoint_object(
818+
log1p
819+
SRCS
820+
log1p.cpp
821+
HDRS
822+
../log1p.h
823+
DEPENDS
824+
.common_constants
825+
.log_range_reduction
826+
libc.src.__support.FPUtil.fenv_impl
827+
libc.src.__support.FPUtil.fp_bits
828+
libc.src.__support.FPUtil.multiply_add
829+
libc.src.__support.FPUtil.polyeval
830+
libc.src.__support.FPUtil.double_double
831+
libc.src.__support.FPUtil.dyadic_float
832+
libc.src.__support.macros.optimization
833+
COMPILE_OPTIONS
834+
-O3
835+
)
836+
817837
add_entrypoint_object(
818838
log1pf
819839
SRCS

libc/src/math/generic/log1p.cpp

Lines changed: 1038 additions & 0 deletions
Large diffs are not rendered by default.

libc/src/math/log1p.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//===-- Implementation header for log1p -------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_MATH_LOG1P_H
10+
#define LLVM_LIBC_SRC_MATH_LOG1P_H
11+
12+
namespace __llvm_libc {
13+
14+
// Double-precision log1p entrypoint (log1p as in the C standard library,
// i.e. the natural logarithm of 1 + x). The accompanying unit tests expect
// log1p(+-0) == +-0, log1p(+inf) == +inf, log1p(-1) == -inf with
// FE_DIVBYZERO, and NaN with FE_INVALID for -2.0 and -inf.
double log1p(double x);
15+
16+
} // namespace __llvm_libc
17+
18+
#endif // LLVM_LIBC_SRC_MATH_LOG1P_H

libc/test/src/math/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,6 +1351,20 @@ add_fp_unittest(
13511351
libc.src.__support.FPUtil.fp_bits
13521352
)
13531353

1354+
# Unit test target for log1p: compiled from log1p_test.cpp into the
# libc_math_unittests suite. NEED_MPFR is set because the test compares
# results against MPFR through utils/MPFRWrapper.
add_fp_unittest(
1355+
log1p_test
1356+
NEED_MPFR
1357+
SUITE
1358+
libc_math_unittests
1359+
SRCS
1360+
log1p_test.cpp
1361+
DEPENDS
1362+
libc.src.errno.errno
1363+
libc.include.math
1364+
libc.src.math.log1p
1365+
libc.src.__support.FPUtil.fp_bits
1366+
)
1367+
13541368
add_fp_unittest(
13551369
log1pf_test
13561370
NEED_MPFR

libc/test/src/math/log1p_test.cpp

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
//===-- Unittests for log1p -----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/__support/FPUtil/FPBits.h"
10+
#include "src/errno/libc_errno.h"
11+
#include "src/math/log1p.h"
12+
#include "test/UnitTest/FPMatcher.h"
13+
#include "test/UnitTest/Test.h"
14+
#include "utils/MPFRWrapper/MPFRUtils.h"
15+
#include <math.h>
16+
17+
#include <errno.h>
18+
#include <stdint.h>
19+
20+
namespace mpfr = __llvm_libc::testing::mpfr;
21+
using __llvm_libc::testing::tlog;
22+
23+
DECLARE_SPECIAL_CONSTANTS(double)
24+
25+
// Checks log1p on special inputs: NaN and +inf map to themselves, signed
// zeros keep their sign, arguments below -1 (-2.0 and -inf) must produce NaN
// with FE_INVALID, and -1 must produce -inf with FE_DIVBYZERO.
TEST(LlvmLibcLog1pTest, SpecialNumbers) {
26+
EXPECT_FP_EQ(aNaN, __llvm_libc::log1p(aNaN));
27+
EXPECT_FP_EQ(inf, __llvm_libc::log1p(inf));
28+
// Out-of-domain arguments (x < -1).
EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(neg_inf), FE_INVALID);
29+
EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(-2.0), FE_INVALID);
30+
// The sign of zero is expected to be preserved.
EXPECT_FP_EQ(zero, __llvm_libc::log1p(0.0));
31+
EXPECT_FP_EQ(neg_zero, __llvm_libc::log1p(-0.0));
32+
// x == -1 is the pole of log1p.
EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log1p(-1.0), FE_DIVBYZERO);
33+
}
34+
35+
// Runs log1p on a fixed table of inputs, given as raw 64-bit patterns, and
// compares each result against MPFR via EXPECT_MPFR_MATCH_ALL_ROUNDING with
// a tolerance argument of 0.5.
TEST(LlvmLibcLog1pTest, TrickyInputs) {
36+
// N must equal the number of initializers in INPUTS below:
// 23 labelled values (1.0 and 10^1 .. 10^22) plus 18 unlabelled patterns.
constexpr int N = 41;
37+
constexpr uint64_t INPUTS[N] = {
38+
0x3ff0000000000000, // x = 1.0
39+
0x4024000000000000, // x = 10.0
40+
0x4059000000000000, // x = 10^2
41+
0x408f400000000000, // x = 10^3
42+
0x40c3880000000000, // x = 10^4
43+
0x40f86a0000000000, // x = 10^5
44+
0x412e848000000000, // x = 10^6
45+
0x416312d000000000, // x = 10^7
46+
0x4197d78400000000, // x = 10^8
47+
0x41cdcd6500000000, // x = 10^9
48+
0x4202a05f20000000, // x = 10^10
49+
0x42374876e8000000, // x = 10^11
50+
0x426d1a94a2000000, // x = 10^12
51+
0x42a2309ce5400000, // x = 10^13
52+
0x42d6bcc41e900000, // x = 10^14
53+
0x430c6bf526340000, // x = 10^15
54+
0x4341c37937e08000, // x = 10^16
55+
0x4376345785d8a000, // x = 10^17
56+
0x43abc16d674ec800, // x = 10^18
57+
0x43e158e460913d00, // x = 10^19
58+
0x4415af1d78b58c40, // x = 10^20
59+
0x444b1ae4d6e2ef50, // x = 10^21
60+
0x4480f0cf064dd592, // x = 10^22
61+
// Unlabelled patterns follow; presumably hard-to-round or previously
// failing cases -- TODO confirm their origin.
0x3fefffffffef06ad, 0x3fefde0f22c7d0eb, 0x225e7812faadb32f,
62+
0x3fee1076964c2903, 0x3fdfe93fff7fceb0, 0x3ff012631ad8df10,
63+
0x3fefbfdaa448ed98, 0x3fd00a8cefe9a5f8, 0x3fd0b4d870eb22f8,
64+
0x3c90c40cef04efb5, 0x449d2ccad399848e, 0x4aa12ccdffd9d2ec,
65+
0x5656f070b92d36ce, 0x6db06dcb74f76bcc, 0x7f1954e72ffd4596,
66+
// NOTE(review): the last pattern below has the top (sign) bit set, so it
// is the only negative input in the table.
0x5671e2f1628093e4, 0x73dac56e2bf1a951, 0x8001bc6879ea14c5,
67+
};
68+
for (int i = 0; i < N; ++i) {
69+
// Reinterpret the stored bit pattern as a double.
double x = double(FPBits(INPUTS[i]));
70+
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
71+
__llvm_libc::log1p(x), 0.5);
72+
}
73+
}
74+
75+
// Checks log1p at every power of two from 2^-1074 up to 2^1023: x starts at
// 0x1.0p-1074 and is doubled after each of the 2098 iterations.
TEST(LlvmLibcLog1pTest, AllExponents) {
76+
double x = 0x1.0p-1074;
77+
// i only tracks the current exponent; the value under test is x.
for (int i = -1074; i < 1024; ++i, x *= 2.0) {
78+
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
79+
__llvm_libc::log1p(x), 0.5);
80+
}
81+
}
82+
83+
// Samples log1p over seven ranges of non-negative doubles. Each range is
// given as a pair of raw 64-bit patterns and is walked in COUNT equal
// bit-pattern steps, once per rounding mode (nearest, downward, upward,
// toward zero). Results are compared against MPFR and failure statistics
// are written to tlog.
TEST(LlvmLibcLog1pTest, InDoubleRange) {
84+
// Number of steps per range; the sampling loop runs for i = 0 .. COUNT
// inclusive.
constexpr uint64_t COUNT = 1234561;
85+
86+
// Tests one range under one rounding mode.
//   start, stop   - raw bit patterns bounding the range.
//   rounding_mode - mode passed to ForceRoundingMode and to the matchers.
auto test = [&](uint64_t start, uint64_t stop,
87+
mpfr::RoundingMode rounding_mode) {
88+
// Presumably switches the floating-point rounding mode for the lifetime
// of __r -- confirm in MPFRUtils.
mpfr::ForceRoundingMode __r(rounding_mode);
89+
// fails: samples that did not match at tolerance 0.5.
uint64_t fails = 0;
90+
// count: samples whose result was finite and therefore compared.
uint64_t count = 0;
91+
// cc: number of log1p calls made.
uint64_t cc = 0;
92+
// Input/result pair recorded while widening the tolerance below.
// NOTE(review): mx has no initializer; it is only read when fails != 0,
// and the first failure always assigns it because tol is still 0.5 then.
double mx, mr = 0.0;
93+
// Tolerance that is doubled until every failing sample matches; it is
// logged below as an upper bound on the observed error in ULPs.
double tol = 0.5;
94+
95+
// Integer division: the last sample may fall short of `stop`.
uint64_t step = (stop - start) / COUNT;
96+
97+
for (uint64_t i = 0, v = start; i <= COUNT; ++i, v += step) {
98+
double x = FPBits(v).get_val();
99+
// Skip NaN, infinite and negative inputs.
if (isnan(x) || isinf(x) || x < 0.0)
100+
continue;
101+
// errno is reset before the call; nothing in this block reads it back.
libc_errno = 0;
102+
double result = __llvm_libc::log1p(x);
103+
++cc;
104+
// Non-finite results are counted in cc but not compared against MPFR.
if (isnan(result) || isinf(result))
105+
continue;
106+
107+
++count;
108+
// ASSERT_MPFR_MATCH(mpfr::Operation::Log1p, x, result, 0.5);
109+
if (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log1p, x,
110+
result, 0.5, rounding_mode)) {
111+
++fails;
112+
// Widen tol until this sample matches, remembering the sample that
// forced the widening.
while (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(
113+
mpfr::Operation::Log1p, x, result, tol, rounding_mode)) {
114+
mx = x;
115+
mr = result;
116+
tol *= 2.0;
117+
}
118+
}
119+
}
120+
// Logged as fails / compared samples / log1p calls.
tlog << " Log1p failed: " << fails << "/" << count << "/" << cc
121+
<< " tests.\n";
122+
tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
123+
if (fails) {
124+
// Re-check the recorded sample with the non-silent matcher at the
// original 0.5 tolerance; presumably this is what reports the failure
// to the test framework -- confirm.
EXPECT_MPFR_MATCH(mpfr::Operation::Log1p, mx, mr, 0.5, rounding_mode);
125+
}
126+
};
127+
128+
// Logs a header for the range and runs `test` once per rounding mode.
// start_str and stop_str are used only as labels in the log output.
auto test_all_rounding = [&](uint64_t start, uint64_t stop,
129+
const char *start_str, const char *stop_str) {
130+
tlog << "\n=== Test in range [" << start_str << ", " << stop_str
131+
<< "] ===\n";
132+
133+
tlog << "\n Test Rounding To Nearest...\n";
134+
test(start, stop, mpfr::RoundingMode::Nearest);
135+
136+
tlog << "\n Test Rounding Downward...\n";
137+
test(start, stop, mpfr::RoundingMode::Downward);
138+
139+
tlog << "\n Test Rounding Upward...\n";
140+
test(start, stop, mpfr::RoundingMode::Upward);
141+
142+
tlog << "\n Test Rounding Toward Zero...\n";
143+
test(start, stop, mpfr::RoundingMode::TowardZero);
144+
};
145+
146+
// Each call passes the bit patterns of the two labelled endpoints.
test_all_rounding(0x0000'0000'0000'0001ULL, 0x0010'0000'0000'0000ULL,
147+
"2^-1074", "2^-1022");
148+
149+
test_all_rounding(0x39B0'0000'0000'0000ULL, 0x3A50'0000'0000'0000ULL,
150+
"2^-100", "2^-90");
151+
152+
test_all_rounding(0x3CD0'0000'0000'0000ULL, 0x3D20'0000'0000'0000ULL, "2^-50",
153+
"2^-45");
154+
155+
test_all_rounding(0x3E10'0000'0000'0000ULL, 0x3E40'0000'0000'0000ULL, "2^-30",
156+
"2^-27");
157+
158+
test_all_rounding(0x3FD0'0000'0000'0000ULL, 0x4010'0000'0000'0000ULL, "0.25",
159+
"4.0");
160+
161+
test_all_rounding(0x4630'0000'0000'0000ULL, 0x4670'0000'0000'0000ULL, "2^100",
162+
"2^104");
163+
164+
// NOTE(review): the stop pattern 0x7FF0'0000'0000'0000 is the IEEE-754
// binary64 encoding of +infinity (labelled "2^1024"); if a sample lands on
// it, the isinf(x) filter in `test` skips it.
test_all_rounding(0x7FD0'0000'0000'0000ULL, 0x7FF0'0000'0000'0000ULL,
165+
"2^1022", "2^1024");
166+
}

0 commit comments

Comments
 (0)