[libc][math] Implement double precision log1p correctly rounded to all rounding modes.

lntue · lntue · commit b91e78da3780 · 2023-05-23T11:04:04.000-04:00
Implement double precision log1p function correctly rounded to all rounding modes.

**Performance**

- For `0.5 <= x <= 2`, the fast pass hitting rate is about 99.93%.
- Benchmarks with `./perf.sh` tool from the CORE-MATH project, unit is (CPU clocks / call).
- Reciprocal throughput from CORE-MATH's perf tool on Ryzen 5900X:

```
$ ./perf.sh log1p
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 39.792 + 1.011 clc/call; Median-Min = 0.940 clc/call; Max = 41.373 clc/call;
-- CORE-MATH reciprocal throughput -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 87.285 + 1.135 clc/call; Median-Min = 1.299 clc/call; Max = 89.715 clc/call;
-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 20.666 + 0.123 clc/call; Median-Min = 0.125 clc/call; Max = 20.828 clc/call;
-- LIBC reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 20.928 + 0.771 clc/call; Median-Min = 0.725 clc/call; Max = 22.767 clc/call;
-- LIBC reciprocal throughput -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 31.461 + 0.528 clc/call; Median-Min = 0.602 clc/call; Max = 36.809 clc/call;
```

- Latency from CORE-MATH's perf tool on Ryzen 5900X:

```
$ ./perf.sh log1p --latency
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 77.875 + 0.062 clc/call; Median-Min = 0.051 clc/call; Max = 78.003 clc/call;
-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 101.958 + 1.202 clc/call; Median-Min = 1.325 clc/call; Max = 104.452 clc/call;
-- System LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 60.581 + 1.443 clc/call; Median-Min = 1.611 clc/call; Max = 62.285 clc/call;
-- LIBC latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 48.817 + 1.108 clc/call; Median-Min = 1.300 clc/call; Max = 50.282 clc/call;
-- LIBC latency -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 61.121 + 0.599 clc/call; Median-Min = 0.761 clc/call; Max = 62.020 clc/call;
```

- Accurate pass latency:

```
$ ./perf.sh log1p --latency --simple_stat
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH latency -- with FMA
760.444
-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
827.880
-- LIBC latency -- with FMA
711.837
-- LIBC latency -- without FMA
764.317
```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D151049
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
@@ -176,6 +176,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ldexpl
     libc.src.math.log10
     libc.src.math.log10f
+    libc.src.math.log1p
     libc.src.math.log1pf
     libc.src.math.log2
     libc.src.math.log2f
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -287,6 +287,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ldexpl
     libc.src.math.log10
     libc.src.math.log10f
+    libc.src.math.log1p
     libc.src.math.log1pf
     libc.src.math.log2
     libc.src.math.log2f
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -292,6 +292,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.llroundl
     libc.src.math.log10
     libc.src.math.log10f
+    libc.src.math.log1p
     libc.src.math.log1pf
     libc.src.math.log2
     libc.src.math.log2f
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
@@ -169,6 +169,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.llroundl
     libc.src.math.log10
     libc.src.math.log10f
+    libc.src.math.log1p
     libc.src.math.log1pf
     libc.src.math.log2
     libc.src.math.log2f
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
@@ -408,6 +408,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
+          FunctionSpec<"log1p", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"log1pf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
           FunctionSpec<"log2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -114,6 +114,7 @@ add_math_entrypoint_object(ldexpl)
 add_math_entrypoint_object(log10)
 add_math_entrypoint_object(log10f)
 
+add_math_entrypoint_object(log1p)
 add_math_entrypoint_object(log1pf)
 
 add_math_entrypoint_object(log2)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -814,6 +814,26 @@ add_entrypoint_object(
     -O3
 )
 
+# Entrypoint object for the double-precision log1p function. It is built from
+# log1p.cpp, exposes ../log1p.h, and is compiled with -O3. The DEPENDS list
+# names the sibling targets (prefixed with '.') and the __support targets that
+# this entrypoint is declared to depend on.
+add_entrypoint_object(
+  log1p
+  SRCS
+    log1p.cpp
+  HDRS
+    ../log1p.h
+  DEPENDS
+    .common_constants
+    .log_range_reduction
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.double_double
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   log1pf
   SRCS
diff --git a/libc/src/math/generic/log1p.cpp b/libc/src/math/generic/log1p.cpp
diff --git a/libc/src/math/log1p.h b/libc/src/math/log1p.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for log1p -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LOG1P_H
+#define LLVM_LIBC_SRC_MATH_LOG1P_H
+
+namespace __llvm_libc {
+
+// Double-precision log1p entrypoint (registered in the stdc spec as
+// `double log1p(double)`).
+//
+// Behavior pinned down by the unit tests in test/src/math/log1p_test.cpp:
+//   - NaN input yields NaN; +inf yields +inf.
+//   - +0.0 yields +0.0 and -0.0 yields -0.0.
+//   - -1.0 yields -inf and raises FE_DIVBYZERO.
+//   - -2.0 and -inf yield NaN and raise FE_INVALID.
+//   - Finite results are expected to match MPFR within 0.5 ULP in the
+//     Nearest, Downward, Upward and TowardZero rounding modes.
+double log1p(double x);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LOG1P_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
@@ -1351,6 +1351,20 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+# MPFR-backed unit test for the double-precision log1p entrypoint.
+add_fp_unittest(
+  log1p_test
+  NEED_MPFR
+  SUITE
+    libc_math_unittests
+  SRCS
+    log1p_test.cpp
+  DEPENDS
+    libc.src.errno.errno
+    libc.include.math
+    libc.src.math.log1p
+    libc.src.__support.FPUtil.fp_bits
+)
+
 add_fp_unittest(
   log1pf_test
   NEED_MPFR
diff --git a/libc/test/src/math/log1p_test.cpp b/libc/test/src/math/log1p_test.cpp
@@ -0,0 +1,166 @@
+//===-- Unittests for log1p -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/log1p.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include <math.h>
+
+#include <errno.h>
+#include <stdint.h>
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+using __llvm_libc::testing::tlog;
+
+DECLARE_SPECIAL_CONSTANTS(double)
+
+// Checks log1p on special inputs: NaN, infinities, signed zeros, and
+// arguments at or below -1.
+TEST(LlvmLibcLog1pTest, SpecialNumbers) {
+  // NaN in, NaN out; +inf maps to +inf.
+  EXPECT_FP_EQ(aNaN, __llvm_libc::log1p(aNaN));
+  EXPECT_FP_EQ(inf, __llvm_libc::log1p(inf));
+  // Inputs below -1 (here -inf and -2.0) must give NaN and raise FE_INVALID.
+  EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(neg_inf), FE_INVALID);
+  EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(-2.0), FE_INVALID);
+  // Signed zeros are returned with their sign preserved.
+  EXPECT_FP_EQ(zero, __llvm_libc::log1p(0.0));
+  EXPECT_FP_EQ(neg_zero, __llvm_libc::log1p(-0.0));
+  // x == -1 must give -inf and raise FE_DIVBYZERO.
+  EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log1p(-1.0), FE_DIVBYZERO);
+}
+
+// Compares log1p against MPFR, in every rounding mode and with a 0.5 ULP
+// tolerance, on a fixed list of hand-picked inputs.
+TEST(LlvmLibcLog1pTest, TrickyInputs) {
+  // Raw IEEE-754 double bit patterns. The first group is the powers of ten
+  // 10^0 .. 10^22; the remaining entries are unlabeled bit patterns
+  // (NOTE(review): presumably hard-to-round cases - confirm with the author).
+  constexpr uint64_t TRICKY_BITS[] = {
+      0x3ff0000000000000, // x = 1.0
+      0x4024000000000000, // x = 10.0
+      0x4059000000000000, // x = 10^2
+      0x408f400000000000, // x = 10^3
+      0x40c3880000000000, // x = 10^4
+      0x40f86a0000000000, // x = 10^5
+      0x412e848000000000, // x = 10^6
+      0x416312d000000000, // x = 10^7
+      0x4197d78400000000, // x = 10^8
+      0x41cdcd6500000000, // x = 10^9
+      0x4202a05f20000000, // x = 10^10
+      0x42374876e8000000, // x = 10^11
+      0x426d1a94a2000000, // x = 10^12
+      0x42a2309ce5400000, // x = 10^13
+      0x42d6bcc41e900000, // x = 10^14
+      0x430c6bf526340000, // x = 10^15
+      0x4341c37937e08000, // x = 10^16
+      0x4376345785d8a000, // x = 10^17
+      0x43abc16d674ec800, // x = 10^18
+      0x43e158e460913d00, // x = 10^19
+      0x4415af1d78b58c40, // x = 10^20
+      0x444b1ae4d6e2ef50, // x = 10^21
+      0x4480f0cf064dd592, // x = 10^22
+      0x3fefffffffef06ad, 0x3fefde0f22c7d0eb, 0x225e7812faadb32f,
+      0x3fee1076964c2903, 0x3fdfe93fff7fceb0, 0x3ff012631ad8df10,
+      0x3fefbfdaa448ed98, 0x3fd00a8cefe9a5f8, 0x3fd0b4d870eb22f8,
+      0x3c90c40cef04efb5, 0x449d2ccad399848e, 0x4aa12ccdffd9d2ec,
+      0x5656f070b92d36ce, 0x6db06dcb74f76bcc, 0x7f1954e72ffd4596,
+      0x5671e2f1628093e4, 0x73dac56e2bf1a951, 0x8001bc6879ea14c5,
+  };
+  for (uint64_t bits : TRICKY_BITS) {
+    // Reinterpret the bit pattern as a double before evaluating.
+    double x = double(FPBits(bits));
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
+                                   __llvm_libc::log1p(x), 0.5);
+  }
+}
+
+// Checks log1p against MPFR (all rounding modes, 0.5 ULP) at every power of
+// two from 2^-1074 up to 2^1023, stopping at the first mismatch.
+TEST(LlvmLibcLog1pTest, AllExponents) {
+  constexpr int MIN_EXPONENT = -1074;
+  constexpr int MAX_EXPONENT = 1023;
+
+  double x = 0x1.0p-1074;
+  for (int exponent = MIN_EXPONENT; exponent <= MAX_EXPONENT; ++exponent) {
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
+                                   __llvm_libc::log1p(x), 0.5);
+    // Advance to the next power of two.
+    x *= 2.0;
+  }
+}
+
+// Samples several ranges of double bit patterns and compares log1p against
+// MPFR in each of the four rounding modes, logging the failure count and an
+// upper bound on the observed ULP error for every range.
+TEST(LlvmLibcLog1pTest, InDoubleRange) {
+  // Number of steps used to walk each [start, stop] bit-pattern range.
+  constexpr uint64_t COUNT = 1234561;
+
+  // Tests COUNT + 1 evenly spaced bit patterns starting at `start`, with the
+  // floating-point environment forced to `rounding_mode`.
+  auto test = [&](uint64_t start, uint64_t stop,
+                  mpfr::RoundingMode rounding_mode) {
+    mpfr::ForceRoundingMode __r(rounding_mode);
+    uint64_t fails = 0; // results that missed MPFR at 0.5 ULP
+    uint64_t count = 0; // results actually compared against MPFR
+    uint64_t cc = 0;    // log1p calls made (inputs that were not skipped)
+    // Input/result pair recorded the last time the tolerance had to be
+    // widened. Both are initialized explicitly: the previous declaration
+    // `double mx, mr = 0.0;` initialized only `mr`.
+    double mx = 0.0;
+    double mr = 0.0;
+    // Smallest tolerance (0.5 doubled as needed) that every sample so far
+    // satisfies.
+    double tol = 0.5;
+
+    uint64_t step = (stop - start) / COUNT;
+
+    for (uint64_t i = 0, v = start; i <= COUNT; ++i, v += step) {
+      double x = FPBits(v).get_val();
+      // Only finite, non-negative inputs are exercised here.
+      if (isnan(x) || isinf(x) || x < 0.0)
+        continue;
+      libc_errno = 0;
+      double result = __llvm_libc::log1p(x);
+      ++cc;
+      // Non-finite results are not compared against MPFR.
+      if (isnan(result) || isinf(result))
+        continue;
+
+      ++count;
+      if (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log1p, x,
+                                               result, 0.5, rounding_mode)) {
+        ++fails;
+        // Widen the tolerance until this sample passes, remembering the
+        // sample that forced the widening.
+        while (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(
+            mpfr::Operation::Log1p, x, result, tol, rounding_mode)) {
+          mx = x;
+          mr = result;
+          tol *= 2.0;
+        }
+      }
+    }
+    tlog << " Log1p failed: " << fails << "/" << count << "/" << cc
+         << " tests.\n";
+    tlog << "   Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
+    if (fails) {
+      // Re-run the recorded worst case non-silently so the failure is
+      // reported with full details.
+      EXPECT_MPFR_MATCH(mpfr::Operation::Log1p, mx, mr, 0.5, rounding_mode);
+    }
+  };
+
+  // Runs `test` over [start, stop] once per rounding mode; the strings are
+  // only used to label the log output.
+  auto test_all_rounding = [&](uint64_t start, uint64_t stop,
+                               const char *start_str, const char *stop_str) {
+    tlog << "\n=== Test in range [" << start_str << ", " << stop_str
+         << "] ===\n";
+
+    tlog << "\n Test Rounding To Nearest...\n";
+    test(start, stop, mpfr::RoundingMode::Nearest);
+
+    tlog << "\n Test Rounding Downward...\n";
+    test(start, stop, mpfr::RoundingMode::Downward);
+
+    tlog << "\n Test Rounding Upward...\n";
+    test(start, stop, mpfr::RoundingMode::Upward);
+
+    tlog << "\n Test Rounding Toward Zero...\n";
+    test(start, stop, mpfr::RoundingMode::TowardZero);
+  };
+
+  test_all_rounding(0x0000'0000'0000'0001ULL, 0x0010'0000'0000'0000ULL,
+                    "2^-1074", "2^-1022");
+
+  test_all_rounding(0x39B0'0000'0000'0000ULL, 0x3A50'0000'0000'0000ULL,
+                    "2^-100", "2^-90");
+
+  test_all_rounding(0x3CD0'0000'0000'0000ULL, 0x3D20'0000'0000'0000ULL, "2^-50",
+                    "2^-45");
+
+  test_all_rounding(0x3E10'0000'0000'0000ULL, 0x3E40'0000'0000'0000ULL, "2^-30",
+                    "2^-27");
+
+  test_all_rounding(0x3FD0'0000'0000'0000ULL, 0x4010'0000'0000'0000ULL, "0.25",
+                    "4.0");
+
+  test_all_rounding(0x4630'0000'0000'0000ULL, 0x4670'0000'0000'0000ULL, "2^100",
+                    "2^104");
+
+  test_all_rounding(0x7FD0'0000'0000'0000ULL, 0x7FF0'0000'0000'0000ULL,
+                    "2^1022", "2^1024");
+}