fixup! [libc][math] Optimize nearest integer functions using builtins when available

overmighty · overmighty · commit fddf0188419e · 2024-07-15T15:27:28.000+02:00
Optimize rint{,f,f16} using __builtin_rint{,f} when available.
diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -4,7 +4,7 @@
 
 set(
   ALL_COMPILER_FEATURES
-    "builtin_ceil_floor_trunc"
+    "builtin_ceil_floor_rint_trunc"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -76,8 +76,8 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_TYPES_HAS_FLOAT128 TRUE)
     elseif(${feature} STREQUAL "fixed_point")
       set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
-    elseif(${feature} STREQUAL "builtin_ceil_floor_trunc")
-      set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_TRUNC TRUE)
+    elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
+      set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -23,8 +23,9 @@ function(_get_compile_options_from_flags output_var)
         # have SSE4.1.
         list(APPEND compile_options "-msse4.2")
       endif()
-      if(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_TRUNC)
-        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC")
+      if(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC)
+        list(APPEND compile_options
+             "-D__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC")
       endif()
       if(LIBC_COMPILER_HAS_BUILTIN_ROUND)
         list(APPEND compile_options "-D__LIBC_USE_BUILTIN_ROUND")
diff --git a/libc/cmake/modules/compiler_features/check_builtin_ceil_floor_rint_trunc.cpp b/libc/cmake/modules/compiler_features/check_builtin_ceil_floor_rint_trunc.cpp
@@ -1,9 +1,11 @@
 float try_builtin_ceilf(float x) { return __builtin_ceilf(x); }
 float try_builtin_floorf(float x) { return __builtin_floorf(x); }
+float try_builtin_rintf(float x) { return __builtin_rintf(x); } // rintf probe
 float try_builtin_truncf(float x) { return __builtin_truncf(x); }
 
 double try_builtin_ceil(double x) { return __builtin_ceil(x); }
 double try_builtin_floor(double x) { return __builtin_floor(x); }
+double try_builtin_rint(double x) { return __builtin_rint(x); } // rint probe
 double try_builtin_trunc(double x) { return __builtin_trunc(x); }
 
 int main() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -862,6 +862,8 @@ add_entrypoint_object(
     -O3
   DEPENDS
     libc.src.__support.FPUtil.nearest_integer_operations
+  FLAGS
+    ROUND_OPT
 )
 
 add_entrypoint_object(
@@ -874,6 +876,8 @@ add_entrypoint_object(
     -O3
   DEPENDS
     libc.src.__support.FPUtil.nearest_integer_operations
+  FLAGS
+    ROUND_OPT
 )
 
 add_entrypoint_object(
@@ -899,6 +903,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.nearest_integer_operations
+    libc.src.__support.macros.properties.architectures
+  FLAGS
+    ROUND_OPT
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/ceil.cpp b/libc/src/math/generic/ceil.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, ceil, (double x)) {
-#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
   return __builtin_ceil(x);
 #else
   return fputil::ceil(x);
diff --git a/libc/src/math/generic/ceilf.cpp b/libc/src/math/generic/ceilf.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, ceilf, (float x)) {
-#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
   return __builtin_ceilf(x);
 #else
   return fputil::ceil(x);
diff --git a/libc/src/math/generic/ceilf16.cpp b/libc/src/math/generic/ceilf16.cpp
@@ -15,7 +15,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) {
-#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC) &&                            \
+#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) &&                       \
     defined(LIBC_TARGET_ARCH_IS_AARCH64)
   return static_cast<float16>(__builtin_ceilf(x));
 #else
diff --git a/libc/src/math/generic/floor.cpp b/libc/src/math/generic/floor.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, floor, (double x)) {
-#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
   return __builtin_floor(x);
 #else
   return fputil::floor(x);
diff --git a/libc/src/math/generic/floorf.cpp b/libc/src/math/generic/floorf.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, floorf, (float x)) {
-#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
   return __builtin_floorf(x);
 #else
   return fputil::floor(x);
diff --git a/libc/src/math/generic/floorf16.cpp b/libc/src/math/generic/floorf16.cpp
@@ -15,7 +15,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) {
-#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC) &&                            \
+#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) &&                       \
     defined(LIBC_TARGET_ARCH_IS_AARCH64)
   return static_cast<float16>(__builtin_floorf(x));
 #else
diff --git a/libc/src/math/generic/rint.cpp b/libc/src/math/generic/rint.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, rint, (double x)) {
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC // Passed via -D by the build.
+  return __builtin_rint(x); // Defer to the compiler's double builtin.
+#else // Builtin not enabled: use the generic fputil implementation.
   return fputil::round_using_current_rounding_mode(x);
+#endif // __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/rintf.cpp b/libc/src/math/generic/rintf.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, rintf, (float x)) {
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC // Passed via -D by the build.
+  return __builtin_rintf(x); // Defer to the compiler's float builtin.
+#else // Builtin not enabled: use the generic fputil implementation.
   return fputil::round_using_current_rounding_mode(x);
+#endif // __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/rintf16.cpp b/libc/src/math/generic/rintf16.cpp
@@ -10,11 +10,17 @@
 #include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/architectures.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, rintf16, (float16 x)) {
+#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) &&                       \
+    defined(LIBC_TARGET_ARCH_IS_AARCH64) // Builtin path is AArch64-only here.
+  return static_cast<float16>(__builtin_rintf(x)); // float builtin, narrowed
+#else // Builtin not enabled or other target: generic fputil implementation.
   return fputil::round_using_current_rounding_mode(x);
+#endif // __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC && AArch64
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/trunc.cpp b/libc/src/math/generic/trunc.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, trunc, (double x)) {
-#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
   return __builtin_trunc(x);
 #else
   return fputil::trunc(x);
diff --git a/libc/src/math/generic/truncf.cpp b/libc/src/math/generic/truncf.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, truncf, (float x)) {
-#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC
   return __builtin_truncf(x);
 #else
   return fputil::trunc(x);
diff --git a/libc/src/math/generic/truncf16.cpp b/libc/src/math/generic/truncf16.cpp
@@ -15,7 +15,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, truncf16, (float16 x)) {
-#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC) &&                            \
+#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) &&                       \
     defined(LIBC_TARGET_ARCH_IS_AARCH64)
   return static_cast<float16>(__builtin_truncf(x));
 #else
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -21,7 +21,7 @@ function(add_perf_binary target_name)
     "PERF"
     "" # No optional arguments
     "SUITE;CXX_STANDARD" # Single value arguments
-    "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi-value arguments
+    "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;LINK_LIBRARIES" # Multi-value arguments
     ${ARGN}
   )
   if(NOT PERF_SRCS)
@@ -64,9 +64,13 @@ function(add_perf_binary target_name)
     )
   endif()
 
+  set(link_libraries ${link_object_files})
+  foreach(lib IN LISTS PERF_LINK_LIBRARIES)
+    list(APPEND link_libraries ${lib}.unit)
+  endforeach()
   target_link_libraries(
       ${fq_target_name}
-      PRIVATE ${link_object_files} libc_diff_test_utils)
+      PRIVATE ${link_libraries} libc_diff_test_utils)
 
   set_target_properties(${fq_target_name}
     PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
@@ -385,6 +389,8 @@ add_perf_binary(
     libc.src.math.ceilf16
     libc.src.math.floorf
     libc.src.math.floorf16
+    libc.src.math.rintf
+    libc.src.math.rintf16
     libc.src.math.roundevenf
     libc.src.math.roundevenf16
     libc.src.math.roundf
@@ -393,4 +399,6 @@ add_perf_binary(
     libc.src.math.truncf16
   COMPILE_OPTIONS
     -fno-builtin
+  LINK_LIBRARIES
+    LibcFPTestHelpers
 )
diff --git a/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
@@ -11,17 +11,23 @@
 #include "src/math/ceilf16.h"
 #include "src/math/floorf.h"
 #include "src/math/floorf16.h"
+#include "src/math/rintf.h"
+#include "src/math/rintf16.h"
 #include "src/math/roundevenf.h"
 #include "src/math/roundevenf16.h"
 #include "src/math/roundf.h"
 #include "src/math/roundf16.h"
 #include "src/math/truncf.h"
 #include "src/math/truncf16.h"
+#include "test/UnitTest/RoundingModeUtils.h"
 #include "test/src/math/performance_testing/Timer.h"
 
 #include <fstream>
 #include <math.h>
 
+using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode;
+using LIBC_NAMESPACE::fputil::testing::RoundingMode;
+
 namespace LIBC_NAMESPACE::testing {
 
 template <typename T> class NearestIntegerPerf {
@@ -164,5 +170,30 @@ int main() {
   NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::truncf, ::truncf, FLOAT_ROUNDS,
                        "truncf_perf.log")
 
+  if (ForceRoundingMode r(RoundingMode::Upward); r.success) {
+    NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
+                         FLOAT16_ROUNDS, "rintf16_upward_perf.log")
+    NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
+                         "rintf_upward_perf.log")
+  }
+  if (ForceRoundingMode r(RoundingMode::Downward); r.success) {
+    NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
+                         FLOAT16_ROUNDS, "rintf16_downward_perf.log")
+    NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
+                         "rintf_downward_perf.log")
+  }
+  if (ForceRoundingMode r(RoundingMode::TowardZero); r.success) {
+    NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
+                         FLOAT16_ROUNDS, "rintf16_towardzero_perf.log")
+    NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
+                         "rintf_towardzero_perf.log")
+  }
+  if (ForceRoundingMode r(RoundingMode::Nearest); r.success) {
+    NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
+                         FLOAT16_ROUNDS, "rintf16_nearest_perf.log")
+    NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
+                         "rintf_nearest_perf.log")
+  }
+
   return 0;
 }