Skip to content

[libc][math] Optimize nearest integer functions using builtins when available #98376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions libc/cmake/modules/CheckCompilerFeatures.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
# Compiler features definition and flags
# ------------------------------------------------------------------------------

set(ALL_COMPILER_FEATURES "float16" "float128" "fixed_point")
# Names of the compiler features probed by the loop below. Each entry is
# checked by compiling cmake/modules/compiler_features/check_<feature>.cpp.
set(
ALL_COMPILER_FEATURES
"builtin_ceil_floor_rint_trunc"
"builtin_round"
"builtin_roundeven"
"float16"
"float128"
"fixed_point"
)

# Making sure ALL_COMPILER_FEATURES is sorted.
list(SORT ALL_COMPILER_FEATURES)
Expand Down Expand Up @@ -39,18 +47,30 @@ endfunction()
set(AVAILABLE_COMPILER_FEATURES "")

# Try to compile a C++ source file to check whether each feature is supported.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set(compile_options ${LIBC_COMPILE_OPTIONS_NATIVE})
set(link_options "")
if(${feature} STREQUAL "fixed_point")
list(APPEND compile_options "-ffixed-point")
elseif(${feature} MATCHES "^builtin_")
set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT})
set(link_options -nostdlib)
# The compiler might handle calls to rounding builtins by generating calls
# to the respective libc math functions, in which case we cannot use these
# builtins in our implementations of these functions. We check that this is
# not the case by trying to link an executable, since linking would fail due
# to unresolved references with -nostdlib if calls to libc functions were
# generated.
set(CMAKE_TRY_COMPILE_TARGET_TYPE EXECUTABLE)
endif()

try_compile(
has_feature
${CMAKE_CURRENT_BINARY_DIR}/compiler_features
SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
LINK_OPTIONS ${link_options}
)
if(has_feature)
list(APPEND AVAILABLE_COMPILER_FEATURES ${feature})
Expand All @@ -60,6 +80,12 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(LIBC_TYPES_HAS_FLOAT128 TRUE)
elseif(${feature} STREQUAL "fixed_point")
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
elseif(${feature} STREQUAL "builtin_round")
set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
elseif(${feature} STREQUAL "builtin_roundeven")
set(LIBC_COMPILER_HAS_BUILTIN_ROUNDEVEN TRUE)
endif()
endif()
endforeach()
Expand Down
21 changes: 18 additions & 3 deletions libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ function(_get_compile_options_from_flags output_var)
if(LIBC_TARGET_ARCHITECTURE_IS_RISCV64 OR(LIBC_CPU_FEATURES MATCHES "FMA"))
check_flag(ADD_FMA_FLAG ${FMA_OPT_FLAG} ${ARGN})
endif()
check_flag(ADD_SSE4_2_FLAG ${ROUND_OPT_FLAG} ${ARGN})
check_flag(ADD_ROUND_OPT_FLAG ${ROUND_OPT_FLAG} ${ARGN})
check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${ARGN})

if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
Expand All @@ -16,8 +16,23 @@ function(_get_compile_options_from_flags output_var)
list(APPEND compile_options "-D__LIBC_RISCV_USE_FMA")
endif()
endif()
if(ADD_SSE4_2_FLAG)
list(APPEND compile_options "-msse4.2")
if(ADD_ROUND_OPT_FLAG)
if(LIBC_TARGET_ARCHITECTURE_IS_X86)
# ROUND_OPT_FLAG is only enabled if SSE4.2 is detected, not just SSE4.1,
# because there was code to check for SSE4.2 already, and few CPUs only
# have SSE4.1.
list(APPEND compile_options "-msse4.2")
endif()
if(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC)
list(APPEND compile_options
"-D__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC")
endif()
if(LIBC_COMPILER_HAS_BUILTIN_ROUND)
list(APPEND compile_options "-D__LIBC_USE_BUILTIN_ROUND")
endif()
if(LIBC_COMPILER_HAS_BUILTIN_ROUNDEVEN)
list(APPEND compile_options "-D__LIBC_USE_BUILTIN_ROUNDEVEN")
endif()
endif()
if(ADD_EXPLICIT_SIMD_OPT_FLAG)
list(APPEND compile_options "-D__LIBC_EXPLICIT_SIMD_OPT")
Expand Down
3 changes: 2 additions & 1 deletion libc/cmake/modules/LLVMLibCFlagRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
endif()

# Skip ROUND_OPT flag for targets that are neither x86 with SSE 4.2 nor AArch64.
if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
LIBC_TARGET_ARCHITECTURE_IS_AARCH64))
Comment on lines +280 to +281
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if we had a CMake formatter. I copied the style from the lines above, but the OR line is longer than 80 chars (just like above).

set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Feature probe for the ceil/floor/rint/trunc rounding builtins.
//
// Each wrapper below only forwards its argument to the corresponding
// __builtin_* function. CheckCompilerFeatures.cmake links this file as an
// executable with -nostdlib, so the probe fails with unresolved references if
// the compiler lowers any of these builtins to a libc math function call.

// Single-precision variants.
float try_builtin_ceilf(float x) { return __builtin_ceilf(x); }
float try_builtin_floorf(float x) { return __builtin_floorf(x); }
float try_builtin_rintf(float x) { return __builtin_rintf(x); }
float try_builtin_truncf(float x) { return __builtin_truncf(x); }

// Double-precision variants.
double try_builtin_ceil(double x) { return __builtin_ceil(x); }
double try_builtin_floor(double x) { return __builtin_floor(x); }
double try_builtin_rint(double x) { return __builtin_rint(x); }
double try_builtin_trunc(double x) { return __builtin_trunc(x); }

// Empty _start so the executable can be linked without the standard library.
// NOTE(review): presumably relies on _start being the linker's default entry
// symbol -- confirm for all supported targets.
extern "C" void _start() {}
5 changes: 5 additions & 0 deletions libc/cmake/modules/compiler_features/check_builtin_round.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Feature probe for the round builtins.
//
// Each wrapper below only forwards its argument to the corresponding
// __builtin_* function. CheckCompilerFeatures.cmake links this file as an
// executable with -nostdlib, so the probe fails with unresolved references if
// the compiler lowers these builtins to libc math function calls.

// Single-precision variant.
float try_builtin_roundf(float x) { return __builtin_roundf(x); }

// Double-precision variant.
double try_builtin_round(double x) { return __builtin_round(x); }

// Empty _start so the executable can be linked without the standard library.
// NOTE(review): presumably relies on _start being the linker's default entry
// symbol -- confirm for all supported targets.
extern "C" void _start() {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Feature probe for the roundeven builtins.
//
// Each wrapper below only forwards its argument to the corresponding
// __builtin_* function. CheckCompilerFeatures.cmake links this file as an
// executable with -nostdlib, so the probe fails with unresolved references if
// the compiler lowers these builtins to libc math function calls.

// Single-precision variant.
float try_builtin_roundevenf(float x) { return __builtin_roundevenf(x); }

// Double-precision variant.
double try_builtin_roundeven(double x) { return __builtin_roundeven(x); }

// Empty _start so the executable can be linked without the standard library.
// NOTE(review): presumably relies on _start being the linker's default entry
// symbol -- confirm for all supported targets.
extern "C" void _start() {}
79 changes: 0 additions & 79 deletions libc/src/math/aarch64/CMakeLists.txt

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/ceil.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/ceilf.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/floor.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/floorf.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/round.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/roundf.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/trunc.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions libc/src/math/aarch64/truncf.cpp

This file was deleted.

Loading
Loading