Skip to content

Commit e7f8d4b

Browse files
authored
[libc][math] Optimize maximum and minimum functions using builtins when available (#100002)
1 parent caaba2a commit e7f8d4b

File tree

12 files changed

+284
-42
lines changed

12 files changed

+284
-42
lines changed

libc/cmake/modules/CheckCompilerFeatures.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
set(
66
ALL_COMPILER_FEATURES
77
"builtin_ceil_floor_rint_trunc"
8+
"builtin_fmax_fmin"
9+
"builtin_fmaxf16_fminf16"
810
"builtin_round"
911
"builtin_roundeven"
1012
"float16"
@@ -82,6 +84,10 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
8284
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
8385
elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
8486
set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
87+
elseif(${feature} STREQUAL "builtin_fmax_fmin")
88+
set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
89+
elseif(${feature} STREQUAL "builtin_fmaxf16_fminf16")
90+
set(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16 TRUE)
8591
elseif(${feature} STREQUAL "builtin_round")
8692
set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
8793
elseif(${feature} STREQUAL "builtin_roundeven")

libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
99
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
1010
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
1111
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
12+
set(ALL_CPU_FEATURES "FullFP16")
1213
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
1314
endif()
1415

libc/cmake/modules/LLVMLibCCompileOptionRules.cmake

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,17 @@ function(_get_compile_options_from_flags output_var)
4040
endif()
4141
if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG)
4242
list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT")
43+
if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
44+
list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
45+
endif()
46+
if(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16)
47+
list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAXF16_FMINF16")
48+
endif()
49+
if("FullFP16" IN_LIST LIBC_CPU_FEATURES AND
50+
CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
51+
list(APPEND compile_options
52+
"SHELL:-Xclang -target-feature -Xclang +fullfp16")
53+
endif()
4354
endif()
4455
elseif(MSVC)
4556
if(ADD_FMA_FLAG)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Compile probes for the single- and double-precision fmax/fmin builtins.
// Each function does nothing but forward to the builtin it is named after, so
// the translation unit compiles only if the compiler accepts that builtin.
// NOTE(review): presumably this is the source behind the "builtin_fmax_fmin"
// compiler-feature check; nothing is expected to call these functions.

// Single-precision probes.
float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }

// Double-precision probes. These were previously spelled with the `f` suffix
// (overloading the float probes); they are now named after the builtins they
// actually exercise.
double try_builtin_fmax(double x, double y) { return __builtin_fmax(x, y); }
double try_builtin_fmin(double x, double y) { return __builtin_fmin(x, y); }
6+
7+
// Empty entry point with C linkage.
// NOTE(review): presumably lets the probe link without a C runtime — confirm
// against the try-compile setup that consumes this file.
extern "C" void _start() {}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Compile probe: forwards to __builtin_fmaxf16, so this file compiles only if
// the compiler accepts both the _Float16 type and that builtin.
_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
2+
return __builtin_fmaxf16(x, y);
3+
}
4+
5+
// Compile probe for __builtin_fminf16, mirroring the fmaxf16 probe above.
_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
6+
return __builtin_fminf16(x, y);
7+
}
8+
9+
// Empty entry point with C linkage.
// NOTE(review): presumably lets the probe link without a C runtime — confirm.
extern "C" void _start() {}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// CPU-feature probe: compilation fails unless cpu_features.h defines
// LIBC_TARGET_CPU_HAS_FULLFP16 for the current target and flags.
#include "src/__support/macros/properties/cpu_features.h"
2+
3+
#ifndef LIBC_TARGET_CPU_HAS_FULLFP16
4+
// Deliberate hard error: a failed compile is how the probe reports "absent".
#error unsupported
5+
#endif

libc/src/__support/FPUtil/BasicOperations.h

Lines changed: 88 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include "src/__support/common.h"
1818
#include "src/__support/macros/config.h"
1919
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
20+
#include "src/__support/macros/properties/architectures.h"
21+
#include "src/__support/macros/properties/types.h"
2022
#include "src/__support/uint128.h"
2123

2224
namespace LIBC_NAMESPACE_DECL {
@@ -27,6 +29,86 @@ LIBC_INLINE T abs(T x) {
2729
return FPBits<T>(x).abs().get_val();
2830
}
2931

32+
namespace internal {
33+
34+
template <typename T>
35+
LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, T> max(T x, T y) {
36+
FPBits<T> x_bits(x);
37+
FPBits<T> y_bits(y);
38+
39+
// To make sure that fmax(+0, -0) == +0 == fmax(-0, +0), whenever x and y
40+
// have different signs and both are not NaNs, we return the number with
41+
// positive sign.
42+
if (x_bits.sign() != y_bits.sign())
43+
return x_bits.is_pos() ? x : y;
44+
return x > y ? x : y;
45+
}
46+
47+
// float16 specializations of internal::max.
//
// The whole section is guarded by LIBC_TYPES_HAS_FLOAT16 because the `float16`
// alias only exists when the target/compiler provides a half-precision type;
// without the guard the non-builtin branch below would name an undeclared type
// on every non-AArch64 target that lacks float16.
#ifdef LIBC_TYPES_HAS_FLOAT16
#if defined(__LIBC_USE_BUILTIN_FMAXF16_FMINF16)
// The build system detected a usable __builtin_fmaxf16: defer to it.
template <> LIBC_INLINE float16 max(float16 x, float16 y) {
  return __builtin_fmaxf16(x, y);
}
#elif !defined(LIBC_TARGET_ARCH_IS_AARCH64)
// No builtin and not AArch64: compare the raw encodings as integers instead of
// doing a floating-point comparison. (On AArch64 without the builtin, the
// generic template is used instead.)
template <> LIBC_INLINE float16 max(float16 x, float16 y) {
  FPBits<float16> x_bits(x);
  FPBits<float16> y_bits(y);

  // Reinterpret the 16-bit encodings as signed integers; a set sign bit makes
  // the integer negative.
  int16_t xi = static_cast<int16_t>(x_bits.uintval());
  int16_t yi = static_cast<int16_t>(y_bits.uintval());
  // `xi > yi` orders the operands correctly unless both are negative, where
  // the ordering of the bit patterns is reversed; the `!=` flips the outcome
  // in exactly that case.
  return ((xi > yi) != (xi < 0 && yi < 0)) ? x : y;
}
#endif
#endif // LIBC_TYPES_HAS_FLOAT16
61+
62+
// float/double specializations of internal::max that forward to the compiler
// builtins. They are enabled only when the build defines
// __LIBC_USE_BUILTIN_FMAX_FMIN and the target is not x86.
// NOTE(review): the reason for excluding x86 is not visible here — presumably
// the generic template generates better code on that target; confirm.
#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
#ifndef LIBC_TARGET_ARCH_IS_X86
template <> LIBC_INLINE float max<float>(float x, float y) {
  return __builtin_fmaxf(x, y);
}

template <> LIBC_INLINE double max<double>(double x, double y) {
  return __builtin_fmax(x, y);
}
#endif // !LIBC_TARGET_ARCH_IS_X86
#endif // __LIBC_USE_BUILTIN_FMAX_FMIN
71+
72+
template <typename T>
73+
LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, T> min(T x, T y) {
74+
FPBits<T> x_bits(x);
75+
FPBits<T> y_bits(y);
76+
77+
// To make sure that fmin(+0, -0) == -0 == fmin(-0, +0), whenever x and y have
78+
// different signs and both are not NaNs, we return the number with negative
79+
// sign.
80+
if (x_bits.sign() != y_bits.sign())
81+
return x_bits.is_neg() ? x : y;
82+
return x < y ? x : y;
83+
}
84+
85+
// float16 specializations of internal::min.
//
// The whole section is guarded by LIBC_TYPES_HAS_FLOAT16 because the `float16`
// alias only exists when the target/compiler provides a half-precision type;
// without the guard the non-builtin branch below would name an undeclared type
// on every non-AArch64 target that lacks float16.
#ifdef LIBC_TYPES_HAS_FLOAT16
#if defined(__LIBC_USE_BUILTIN_FMAXF16_FMINF16)
// The build system detected a usable __builtin_fminf16: defer to it.
template <> LIBC_INLINE float16 min(float16 x, float16 y) {
  return __builtin_fminf16(x, y);
}
#elif !defined(LIBC_TARGET_ARCH_IS_AARCH64)
// No builtin and not AArch64: compare the raw encodings as integers instead of
// doing a floating-point comparison. (On AArch64 without the builtin, the
// generic template is used instead.)
template <> LIBC_INLINE float16 min(float16 x, float16 y) {
  FPBits<float16> x_bits(x);
  FPBits<float16> y_bits(y);

  // Reinterpret the 16-bit encodings as signed integers; a set sign bit makes
  // the integer negative.
  int16_t xi = static_cast<int16_t>(x_bits.uintval());
  int16_t yi = static_cast<int16_t>(y_bits.uintval());
  // `xi < yi` orders the operands correctly unless both are negative, where
  // the ordering of the bit patterns is reversed; the `!=` flips the outcome
  // in exactly that case.
  return ((xi < yi) != (xi < 0 && yi < 0)) ? x : y;
}
#endif
#endif // LIBC_TYPES_HAS_FLOAT16
99+
100+
// float/double specializations of internal::min that forward to the compiler
// builtins. They are enabled only when the build defines
// __LIBC_USE_BUILTIN_FMAX_FMIN and the target is not x86.
// NOTE(review): the reason for excluding x86 is not visible here — presumably
// the generic template generates better code on that target; confirm.
#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
#ifndef LIBC_TARGET_ARCH_IS_X86
template <> LIBC_INLINE float min<float>(float x, float y) {
  return __builtin_fminf(x, y);
}

template <> LIBC_INLINE double min<double>(double x, double y) {
  return __builtin_fmin(x, y);
}
#endif // !LIBC_TARGET_ARCH_IS_X86
#endif // __LIBC_USE_BUILTIN_FMAX_FMIN
109+
110+
} // namespace internal
111+
30112
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
31113
LIBC_INLINE T fmin(T x, T y) {
32114
const FPBits<T> bitx(x), bity(y);
@@ -35,12 +117,7 @@ LIBC_INLINE T fmin(T x, T y) {
35117
return y;
36118
if (bity.is_nan())
37119
return x;
38-
if (bitx.sign() != bity.sign())
39-
// To make sure that fmin(+0, -0) == -0 == fmin(-0, +0), whenever x and
40-
// y has different signs and both are not NaNs, we return the number
41-
// with negative sign.
42-
return bitx.is_neg() ? x : y;
43-
return x < y ? x : y;
120+
return internal::min(x, y);
44121
}
45122

46123
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -51,12 +128,7 @@ LIBC_INLINE T fmax(T x, T y) {
51128
return y;
52129
if (bity.is_nan())
53130
return x;
54-
if (bitx.sign() != bity.sign())
55-
// To make sure that fmax(+0, -0) == +0 == fmax(-0, +0), whenever x and
56-
// y has different signs and both are not NaNs, we return the number
57-
// with positive sign.
58-
return bitx.is_neg() ? y : x;
59-
return x > y ? x : y;
131+
return internal::max(x, y);
60132
}
61133

62134
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -67,9 +139,7 @@ LIBC_INLINE T fmaximum(T x, T y) {
67139
return x;
68140
if (bity.is_nan())
69141
return y;
70-
if (bitx.sign() != bity.sign())
71-
return (bitx.is_neg() ? y : x);
72-
return x > y ? x : y;
142+
return internal::max(x, y);
73143
}
74144

75145
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -80,9 +150,7 @@ LIBC_INLINE T fminimum(T x, T y) {
80150
return x;
81151
if (bity.is_nan())
82152
return y;
83-
if (bitx.sign() != bity.sign())
84-
return (bitx.is_neg()) ? x : y;
85-
return x < y ? x : y;
153+
return internal::min(x, y);
86154
}
87155

88156
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -97,9 +165,7 @@ LIBC_INLINE T fmaximum_num(T x, T y) {
97165
return y;
98166
if (bity.is_nan())
99167
return x;
100-
if (bitx.sign() != bity.sign())
101-
return (bitx.is_neg() ? y : x);
102-
return x > y ? x : y;
168+
return internal::max(x, y);
103169
}
104170

105171
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -114,9 +180,7 @@ LIBC_INLINE T fminimum_num(T x, T y) {
114180
return y;
115181
if (bity.is_nan())
116182
return x;
117-
if (bitx.sign() != bity.sign())
118-
return (bitx.is_neg() ? x : y);
119-
return x < y ? x : y;
183+
return internal::min(x, y);
120184
}
121185

122186
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>

libc/src/__support/FPUtil/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ add_header_library(
192192
libc.src.__support.uint128
193193
libc.src.__support.common
194194
libc.src.__support.macros.optimization
195+
libc.src.__support.macros.properties.architectures
196+
libc.src.__support.macros.properties.types
195197
)
196198

197199
add_header_library(

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414

1515
#include "architectures.h"
1616

17+
// LIBC_TARGET_CPU_HAS_FULLFP16 is defined exactly when the compiler predefines
// __ARM_FEATURE_FP16_SCALAR_ARITHMETIC for the current target and flags.
// NOTE(review): that is the ACLE feature-test macro for scalar half-precision
// arithmetic — confirm against the ACLE specification.
#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
18+
#define LIBC_TARGET_CPU_HAS_FULLFP16
19+
#endif
20+
1721
#if defined(__SSE2__)
1822
#define LIBC_TARGET_CPU_HAS_SSE2
1923
#endif

0 commit comments

Comments
 (0)