Skip to content

Commit 81ce796

Browse files
authored
[libc][math][c23] Enable C23 _Float16 math functions on GPUs (#99248)
1 parent cc4f989 commit 81ce796

File tree

11 files changed

+121
-27
lines changed

11 files changed

+121
-27
lines changed

libc/cmake/modules/CheckCompilerFeatures.cmake

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,12 @@ set(
1717
# Making sure ALL_COMPILER_FEATURES is sorted.
1818
list(SORT ALL_COMPILER_FEATURES)
1919

20+
# Compiler features that are unavailable on GPU targets with the in-tree Clang.
21+
set(
22+
CPU_ONLY_COMPILER_FEATURES
23+
"float128"
24+
)
25+
2026
# Function to check whether the compiler supports the provided set of features.
2127
# Usage:
2228
# compiler_supports(
@@ -67,13 +73,26 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
6773
set(CMAKE_TRY_COMPILE_TARGET_TYPE EXECUTABLE)
6874
endif()
6975

70-
try_compile(
71-
has_feature
72-
${CMAKE_CURRENT_BINARY_DIR}/compiler_features
73-
SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
74-
COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
75-
LINK_OPTIONS ${link_options}
76-
)
76+
if(LIBC_TARGET_OS_IS_GPU)
77+
# CUDA shouldn't be required to build the libc, only to test it, so we can't
78+
# try to build CUDA binaries here. Since GPU builds are always compiled with
79+
# the in-tree Clang, we just hardcode which compiler features are available
80+
# when targeting GPUs.
81+
if(feature IN_LIST CPU_ONLY_COMPILER_FEATURES)
82+
set(has_feature FALSE)
83+
else()
84+
set(has_feature TRUE)
85+
endif()
86+
else()
87+
try_compile(
88+
has_feature
89+
${CMAKE_CURRENT_BINARY_DIR}/compiler_features
90+
SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
91+
COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
92+
LINK_OPTIONS ${link_options}
93+
)
94+
endif()
95+
7796
if(has_feature)
7897
list(APPEND AVAILABLE_COMPILER_FEATURES ${feature})
7998
if(${feature} STREQUAL "float16")

libc/cmake/modules/LLVMLibCFlagRules.cmake

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -279,8 +279,10 @@ if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
279279
set(SKIP_FLAG_EXPANSION_EXPLICIT_SIMD_OPT TRUE)
280280
endif()
281281

282-
# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
282+
# Skip ROUND_OPT flag for targets that don't support rounding instructions. On
283+
# x86, these are SSE4.1 instructions, but we already had code to check for
284+
# SSE4.2 support.
283285
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
284-
LIBC_TARGET_ARCHITECTURE_IS_AARCH64))
286+
LIBC_TARGET_ARCHITECTURE_IS_AARCH64 OR LIBC_TARGET_OS_IS_GPU))
285287
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
286288
endif()

libc/config/gpu/entrypoints.txt

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,74 @@ set(TARGET_LIBM_ENTRYPOINTS
348348
libc.src.math.truncf
349349
)
350350

351+
if(LIBC_TYPES_HAS_FLOAT16)
352+
list(APPEND TARGET_LIBM_ENTRYPOINTS
353+
# math.h C23 _Float16 entrypoints
354+
libc.src.math.canonicalizef16
355+
libc.src.math.ceilf16
356+
libc.src.math.copysignf16
357+
libc.src.math.f16add
358+
libc.src.math.f16addf
359+
libc.src.math.f16div
360+
libc.src.math.f16divf
361+
libc.src.math.f16fma
362+
libc.src.math.f16fmaf
363+
libc.src.math.f16mul
364+
libc.src.math.f16mulf
365+
libc.src.math.f16sqrt
366+
libc.src.math.f16sqrtf
367+
libc.src.math.f16sub
368+
libc.src.math.f16subf
369+
libc.src.math.fabsf16
370+
libc.src.math.fdimf16
371+
libc.src.math.floorf16
372+
libc.src.math.fmaxf16
373+
libc.src.math.fmaximum_mag_numf16
374+
libc.src.math.fmaximum_magf16
375+
libc.src.math.fmaximum_numf16
376+
libc.src.math.fmaximumf16
377+
libc.src.math.fminf16
378+
libc.src.math.fminimum_mag_numf16
379+
libc.src.math.fminimum_magf16
380+
libc.src.math.fminimum_numf16
381+
libc.src.math.fminimumf16
382+
libc.src.math.fmodf16
383+
libc.src.math.frexpf16
384+
libc.src.math.fromfpf16
385+
libc.src.math.fromfpxf16
386+
libc.src.math.getpayloadf16
387+
libc.src.math.ilogbf16
388+
libc.src.math.ldexpf16
389+
libc.src.math.llogbf16
390+
libc.src.math.llrintf16
391+
libc.src.math.llroundf16
392+
libc.src.math.logbf16
393+
libc.src.math.lrintf16
394+
libc.src.math.lroundf16
395+
libc.src.math.modff16
396+
libc.src.math.nanf16
397+
libc.src.math.nearbyintf16
398+
libc.src.math.nextafterf16
399+
libc.src.math.nextdownf16
400+
libc.src.math.nexttowardf16
401+
libc.src.math.nextupf16
402+
libc.src.math.remainderf16
403+
libc.src.math.remquof16
404+
libc.src.math.rintf16
405+
libc.src.math.roundevenf16
406+
libc.src.math.roundf16
407+
libc.src.math.scalblnf16
408+
libc.src.math.scalbnf16
409+
libc.src.math.setpayloadf16
410+
libc.src.math.setpayloadsigf16
411+
libc.src.math.totalorderf16
412+
libc.src.math.totalordermagf16
413+
libc.src.math.truncf16
414+
libc.src.math.ufromfpf16
415+
libc.src.math.ufromfpxf16
416+
)
417+
endif()
418+
351419
set(TARGET_LLVMLIBC_ENTRYPOINTS
352420
${TARGET_LIBC_ENTRYPOINTS}
353421
${TARGET_LIBM_ENTRYPOINTS}

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,4 +53,8 @@
5353
#define LIBC_TARGET_CPU_HAS_NEAREST_INT
5454
#endif
5555

56+
#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || defined(LIBC_TARGET_ARCH_IS_GPU)
57+
#define LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS
58+
#endif
59+
5660
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ add_entrypoint_object(
111111
DEPENDS
112112
libc.src.__support.macros.properties.types
113113
libc.src.__support.FPUtil.nearest_integer_operations
114-
libc.src.__support.macros.properties.architectures
114+
libc.src.__support.macros.properties.cpu_features
115115
FLAGS
116116
ROUND_OPT
117117
)
@@ -548,7 +548,7 @@ add_entrypoint_object(
548548
DEPENDS
549549
libc.src.__support.macros.properties.types
550550
libc.src.__support.FPUtil.nearest_integer_operations
551-
libc.src.__support.macros.properties.architectures
551+
libc.src.__support.macros.properties.cpu_features
552552
FLAGS
553553
ROUND_OPT
554554
)
@@ -617,7 +617,7 @@ add_entrypoint_object(
617617
DEPENDS
618618
libc.src.__support.macros.properties.types
619619
libc.src.__support.FPUtil.nearest_integer_operations
620-
libc.src.__support.macros.properties.architectures
620+
libc.src.__support.macros.properties.cpu_features
621621
FLAGS
622622
ROUND_OPT
623623
)
@@ -686,7 +686,7 @@ add_entrypoint_object(
686686
DEPENDS
687687
libc.src.__support.macros.properties.types
688688
libc.src.__support.FPUtil.nearest_integer_operations
689-
libc.src.__support.macros.properties.architectures
689+
libc.src.__support.macros.properties.cpu_features
690690
FLAGS
691691
ROUND_OPT
692692
)
@@ -755,7 +755,7 @@ add_entrypoint_object(
755755
DEPENDS
756756
libc.src.__support.macros.properties.types
757757
libc.src.__support.FPUtil.nearest_integer_operations
758-
libc.src.__support.macros.properties.architectures
758+
libc.src.__support.macros.properties.cpu_features
759759
FLAGS
760760
ROUND_OPT
761761
)
@@ -948,7 +948,7 @@ add_entrypoint_object(
948948
DEPENDS
949949
libc.src.__support.macros.properties.types
950950
libc.src.__support.FPUtil.nearest_integer_operations
951-
libc.src.__support.macros.properties.architectures
951+
libc.src.__support.macros.properties.cpu_features
952952
FLAGS
953953
ROUND_OPT
954954
)

libc/src/math/generic/ceilf16.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@
1010
#include "src/__support/FPUtil/NearestIntegerOperations.h"
1111
#include "src/__support/common.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/macros/properties/architectures.h"
13+
#include "src/__support/macros/properties/cpu_features.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) {
1818
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
19-
defined(LIBC_TARGET_ARCH_IS_AARCH64)
19+
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
2020
return static_cast<float16>(__builtin_ceilf(x));
2121
#else
2222
return fputil::ceil(x);

libc/src/math/generic/floorf16.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@
1010
#include "src/__support/FPUtil/NearestIntegerOperations.h"
1111
#include "src/__support/common.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/macros/properties/architectures.h"
13+
#include "src/__support/macros/properties/cpu_features.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) {
1818
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
19-
defined(LIBC_TARGET_ARCH_IS_AARCH64)
19+
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
2020
return static_cast<float16>(__builtin_floorf(x));
2121
#else
2222
return fputil::floor(x);

libc/src/math/generic/rintf16.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@
1010
#include "src/__support/FPUtil/NearestIntegerOperations.h"
1111
#include "src/__support/common.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/macros/properties/architectures.h"
13+
#include "src/__support/macros/properties/cpu_features.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(float16, rintf16, (float16 x)) {
1818
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
19-
defined(LIBC_TARGET_ARCH_IS_AARCH64)
19+
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
2020
return static_cast<float16>(__builtin_rintf(x));
2121
#else
2222
return fputil::round_using_current_rounding_mode(x);

libc/src/math/generic/roundevenf16.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@
1010
#include "src/__support/FPUtil/NearestIntegerOperations.h"
1111
#include "src/__support/common.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/macros/properties/architectures.h"
13+
#include "src/__support/macros/properties/cpu_features.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(float16, roundevenf16, (float16 x)) {
1818
#if defined(__LIBC_USE_BUILTIN_ROUNDEVEN) && \
19-
defined(LIBC_TARGET_ARCH_IS_AARCH64)
19+
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
2020
return static_cast<float16>(__builtin_roundevenf(x));
2121
#else
2222
return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);

libc/src/math/generic/roundf16.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,12 +10,13 @@
1010
#include "src/__support/FPUtil/NearestIntegerOperations.h"
1111
#include "src/__support/common.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/macros/properties/architectures.h"
13+
#include "src/__support/macros/properties/cpu_features.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(float16, roundf16, (float16 x)) {
18-
#if defined(__LIBC_USE_BUILTIN_ROUND) && defined(LIBC_TARGET_ARCH_IS_AARCH64)
18+
#if defined(__LIBC_USE_BUILTIN_ROUND) && \
19+
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
1920
return static_cast<float16>(__builtin_roundf(x));
2021
#else
2122
return fputil::round(x);

libc/src/math/generic/truncf16.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@
1010
#include "src/__support/FPUtil/NearestIntegerOperations.h"
1111
#include "src/__support/common.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/macros/properties/architectures.h"
13+
#include "src/__support/macros/properties/cpu_features.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(float16, truncf16, (float16 x)) {
1818
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
19-
defined(LIBC_TARGET_ARCH_IS_AARCH64)
19+
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
2020
return static_cast<float16>(__builtin_truncf(x));
2121
#else
2222
return fputil::trunc(x);

0 commit comments

Comments
 (0)