Skip to content

Commit c9465af

Browse files
committed
[compiler-rt] Initial support for builtins on GPU targets
Summary: This patch adds initial support to build the `builtins` library for GPU targets. Primarily this requires adding a few new architectures for `amdgcn` and `nvptx64`. I built this using the following invocations. ```console $ cmake ../compiler-rt -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -GNinja -DCMAKE_C_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa> -DCMAKE_CXX_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa> -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1 -DLLVM_CMAKE_DIR=../cmake/Modules -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON -C ../compiler-rt/cmake/caches/GPU.cmake ``` Some pointers would be appreciated for how to test this using a standard (non-default target only) build. GPU builds are somewhat finicky. We only expect this to be built with a sufficiently new clang, as it's the only compiler that supports the target and output we distribute. Distribution is done as LLVM-IR blobs. GPUs have little backward compatibility, so linking object files is difficult. However, this prevents us from calling these functions post-LTO as they will have been optimized out. Another issue is the CMake flag querying functions: currently these fail on nvptx if you don't have CUDA installed because they want to use the `ptxas` and `nvlink` binaries. More work is necessary to build correctly for all targets and ship into the correct clang resource directory. Additionally, we need to use the `libc` project's support for running unit tests.
1 parent d6bbe2e commit c9465af

File tree

7 files changed

+91
-6
lines changed

7 files changed

+91
-6
lines changed

compiler-rt/cmake/Modules/AddCompilerRT.cmake

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,11 @@ function(add_compiler_rt_runtime name type)
175175
${ARGN})
176176
set(libnames)
177177
# Until we support this some other way, build compiler-rt runtime without LTO
178-
# to allow non-LTO projects to link with it.
179-
if(COMPILER_RT_HAS_FNO_LTO_FLAG)
178+
# to allow non-LTO projects to link with it. GPU targets can currently only be
179+
# distributed as LLVM-IR and ignore this.
180+
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
181+
set(NO_LTO_FLAGS "")
182+
elseif(COMPILER_RT_HAS_FNO_LTO_FLAG)
180183
set(NO_LTO_FLAGS "-fno-lto")
181184
else()
182185
set(NO_LTO_FLAGS "")

compiler-rt/cmake/Modules/BuiltinTests.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ endfunction()
112112
function(builtin_check_c_compiler_flag flag output)
113113
if(NOT DEFINED ${output})
114114
message(STATUS "Performing Test ${output}")
115-
try_compile_only(result FLAGS ${flag})
115+
try_compile_only(result FLAGS ${flag} ${CMAKE_REQUIRED_FLAGS})
116116
set(${output} ${result} CACHE INTERNAL "Compiler supports ${flag}")
117117
if(${result})
118118
message(STATUS "Performing Test ${output} - Success")

compiler-rt/cmake/Modules/CompilerRTUtils.cmake

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ macro(test_target_arch arch def)
146146
endmacro()
147147

148148
macro(detect_target_arch)
149+
check_symbol_exists(__AMDGPU__ "" __AMDGPU)
149150
check_symbol_exists(__arm__ "" __ARM)
150151
check_symbol_exists(__AVR__ "" __AVR)
151152
check_symbol_exists(__aarch64__ "" __AARCH64)
@@ -154,6 +155,7 @@ macro(detect_target_arch)
154155
check_symbol_exists(__loongarch__ "" __LOONGARCH)
155156
check_symbol_exists(__mips__ "" __MIPS)
156157
check_symbol_exists(__mips64__ "" __MIPS64)
158+
check_symbol_exists(__NVPTX__ "" __NVPTX)
157159
check_symbol_exists(__powerpc__ "" __PPC)
158160
check_symbol_exists(__powerpc64__ "" __PPC64)
159161
check_symbol_exists(__powerpc64le__ "" __PPC64LE)
@@ -164,7 +166,9 @@ macro(detect_target_arch)
164166
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
165167
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
166168
check_symbol_exists(__ve__ "" __VE)
167-
if(__ARM)
169+
if(__AMDGPU)
170+
add_default_target_arch(amdgcn)
171+
elseif(__ARM)
168172
add_default_target_arch(arm)
169173
elseif(__AVR)
170174
add_default_target_arch(avr)
@@ -192,6 +196,8 @@ macro(detect_target_arch)
192196
add_default_target_arch(mips64)
193197
elseif(__MIPS)
194198
add_default_target_arch(mips)
199+
elseif(__NVPTX)
200+
add_default_target_arch(nvptx64)
195201
elseif(__PPC64) # must be checked before __PPC
196202
add_default_target_arch(powerpc64)
197203
elseif(__PPC64LE)
@@ -388,6 +394,29 @@ macro(construct_compiler_rt_default_triple)
388394
set(COMPILER_RT_DEFAULT_TARGET_ARCH "i386")
389395
endif()
390396

397+
# If we are directly targeting a GPU we need to check that the compiler is
398+
# compatible and pass some default arguments.
399+
if(COMPILER_RT_DEFAULT_TARGET_ONLY)
400+
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
401+
# Ensure the compiler is a valid clang when building the GPU target.
402+
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
403+
if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
404+
"${CMAKE_CXX_COMPILER_VERSION}" VERSION_EQUAL "${req_ver}"))
405+
message(FATAL_ERROR "Cannot build compiler-rt for GPU. CMake compiler "
406+
"'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' "
407+
"is not 'Clang ${req_ver}'.")
408+
endif()
409+
endif()
410+
411+
# Pass the necessary flags to make flag detection work.
412+
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
413+
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
414+
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
415+
set(CMAKE_REQUIRED_FLAGS
416+
"${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
417+
endif()
418+
endif()
419+
391420
# Determine if test target triple is specified explicitly, and doesn't match the
392421
# default.
393422
if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL LLVM_TARGET_TRIPLE)
@@ -466,6 +495,10 @@ function(get_compiler_rt_target arch variable)
466495
endif()
467496
endif()
468497
set(target "${arch}${triple_suffix}")
498+
elseif("${arch}" MATCHES "^amdgcn")
499+
set(target "amdgcn-amd-amdhsa")
500+
elseif("${arch}" MATCHES "^nvptx")
501+
set(target "nvptx64-nvidia-cuda")
469502
else()
470503
set(target "${arch}${triple_suffix}")
471504
endif()

compiler-rt/cmake/base-config-ix.cmake

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,10 @@ macro(test_targets)
214214
test_target_arch(x86_64 "" "")
215215
endif()
216216
endif()
217+
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
218+
test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
219+
"-flto" "-fconvergent-functions"
220+
"-Xclang -mcode-object-version=none")
217221
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
218222
test_target_arch(loongarch64 "" "")
219223
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le")
@@ -254,6 +258,9 @@ macro(test_targets)
254258
test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE=1" "-D_FILE_OFFSET_BITS=64")
255259
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
256260
endif()
261+
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
262+
test_target_arch(nvptx64 "" "--target=nvptx64-nvidia-cuda" "-nogpulib" "-flto"
263+
"-fconvergent-functions" "-c")
257264
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
258265
if(WIN32)
259266
test_target_arch(arm "" "" "")

compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ builtin_check_c_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_
1919
builtin_check_c_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG)
2020
builtin_check_c_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG)
2121
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
22+
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
23+
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
24+
builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
25+
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
2226
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
2327
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
2428

@@ -52,6 +56,7 @@ else()
5256
set(OS_NAME "${CMAKE_SYSTEM_NAME}")
5357
endif()
5458

59+
set(AMDGPU amdgcn)
5560
set(ARM64 aarch64)
5661
set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
5762
set(AVR avr)
@@ -61,6 +66,7 @@ set(X86_64 x86_64)
6166
set(LOONGARCH64 loongarch64)
6267
set(MIPS32 mips mipsel)
6368
set(MIPS64 mips64 mips64el)
69+
set(NVPTX nvptx64)
6470
set(PPC32 powerpc powerpcspe)
6571
set(PPC64 powerpc64 powerpc64le)
6672
set(RISCV32 riscv32)
@@ -78,8 +84,8 @@ if(APPLE)
7884
endif()
7985

8086
set(ALL_BUILTIN_SUPPORTED_ARCH
81-
${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
82-
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
87+
${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
88+
${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
8389
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
8490
${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
8591

compiler-rt/cmake/caches/GPU.cmake

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
2+
# amdgcn and nvptx builds targeting the builtins library.
3+
4+
set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
5+
set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
6+
7+
set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
8+
set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
9+
set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
10+
set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
11+
set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
12+
set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
13+
set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
14+
set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
15+
set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
16+
set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
17+
set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
18+
set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,8 @@ if (MINGW)
619619
)
620620
endif()
621621

622+
set(amdgcn_SOURCES ${GENERIC_SOURCES})
623+
622624
set(armv4t_SOURCES ${arm_min_SOURCES})
623625
set(armv5te_SOURCES ${arm_min_SOURCES})
624626
set(armv6_SOURCES ${arm_min_SOURCES})
@@ -698,6 +700,8 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES}
698700
set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
699701
${mips_SOURCES})
700702

703+
set(nvptx64_SOURCES ${GENERIC_SOURCES})
704+
701705
set(powerpc_SOURCES ${GENERIC_SOURCES})
702706

703707
set(powerpcspe_SOURCES ${GENERIC_SOURCES})
@@ -803,6 +807,20 @@ else ()
803807
endif()
804808
endif()
805809

810+
# Directly targeting the GPU requires a few extra flags.
811+
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
812+
append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
813+
append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
814+
append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
815+
-fconvergent-functions BUILTIN_CFLAGS)
816+
817+
# AMDGPU targets want to use a generic ABI.
818+
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
819+
append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
820+
"SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
821+
endif()
822+
endif()
823+
806824
set(BUILTIN_DEFS "")
807825

808826
if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS)

0 commit comments

Comments
 (0)