Skip to content

Commit b329da8

Browse files
authored
[flang][runtime] Support for offload build of FortranDecimal. (#87653)
1 parent 25cf279 commit b329da8

File tree

6 files changed

+199
-169
lines changed

6 files changed

+199
-169
lines changed
Lines changed: 132 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,132 @@
1+
# User-facing configuration for the experimental offload builds of the
# Fortran runtime.

# Boolean switch consumed by enable_cuda_compilation().
option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
  "Compile Fortran runtime as CUDA sources (experimental)" OFF
  )

# Optional libcu++ installation prefix. enable_cuda_compilation() looks for
# an 'include' subdirectory under this path.
set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")

# Mode selector consumed by enable_omp_offload_compilation().
set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
  "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")
# Advertise the accepted values so that cmake-gui/ccmake present a selection
# list. This does not validate the value; enable_omp_offload_compilation()
# still rejects unsupported modes.
set_property(CACHE FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD PROPERTY STRINGS
  off host_device nohost
  )

# Semicolon-separated list of device architectures; the special value 'all'
# is expanded by enable_omp_offload_compilation().
set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
  "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")
12+
13+
# enable_cuda_compilation(<files>)
#
# When FLANG_EXPERIMENTAL_CUDA_RUNTIME is enabled, marks the given source
# files (a semicolon-separated list passed as a single argument) as CUDA
# sources and attaches compiler-specific CUDA compile options to them.
# Does nothing when the option is disabled.
#
# Fails with FATAL_ERROR if BUILD_SHARED_LIBS is set.
#
# This is intentionally a macro: enable_language(), the
# CMAKE_CUDA_SEPARABLE_COMPILATION variable, include_directories() and
# add_compile_definitions() all have to take effect in the caller's scope.
macro(enable_cuda_compilation files)
  if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
    if (BUILD_SHARED_LIBS)
      message(FATAL_ERROR
        "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
        )
    endif()

    enable_language(CUDA)

    # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
    # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
    set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

    # Treat all supported sources as CUDA files.
    set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)

    # Start from an empty option list; at most one of the branches below
    # is expected to fill it in.
    set(CUDA_COMPILE_OPTIONS)
    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
      # Allow varargs.
      set(CUDA_COMPILE_OPTIONS
        -Xclang -fcuda-allow-variadic-functions
        )
    endif()
    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
      set(CUDA_COMPILE_OPTIONS
        --expt-relaxed-constexpr
        # Disable these warnings:
        #   'long double' is treated as 'double' in device code
        -Xcudafe --diag_suppress=20208
        # Print the diagnostic numbers, so that further warnings
        # can be identified (and suppressed as above).
        -Xcudafe --display_error_number
        )
    endif()
    set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
      "${CUDA_COMPILE_OPTIONS}"
      )

    # Only probe for libcu++ when a path was actually provided: with the
    # default empty FLANG_LIBCUDACXX_PATH the check would otherwise test
    # the unrelated root-level "/include" directory.
    if (NOT "${FLANG_LIBCUDACXX_PATH}" STREQUAL "" AND
        EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
      # When using libcudacxx headers files, we have to use them
      # for all files of F18 runtime.
      include_directories(AFTER "${FLANG_LIBCUDACXX_PATH}/include")
      add_compile_definitions(RT_USE_LIBCUDACXX=1)
    endif()
  endif()
endmacro()
57+
58+
# enable_omp_offload_compilation(<files>)
#
# Unless FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD is "off", attaches OpenMP
# target offload compile options and the OMP_OFFLOAD_BUILD definition to the
# given source files (a semicolon-separated list passed as a single
# argument).
#
# Fails with FATAL_ERROR when the requested mode is not "host_device", when
# BUILD_SHARED_LIBS is set, or when the C/C++ compilers are not Clang.
#
# Being a macro, it sets FLANG_OMP_DEVICE_ARCHITECTURES and the helper
# variables below in the caller's scope.
macro(enable_omp_offload_compilation files)
  if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
    # Build modes:
    #
    # 'host_device' currently works only with the Clang compiler. It is
    # built with CMAKE_C/CXX_COMPILER, i.e. not with the in-tree built
    # Clang. A mode using the in-tree built Clang may be added later.
    #
    # 'nohost' is supposed to produce an LLVM Bitcode library. That requires
    # a C/C++ compiler producing LLVM Bitcode compatible with the LLVM
    # toolchain version distributed with the Flang compiler, so in general
    # the in-tree built Clang should be used for it. Note that 'nohost' does
    # not produce the host version of the Flang runtime library, so there
    # will be two separate distributable objects.
    # 'nohost' build is a TODO.

    if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
      message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime")
    endif()
    if (BUILD_SHARED_LIBS)
      message(FATAL_ERROR
        "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
        )
    endif()

    # Both the C and the C++ compiler have to be Clang.
    if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR
        NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
      message(FATAL_ERROR
        "Flang runtime build is not supported for these compilers:\n"
        "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
        "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    endif()

    # Architectures selected by FLANG_OMP_DEVICE_ARCHITECTURES=all.
    set(all_amdgpu_architectures
      gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906
      gfx908 gfx90a gfx90c gfx940 gfx1010 gfx1030
      gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
      gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151
      )
    set(all_nvptx_architectures
      sm_35 sm_37 sm_50 sm_52 sm_53 sm_60 sm_61 sm_62
      sm_70 sm_72 sm_75 sm_80 sm_86 sm_89 sm_90
      )
    set(all_gpu_architectures
      ${all_amdgpu_architectures}
      ${all_nvptx_architectures}
      )

    # TODO: support auto detection on the build system.
    if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
      set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures})
    endif()
    list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)

    # --offload-arch takes a comma-separated list.
    string(REPLACE ";" "," compile_for_architectures
      "${FLANG_OMP_DEVICE_ARCHITECTURES}"
      )

    set(OMP_COMPILE_OPTIONS
      -fopenmp
      -fvisibility=hidden
      -fopenmp-cuda-mode
      --offload-arch=${compile_for_architectures}
      # Force LTO for the device part.
      -foffload-lto
      )

    # Apply the options, and define OMP_OFFLOAD_BUILD to enable
    # "declare target" in the source code.
    set_source_files_properties(${files} PROPERTIES
      COMPILE_OPTIONS "${OMP_COMPILE_OPTIONS}"
      COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
      )
  endif()
endmacro()

flang/lib/Decimal/CMakeLists.txt

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,11 +49,17 @@ endif()
4949
# avoid an unwanted dependency on libstdc++.so.
5050
add_definitions(-U_GLIBCXX_ASSERTIONS)
5151

52-
add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN
52+
# Sources of the FortranDecimal library. They are kept in a variable so that
# the offload helpers below can be applied to them before the library target
# is created.
set(sources
  binary-to-decimal.cpp
  decimal-to-binary.cpp
  )

# Apply the experimental CUDA / OpenMP offload settings to the sources.
include(AddFlangOffloadRuntime)
enable_cuda_compilation("${sources}")
enable_omp_offload_compilation("${sources}")

add_flang_library(FortranDecimal
  INSTALL_WITH_TOOLCHAIN
  ${sources}
  )
62+
5763
if (DEFINED MSVC)
5864
set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
5965
add_flang_library(FortranDecimal.static INSTALL_WITH_TOOLCHAIN
@@ -77,4 +83,4 @@ if (DEFINED MSVC)
7783
)
7884
add_dependencies(FortranDecimal FortranDecimal.static FortranDecimal.dynamic
7985
FortranDecimal.static_dbg FortranDecimal.dynamic_dbg)
80-
endif()
86+
endif()

0 commit comments

Comments
 (0)