Skip to content

[flang][runtime] Support for offload build of FortranDecimal. #87653

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions flang/cmake/modules/AddFlangOffloadRuntime.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
"Compile Fortran runtime as CUDA sources (experimental)" OFF
)

set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")

set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
"Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")

set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
"List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")

macro(enable_cuda_compilation files)
if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
if (BUILD_SHARED_LIBS)
message(FATAL_ERROR
"BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
)
endif()

enable_language(CUDA)

# TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
# work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

# Treat all supported sources as CUDA files.
set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
set(CUDA_COMPILE_OPTIONS)
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
# Allow varargs.
set(CUDA_COMPILE_OPTIONS
-Xclang -fcuda-allow-variadic-functions
)
endif()
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
set(CUDA_COMPILE_OPTIONS
--expt-relaxed-constexpr
# Disable these warnings:
# 'long double' is treated as 'double' in device code
-Xcudafe --diag_suppress=20208
-Xcudafe --display_error_number
)
endif()
set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
"${CUDA_COMPILE_OPTIONS}"
)

if (EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
# When using libcudacxx headers files, we have to use them
# for all files of F18 runtime.
include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include)
add_compile_definitions(RT_USE_LIBCUDACXX=1)
endif()
endif()
endmacro()

macro(enable_omp_offload_compilation files)
if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
# 'host_device' build only works with Clang compiler currently.
# The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use
# the in-tree built Clang. We may have a mode that would use the in-tree
# built Clang.
#
# 'nohost' is supposed to produce an LLVM Bitcode library,
# and it has to be done with a C/C++ compiler producing LLVM Bitcode
# compatible with the LLVM toolchain version distributed with the Flang
# compiler.
# In general, the in-tree built Clang should be used for 'nohost' build.
# Note that 'nohost' build does not produce the host version of Flang
# runtime library, so there will be two separate distributable objects.
# 'nohost' build is a TODO.

if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime")
endif()
if (BUILD_SHARED_LIBS)
message(FATAL_ERROR
"BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
)
endif()

if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
"${CMAKE_C_COMPILER_ID}" MATCHES "Clang")

set(all_amdgpu_architectures
"gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
"gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
"gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
)
set(all_nvptx_architectures
"sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90"
)
set(all_gpu_architectures
"${all_amdgpu_architectures};${all_nvptx_architectures}"
)
# TODO: support auto detection on the build system.
if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures})
endif()
list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)

string(REPLACE ";" "," compile_for_architectures
"${FLANG_OMP_DEVICE_ARCHITECTURES}"
)

set(OMP_COMPILE_OPTIONS
-fopenmp
-fvisibility=hidden
-fopenmp-cuda-mode
--offload-arch=${compile_for_architectures}
# Force LTO for the device part.
-foffload-lto
)
set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
"${OMP_COMPILE_OPTIONS}"
)

# Enable "declare target" in the source code.
set_source_files_properties(${files}
PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
)
else()
message(FATAL_ERROR
"Flang runtime build is not supported for these compilers:\n"
"CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
"CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
endif()
endif()
endmacro()
10 changes: 8 additions & 2 deletions flang/lib/Decimal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,17 @@ endif()
# avoid an unwanted dependency on libstdc++.so.
add_definitions(-U_GLIBCXX_ASSERTIONS)

add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN
set(sources
binary-to-decimal.cpp
decimal-to-binary.cpp
)

include(AddFlangOffloadRuntime)
enable_cuda_compilation("${sources}")
enable_omp_offload_compilation("${sources}")

add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN ${sources})

if (DEFINED MSVC)
set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
add_flang_library(FortranDecimal.static INSTALL_WITH_TOOLCHAIN
Expand All @@ -77,4 +83,4 @@ if (DEFINED MSVC)
)
add_dependencies(FortranDecimal FortranDecimal.static FortranDecimal.dynamic
FortranDecimal.static_dbg FortranDecimal.dynamic_dbg)
endif()
endif()
Loading