Skip to content

[SYCL][ROCm] Add HIP NVIDIA support #4049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def do_configure(args):
sycl_build_pi_cuda = 'OFF'
sycl_build_pi_esimd_cpu = 'ON'
sycl_build_pi_rocm = 'OFF'
sycl_build_pi_rocm_platform = 'AMD'
sycl_werror = 'ON'
llvm_enable_assertions = 'ON'
llvm_enable_doxygen = 'OFF'
Expand All @@ -40,21 +41,26 @@ def do_configure(args):
if args.arm:
llvm_targets_to_build = 'ARM;AArch64'

if args.disable_esimd_cpu:
sycl_build_pi_esimd_cpu = 'OFF'

if args.cuda or args.rocm:
llvm_enable_projects += ';libclc'

if args.cuda:
llvm_targets_to_build += ';NVPTX'
llvm_enable_projects += ';libclc'
libclc_targets_to_build = 'nvptx64--;nvptx64--nvidiacl'
sycl_build_pi_cuda = 'ON'

if args.disable_esimd_cpu:
sycl_build_pi_esimd_cpu = 'OFF'

if args.rocm:
llvm_targets_to_build += ';AMDGPU'
# TODO libclc should be added once,
# TODO when we build DPC++ with both CUDA and ROCM support
llvm_enable_projects += ';libclc'
libclc_targets_to_build = 'amdgcn--;amdgcn--amdhsa'
if args.rocm_platform == 'AMD':
llvm_targets_to_build += ';AMDGPU'
libclc_targets_to_build += ';amdgcn--;amdgcn--amdhsa'
elif args.rocm_platform == 'NVIDIA' and not args.cuda:
llvm_targets_to_build += ';NVPTX'
libclc_targets_to_build += ';nvptx64--;nvptx64--nvidiacl'

sycl_build_pi_rocm_platform = args.rocm_platform
sycl_build_pi_rocm = 'ON'

if args.no_werror:
Expand Down Expand Up @@ -92,6 +98,7 @@ def do_configure(args):
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
"-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
"-DSYCL_BUILD_PI_ROCM={}".format(sycl_build_pi_rocm),
"-DSYCL_BUILD_PI_ROCM_PLATFORM={}".format(sycl_build_pi_rocm_platform),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR={}".format(sycl_werror),
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
Expand Down Expand Up @@ -161,7 +168,8 @@ def main():
parser.add_argument("-t", "--build-type",
metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release")
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
parser.add_argument("--rocm", action='store_true', help="swith from OpenCL to ROCM")
parser.add_argument("--rocm", action='store_true', help="switch from OpenCL to ROCm")
parser.add_argument("--rocm-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose ROCm backend")
parser.add_argument("--arm", action='store_true', help="build ARM support rather than x86")
parser.add_argument("--disable-esimd-cpu", action='store_true', help="build without ESIMD_CPU support")
parser.add_argument("--no-assertions", action='store_true', help="build without assertions")
Expand Down
32 changes: 31 additions & 1 deletion sycl/doc/GetStartedGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and a wide range of compute accelerators such as GPU and FPGA.
- [Build DPC++ toolchain with libc++ library](#build-dpc-toolchain-with-libc-library)
- [Build DPC++ toolchain with support for NVIDIA CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda)
- [Build DPC++ toolchain with support for AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm)
- [Build DPC++ toolchain with support for NVIDIA ROCm](#build-dpc-toolchain-with-support-for-nvidia-rocm)
- [Build Doxygen documentation](#build-doxygen-documentation)
- [Deployment](#deployment)
- [Use DPC++ toolchain](#use-dpc-toolchain)
Expand Down Expand Up @@ -107,6 +108,7 @@ flags can be found by launching the script with `--help`):
* `--no-werror` -> Don't treat warnings as errors when compiling llvm
* `--cuda` -> use the cuda backend (see [Nvidia CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda))
* `--rocm` -> use the rocm backend (see [AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm))
* `--rocm-platform` -> select the platform used by the rocm backend, `AMD` or `NVIDIA` (see [AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm) or [NVIDIA ROCm](#build-dpc-toolchain-with-support-for-nvidia-rocm))
* `--shared-libs` -> Build shared libraries
* `-t` -> Build type (debug or release)
* `-o` -> Path to build directory
Expand Down Expand Up @@ -175,6 +177,34 @@ produce a standard ELF shared code object which can be loaded and executed on an
So if you want to support AMD ROCm, you should also build the lld project.
[LLD Build Guide](https://lld.llvm.org/)

The following CMake variables can be set to change where CMake looks
for the ROCm installation:

* `SYCL_BUILD_PI_ROCM_INCLUDE_DIR`: Path to HIP include directory (default
`/opt/rocm/hip/include`).
* `SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR`: Path to HSA include directory (default
`/opt/rocm/hsa/include`).
* `SYCL_BUILD_PI_ROCM_AMD_LIBRARY`: Path to HIP runtime library (default
`/opt/rocm/hip/lib/libamdhip64.so`).

### Build DPC++ toolchain with support for NVIDIA ROCm

DPC++ has experimental support for using ROCm on NVIDIA devices.

This is a compatibility feature and the [CUDA backend](#build-dpc-toolchain-with-support-for-nvidia-cuda)
should be preferred to run on NVIDIA GPUs.

To enable support for NVIDIA ROCm devices, follow the instructions for the Linux
DPC++ toolchain, but add the `--rocm` and `--rocm-platform NVIDIA` flags to
`configure.py`.

Enabling this flag requires both ROCm, more specifically
[HIP NVCC](https://rocmdocs.amd.com/en/latest/Installation_Guide/HIP-Installation.html#nvidia-platform),
and CUDA to be installed; for CUDA, see the
[NVIDIA CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html).

So far, this has only been tested on Linux with ROCm 4.2, CUDA 11 and a GeForce GTX
1060 card.

### Build Doxygen documentation

Expand Down Expand Up @@ -510,7 +540,7 @@ and run following command:
clang++ -fsycl simple-sycl-app.cpp -o simple-sycl-app.exe
```

When building for CUDA, use the CUDA target triple as follows:
When building for CUDA or NVIDIA ROCm, use the CUDA target triple as follows:

```bash
clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice \
Expand Down
99 changes: 62 additions & 37 deletions sycl/plugins/rocm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,51 +1,76 @@
message(STATUS "Including the PI API ROCM backend.")
# Set default PI ROCm platform to AMD
set(SYCL_BUILD_PI_ROCM_PLATFORM "AMD" CACHE STRING "PI ROCm platform, AMD or NVIDIA")

# cannot rely on cmake support for ROCM; it assumes runtime API is being used.
# we only require the ROCM driver API to be used
# rocm_rocm_LIBRARY variable defines the path to libhsa-runtime64.so, the ROCM Driver API library.
message(STATUS "Including the PI API ROCM backend for ${SYCL_BUILD_PI_ROCM_PLATFORM}.")

#find_package(ROCM 4.0 REQUIRED)

# Make imported library global to use it within the project.
add_library(rocmdrv SHARED IMPORTED GLOBAL)


set(ROCM_ROCM_LIBRARY "/opt/rocm/hip/lib/libamdhip64.so")
set(ROCM_INCLUDE_DIRS "/opt/rocm/hip/include")
set(hsa_inc_dir "/opt/rocm/hsa/include")


add_definitions(-D__HIP_PLATFORM_HCC__)

set_target_properties(
rocmdrv PROPERTIES
IMPORTED_LOCATION ${ROCM_ROCM_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${ROCM_INCLUDE_DIRS}
)
# Set default ROCm include dirs
set(SYCL_BUILD_PI_ROCM_INCLUDE_DIR "/opt/rocm/hip/include" CACHE STRING "HIP include dir")
set(SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR "/opt/rocm/hsa/include" CACHE STRING "HSA include dir")
set(HIP_HEADERS "${SYCL_BUILD_PI_ROCM_INCLUDE_DIR};${SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR}")

# Create pi_rocm library
add_library(pi_rocm SHARED
"${sycl_inc_dir}/CL/sycl/detail/pi.h"
"${sycl_inc_dir}/CL/sycl/detail/pi.hpp"
"pi_rocm.hpp"
"pi_rocm.cpp"
"${sycl_inc_dir}/CL/sycl/detail/pi.h"
"${sycl_inc_dir}/CL/sycl/detail/pi.hpp"
"pi_rocm.hpp"
"pi_rocm.cpp"
)


add_dependencies(sycl-toolchain pi_rocm)

set_target_properties(pi_rocm PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(pi_rocm PUBLIC OpenCL-Headers)


# Setup include directories
target_include_directories(pi_rocm
PRIVATE
${sycl_inc_dir}
${sycl_plugin_dir}
${ROCM_INCLUDE_DIRS}
${hsa_inc_dir}
PRIVATE
${sycl_inc_dir}
${sycl_plugin_dir}
)


target_link_libraries(pi_rocm PUBLIC OpenCL-Headers rocmdrv)
# Platform selection for the pi_rocm plugin.
# Depending on SYCL_BUILD_PI_ROCM_PLATFORM ("AMD" or "NVIDIA"), this block
# creates the imported runtime library target(s), links them into pi_rocm and
# sets the matching __HIP_PLATFORM_* compile definition. Any other value aborts
# configuration with a FATAL_ERROR.
if("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "AMD")
# Import HIP runtime library
# The library path is a cache variable, so the default below can be
# overridden on the CMake command line.
set(SYCL_BUILD_PI_ROCM_AMD_LIBRARY "/opt/rocm/hip/lib/libamdhip64.so" CACHE STRING "HIP AMD runtime library")
add_library(rocmdrv SHARED IMPORTED GLOBAL)

# HIP_HEADERS is listed both as interface include directories and as system
# include directories of the imported target.
set_target_properties(
rocmdrv PROPERTIES
IMPORTED_LOCATION ${SYCL_BUILD_PI_ROCM_AMD_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
)
target_link_libraries(pi_rocm PUBLIC rocmdrv)

# Set HIP define to select AMD platform
target_compile_definitions(pi_rocm PRIVATE __HIP_PLATFORM_AMD__)
elseif("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "NVIDIA")
# Import CUDA libraries
# REQUIRED: configuration stops here if the CUDA package cannot be found.
find_package(CUDA REQUIRED)
# Extend the header list with the CUDA include directories before it is
# attached to the imported targets below.
list(APPEND HIP_HEADERS ${CUDA_INCLUDE_DIRS})

# cudadrv may be defined by the CUDA plugin
# NOTE(review): when cudadrv already exists its properties are left untouched
# here; in that case HIP_HEADERS is attached by this block only through the
# cudart target created below.
if(NOT TARGET cudadrv)
add_library(cudadrv SHARED IMPORTED GLOBAL)
set_target_properties(
cudadrv PROPERTIES
IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
)
endif()

# Unlike cudadrv, cudart is created unconditionally.
# NOTE(review): add_library would fail if another directory already defined a
# target named cudart - confirm no other plugin does.
add_library(cudart SHARED IMPORTED GLOBAL)
set_target_properties(
cudart PROPERTIES
IMPORTED_LOCATION ${CUDA_CUDART_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
)
target_link_libraries(pi_rocm PUBLIC cudadrv cudart)

# Set HIP define to select NVIDIA platform
target_compile_definitions(pi_rocm PRIVATE __HIP_PLATFORM_NVIDIA__)
else()
# Any value other than "AMD" or "NVIDIA" is rejected.
message(FATAL_ERROR "Unspecified PI ROCM platform please set SYCL_BUILD_PI_ROCM_PLATFORM to 'AMD' or 'NVIDIA'")
endif()

add_common_options(pi_rocm)

Expand Down
Loading