intel · bader · Jul 14, 2021 · Jun 23, 2021 · Jul 9, 2021 · Jul 9, 2021
@@ -27,6 +27,7 @@ def do_configure(args):
     sycl_build_pi_cuda = 'OFF'
     sycl_build_pi_esimd_cpu = 'ON'
     sycl_build_pi_rocm = 'OFF'
+    sycl_build_pi_rocm_platform = 'AMD'
     sycl_werror = 'ON'
     llvm_enable_assertions = 'ON'
     llvm_enable_doxygen = 'OFF'
@@ -40,21 +41,26 @@ def do_configure(args):
     if args.arm:
         llvm_targets_to_build = 'ARM;AArch64'
 
+    if args.disable_esimd_cpu:
+        sycl_build_pi_esimd_cpu = 'OFF'
+
+    if args.cuda or args.rocm:
+        llvm_enable_projects += ';libclc'
+
     if args.cuda:
         llvm_targets_to_build += ';NVPTX'
-        llvm_enable_projects += ';libclc'
         libclc_targets_to_build = 'nvptx64--;nvptx64--nvidiacl'
         sycl_build_pi_cuda = 'ON'
 
-    if args.disable_esimd_cpu:
-        sycl_build_pi_esimd_cpu = 'OFF'
-
     if args.rocm:
-        llvm_targets_to_build += ';AMDGPU'
-        # TODO libclc should be added once,
-        # TODO when we build DPC++ with both CUDA and ROCM support
-        llvm_enable_projects += ';libclc'
-        libclc_targets_to_build = 'amdgcn--;amdgcn--amdhsa'
+        if args.rocm_platform == 'AMD':
+            llvm_targets_to_build += ';AMDGPU'
+            libclc_targets_to_build += ';amdgcn--;amdgcn--amdhsa'
+        elif args.rocm_platform == 'NVIDIA' and not args.cuda:
+            llvm_targets_to_build += ';NVPTX'
+            libclc_targets_to_build += ';nvptx64--;nvptx64--nvidiacl'
+
+        sycl_build_pi_rocm_platform = args.rocm_platform
         sycl_build_pi_rocm = 'ON'
 
     if args.no_werror:
@@ -92,6 +98,7 @@ def do_configure(args):
         "-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
         "-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
         "-DSYCL_BUILD_PI_ROCM={}".format(sycl_build_pi_rocm),
+        "-DSYCL_BUILD_PI_ROCM_PLATFORM={}".format(sycl_build_pi_rocm_platform),
         "-DLLVM_BUILD_TOOLS=ON",
         "-DSYCL_ENABLE_WERROR={}".format(sycl_werror),
         "-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
@@ -161,7 +168,8 @@ def main():
     parser.add_argument("-t", "--build-type",
                         metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release")
     parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
-    parser.add_argument("--rocm", action='store_true', help="swith from OpenCL to ROCM")
+    parser.add_argument("--rocm", action='store_true', help="switch from OpenCL to ROCm")
+    parser.add_argument("--rocm-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose ROCm backend")
     parser.add_argument("--arm", action='store_true', help="build ARM support rather than x86")
     parser.add_argument("--disable-esimd-cpu", action='store_true', help="build without ESIMD_CPU support")
     parser.add_argument("--no-assertions", action='store_true', help="build without assertions")

@@ -10,6 +10,7 @@ and a wide range of compute accelerators such as GPU and FPGA.
     - [Build DPC++ toolchain with libc++ library](#build-dpc-toolchain-with-libc-library)
     - [Build DPC++ toolchain with support for NVIDIA CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda)
     - [Build DPC++ toolchain with support for AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm)
+    - [Build DPC++ toolchain with support for NVIDIA ROCm](#build-dpc-toolchain-with-support-for-nvidia-rocm)
     - [Build Doxygen documentation](#build-doxygen-documentation)
     - [Deployment](#deployment)
   - [Use DPC++ toolchain](#use-dpc-toolchain)
@@ -107,6 +108,7 @@ flags can be found by launching the script with `--help`):
 * `--no-werror` -> Don't treat warnings as errors when compiling llvm
 * `--cuda` -> use the cuda backend (see [Nvidia CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda))
 * `--rocm` -> use the rocm backend (see [AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm))
+* `--rocm-platform` -> select the platform used by the rocm backend, `AMD` (default) or `NVIDIA` (see [AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm) or [NVIDIA ROCm](#build-dpc-toolchain-with-support-for-nvidia-rocm))
 * `--shared-libs` -> Build shared libraries
 * `-t` -> Build type (debug or release)
 * `-o` -> Path to build directory
@@ -175,6 +177,34 @@ produce a standard ELF shared code object which can be loaded and executed on an
 So if you want to support AMD ROCm, you should also build the lld project.
 [LLD Build Guide](https://lld.llvm.org/)
 
+The following CMake variables can be set to change where CMake looks for the
+ROCm installation:
+
+* `SYCL_BUILD_PI_ROCM_INCLUDE_DIR`: Path to HIP include directory (default
+  `/opt/rocm/hip/include`).
+* `SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR`: Path to HSA include directory (default
+  `/opt/rocm/hsa/include`).
+* `SYCL_BUILD_PI_ROCM_AMD_LIBRARY`: Path to HIP runtime library (default
+  `/opt/rocm/hip/lib/libamdhip64.so`).
+
+### Build DPC++ toolchain with support for NVIDIA ROCm
+
+DPC++ has experimental support for using ROCm on NVIDIA devices.
+
+This is a compatibility feature and the [CUDA backend](#build-dpc-toolchain-with-support-for-nvidia-cuda)
+should be preferred to run on NVIDIA GPUs.
+
+To enable support for NVIDIA ROCm devices, follow the instructions for the Linux
+DPC++ toolchain, but add the `--rocm` and `--rocm-platform NVIDIA` flags to
+`configure.py`.
+
+Enabling these flags requires both ROCm, more specifically
+[HIP NVCC](https://rocmdocs.amd.com/en/latest/Installation_Guide/HIP-Installation.html#nvidia-platform),
+and CUDA to be installed; for CUDA, see the
+[NVIDIA CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html).
+
+So far this has only been tested on Linux with ROCm 4.2, CUDA 11, and a GeForce
+GTX 1060 card.
 
 ### Build Doxygen documentation
 
@@ -510,7 +540,7 @@ and run following command:
 clang++ -fsycl simple-sycl-app.cpp -o simple-sycl-app.exe
 ```
 
-When building for CUDA, use the CUDA target triple as follows:
+When building for CUDA or NVIDIA ROCm, use the CUDA target triple as follows:
 
 ```bash
 clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice \

@@ -1,51 +1,76 @@
-message(STATUS "Including the PI API ROCM backend.")
+# Set default PI ROCm platform to AMD
+set(SYCL_BUILD_PI_ROCM_PLATFORM "AMD" CACHE STRING "PI ROCm platform, AMD or NVIDIA")
 
- # cannot rely on cmake support for ROCM; it assumes runtime API is being used.
- # we only require the ROCM driver API to be used
- # rocm_rocm_LIBRARY variable defines the path to libhsa-runtime64.so, the ROCM Driver API library.
+message(STATUS "Including the PI API ROCM backend for ${SYCL_BUILD_PI_ROCM_PLATFORM}.")
 
-#find_package(ROCM 4.0 REQUIRED)
-
-# Make imported library global to use it within the project.
-add_library(rocmdrv SHARED IMPORTED GLOBAL)
-
-
-set(ROCM_ROCM_LIBRARY "/opt/rocm/hip/lib/libamdhip64.so")
-set(ROCM_INCLUDE_DIRS "/opt/rocm/hip/include")
-set(hsa_inc_dir "/opt/rocm/hsa/include")
-
-
-add_definitions(-D__HIP_PLATFORM_HCC__)
-
-set_target_properties(
-  rocmdrv PROPERTIES 
-    IMPORTED_LOCATION             ${ROCM_ROCM_LIBRARY}
-    INTERFACE_INCLUDE_DIRECTORIES ${ROCM_INCLUDE_DIRS}
-)
+# Set default ROCm include dirs
+set(SYCL_BUILD_PI_ROCM_INCLUDE_DIR "/opt/rocm/hip/include" CACHE STRING "HIP include dir")
+set(SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR "/opt/rocm/hsa/include" CACHE STRING "HSA include dir")
+set(HIP_HEADERS "${SYCL_BUILD_PI_ROCM_INCLUDE_DIR};${SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR}")
 
+# Create pi_rocm library
 add_library(pi_rocm SHARED
-   "${sycl_inc_dir}/CL/sycl/detail/pi.h"
-   "${sycl_inc_dir}/CL/sycl/detail/pi.hpp"
-   "pi_rocm.hpp"
-   "pi_rocm.cpp" 
+  "${sycl_inc_dir}/CL/sycl/detail/pi.h"
+  "${sycl_inc_dir}/CL/sycl/detail/pi.hpp"
+  "pi_rocm.hpp"
+  "pi_rocm.cpp"
 )
-
-
 add_dependencies(sycl-toolchain pi_rocm)
-
 set_target_properties(pi_rocm PROPERTIES LINKER_LANGUAGE CXX)
+target_link_libraries(pi_rocm PUBLIC OpenCL-Headers)
 
-
+# Setup include directories
 target_include_directories(pi_rocm
-        PRIVATE
-        ${sycl_inc_dir}
-        ${sycl_plugin_dir}
-        ${ROCM_INCLUDE_DIRS}
-        ${hsa_inc_dir}
+  PRIVATE
+  ${sycl_inc_dir}
+  ${sycl_plugin_dir}
 )
 
-
-target_link_libraries(pi_rocm PUBLIC OpenCL-Headers rocmdrv)
+if("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "AMD")
+  # Import HIP runtime library
+  set(SYCL_BUILD_PI_ROCM_AMD_LIBRARY "/opt/rocm/hip/lib/libamdhip64.so" CACHE STRING "HIP AMD runtime library")
+  add_library(rocmdrv SHARED IMPORTED GLOBAL)
+
+  set_target_properties(
+    rocmdrv PROPERTIES
+      IMPORTED_LOCATION                    ${SYCL_BUILD_PI_ROCM_AMD_LIBRARY}
+      INTERFACE_INCLUDE_DIRECTORIES        "${HIP_HEADERS}"
+      INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
+  )
+  target_link_libraries(pi_rocm PUBLIC rocmdrv)
+
+  # Set HIP define to select AMD platform
+  target_compile_definitions(pi_rocm PRIVATE __HIP_PLATFORM_AMD__)
+ elseif("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "NVIDIA")
+  # Import CUDA libraries
+  find_package(CUDA REQUIRED)
+  list(APPEND HIP_HEADERS ${CUDA_INCLUDE_DIRS})
+
+  # cudadrv may be defined by the CUDA plugin
+  if(NOT TARGET cudadrv)
+    add_library(cudadrv SHARED IMPORTED GLOBAL)
+    set_target_properties(
+      cudadrv PROPERTIES
+        IMPORTED_LOCATION                    ${CUDA_CUDA_LIBRARY}
+        INTERFACE_INCLUDE_DIRECTORIES        "${HIP_HEADERS}"
+        INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
+    )
+  endif()
+
+  add_library(cudart SHARED IMPORTED GLOBAL)
+  set_target_properties(
+    cudart PROPERTIES
+      IMPORTED_LOCATION                    ${CUDA_CUDART_LIBRARY}
+      INTERFACE_INCLUDE_DIRECTORIES        "${HIP_HEADERS}"
+      INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
+  )
+  target_link_libraries(pi_rocm PUBLIC cudadrv cudart)
+
+  # Set HIP define to select NVIDIA platform
+  target_compile_definitions(pi_rocm PRIVATE __HIP_PLATFORM_NVIDIA__)
+else()
+  message(FATAL_ERROR "Unspecified PI ROCM platform please set SYCL_BUILD_PI_ROCM_PLATFORM to 'AMD' or 'NVIDIA'")
+endif()
 
 add_common_options(pi_rocm)