Skip to content

Commit 0d640ea

Browse files
author
Alexander Johnston
committed
[SYCL][CUDA] Initial implementation of the CUDA backend
Contributors Alan Forbes <[email protected]> Alexander Johnston <[email protected]> Bjoern Knafla <[email protected]> Daniel Soutar <[email protected]> David Wood <[email protected]> Kumudha Narasimhan <[email protected]> Mehdi Goli <[email protected]> Przemek Malon <[email protected]> Ruyman Reyes <[email protected]> Stuart Adams <[email protected]> Svetlozar Georgiev <[email protected]> Steffen Larsen <[email protected]> Victor Lomuller <[email protected]> Signed-off-by: Alexander Johnston <[email protected]>
1 parent ba96fd7 commit 0d640ea

File tree

826 files changed

+21323
-3446
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

826 files changed

+21323
-3446
lines changed

buildbot/configure.py

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,30 +11,49 @@ def do_configure(args):
1111
sycl_dir = os.path.join(args.src_dir, "sycl")
1212
spirv_dir = os.path.join(args.src_dir, "llvm-spirv")
1313
ocl_header_dir = os.path.join(args.obj_dir, "OpenCL-Headers")
14-
icd_loader_lib = ''
14+
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build")
15+
llvm_targets_to_build = 'X86'
16+
llvm_enable_projects = 'clang;llvm-spirv;sycl'
17+
libclc_targets_to_build = ''
18+
sycl_build_pi_cuda = 'OFF'
19+
llvm_enable_assertions = 'OFF'
1520

1621
if platform.system() == 'Linux':
17-
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "libOpenCL.so")
22+
icd_loader_lib = os.path.join(icd_loader_lib, "libOpenCL.so")
1823
else:
19-
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "OpenCL.lib")
24+
icd_loader_lib = os.path.join(icd_loader_lib, "OpenCL.lib")
25+
26+
if args.cuda:
27+
llvm_targets_to_build += ';NVPTX'
28+
llvm_enable_projects += ';libclc'
29+
libclc_targets_to_build = 'nvptx64--;nvptx64--nvidiacl'
30+
sycl_build_pi_cuda = 'ON'
31+
32+
if args.assertions:
33+
llvm_enable_assertions = 'ON'
2034

2135
install_dir = os.path.join(args.obj_dir, "install")
2236

23-
cmake_cmd = ["cmake",
24-
"-G", "Ninja",
25-
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
26-
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
27-
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
28-
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
29-
"-DLLVM_ENABLE_PROJECTS=clang;sycl;llvm-spirv;opencl-aot",
30-
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
31-
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
32-
"-DLLVM_BUILD_TOOLS=ON",
33-
"-DSYCL_ENABLE_WERROR=ON",
34-
"-DLLVM_ENABLE_ASSERTIONS=ON",
35-
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
36-
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
37-
llvm_dir]
37+
cmake_cmd = [
38+
"cmake",
39+
"-G", "Ninja",
40+
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
41+
"-DLLVM_ENABLE_ASSERTIONS={}".format(llvm_enable_assertions),
42+
"-DLLVM_TARGETS_TO_BUILD={}".format(llvm_targets_to_build),
43+
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv",
44+
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
45+
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
46+
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
47+
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
48+
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
49+
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
50+
"-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
51+
"-DLLVM_BUILD_TOOLS=ON",
52+
"-DSYCL_ENABLE_WERROR=ON",
53+
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
54+
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
55+
llvm_dir
56+
]
3857

3958
print(cmake_cmd)
4059

@@ -63,6 +82,8 @@ def main():
6382
parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory")
6483
parser.add_argument("-t", "--build-type",
6584
metavar="BUILD_TYPE", required=True, help="build type, debug or release")
85+
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
86+
parser.add_argument("--assertions", action='store_true', help="build with assertions")
6687

6788
args = parser.parse_args()
6889

@@ -74,4 +95,3 @@ def main():
7495
ret = main()
7596
exit_code = 0 if ret else 1
7697
sys.exit(exit_code)
77-

clang/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@ endif()
219219
include(CheckIncludeFile)
220220
check_include_file(sys/resource.h CLANG_HAVE_RLIMITS)
221221

222+
if(SYCL_BUILD_PI_CUDA)
223+
set(SYCL_HAVE_PI_CUDA 1)
224+
endif()
225+
222226
set(CLANG_RESOURCE_DIR "" CACHE STRING
223227
"Relative directory from the Clang binary to its resource files.")
224228

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ def err_drv_cuda_version_unsupported : Error<
6161
"install, pass a different GPU arch with --cuda-gpu-arch, or pass "
6262
"--no-cuda-version-check.">;
6363
def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
64+
def err_drv_no_sycl_libspirv : Error<
65+
"cannot find `libspirv-nvptx64--nvidiacl.bc`. Provide path to libspirv library via "
66+
"-fsycl-libspirv-path, or pass -fno-sycl-libspirv to build without linking with libspirv.">;
6467
def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
6568
def err_drv_invalid_thread_model_for_target : Error<
6669
"invalid thread model '%0' in '%1' for this target">;

clang/include/clang/Basic/DiagnosticIDs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ namespace clang {
2828
// Size of each of the diagnostic categories.
2929
enum {
3030
DIAG_SIZE_COMMON = 300,
31-
DIAG_SIZE_DRIVER = 250, // 200 -> 250 for SYCL related diagnostics
31+
DIAG_SIZE_DRIVER = 210,
3232
DIAG_SIZE_FRONTEND = 150,
3333
DIAG_SIZE_SERIALIZATION = 120,
3434
DIAG_SIZE_LEX = 400,

clang/include/clang/Config/config.h.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@
8080
#cmakedefine01 CLANG_ENABLE_OBJC_REWRITER
8181
#cmakedefine01 CLANG_ENABLE_STATIC_ANALYZER
8282

83+
/* Define if we have SYCL PI CUDA support */
84+
#cmakedefine SYCL_HAVE_PI_CUDA ${SYCL_HAVE_PI_CUDA}
85+
8386
/* Spawn a new process clang.exe for the CC1 tool invocation, when necessary */
8487
#cmakedefine01 CLANG_SPAWN_CC1
8588

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def pedantic_Group : OptionGroup<"<pedantic group>">, Group<f_Group>,
124124
def opencl_Group : OptionGroup<"<opencl group>">, Group<f_Group>,
125125
DocName<"OpenCL flags">;
126126

127-
def sycl_Group : OptionGroup<"<sycl group>">, Group<f_Group>,
127+
def sycl_Group : OptionGroup<"<sycl group>">, Group<f_Group>,
128128
DocName<"SYCL flags">;
129129

130130
def m_Group : OptionGroup<"<m group>">, Group<CompileOnly_Group>,
@@ -1866,6 +1866,9 @@ def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
18661866
def fsycl_help : Flag<["-"], "fsycl-help">, Alias<fsycl_help_EQ>,
18671867
Flags<[DriverOption, CoreOption]>, AliasArgs<["all"]>, HelpText<"Emit help information "
18681868
"from all of the offline compilation tools">;
1869+
def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">,
1870+
Flags<[CC1Option, CoreOption]>, HelpText<"Path to libspirv library">;
1871+
def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">, HelpText<"Disable check for libspirv">;
18691872
def fsyntax_only : Flag<["-"], "fsyntax-only">,
18701873
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
18711874
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
5757
.Default(32);
5858
}
5959

60-
TLSSupported = false;
60+
// FIXME: Needed for compiling SYCL to PTX.
61+
TLSSupported = Triple.getEnvironment() == llvm::Triple::SYCLDevice;
6162
VLASupported = false;
6263
AddrSpaceMap = &NVPTXAddrSpaceMap;
6364
UseAddrSpaceMapMangling = true;

clang/lib/Basic/Targets/NVPTX.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,12 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
141141
Opts.support("cl_khr_global_int32_extended_atomics");
142142
Opts.support("cl_khr_local_int32_base_atomics");
143143
Opts.support("cl_khr_local_int32_extended_atomics");
144+
// PTX actually supports 64-bit operations even if the Nvidia OpenCL
145+
// runtime does not report support for it.
146+
// This is required for libclc to compile 64-bit atomic functions.
147+
// FIXME: Maybe we should have a way to control this?
148+
Opts.support("cl_khr_int64_base_atomics");
149+
Opts.support("cl_khr_int64_extended_atomics");
144150
}
145151

146152
/// \returns If a target requires an address within a target specific address

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -831,9 +831,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
831831
PerFunctionPasses.add(
832832
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
833833

834-
if (LangOpts.SYCLIsDevice)
835-
PerModulePasses.add(createSYCLLowerWGScopePass());
836-
837834
CreatePasses(PerModulePasses, PerFunctionPasses);
838835

839836
legacy::PassManager CodeGenPasses;

clang/lib/CodeGen/CGCall.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,12 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
755755
return *FI;
756756

757757
unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
758+
// This is required so SYCL kernels are successfully processed by tools from CUDA. Kernels
759+
// with a `spir_kernel` calling convention are ignored otherwise.
760+
if (CC == llvm::CallingConv::SPIR_KERNEL && CGM.getTriple().isNVPTX() &&
761+
getContext().getLangOpts().SYCLIsDevice) {
762+
CC = llvm::CallingConv::C;
763+
}
758764

759765
// Construct the function info. We co-allocate the ArgInfos.
760766
FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,

clang/lib/CodeGen/CodeGenAction.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "CodeGenModule.h"
1111
#include "CoverageMappingGen.h"
1212
#include "MacroPPCallbacks.h"
13+
#include "SYCLLowerIR/LowerWGScope.h"
1314
#include "clang/AST/ASTConsumer.h"
1415
#include "clang/AST/ASTContext.h"
1516
#include "clang/AST/DeclCXX.h"
@@ -32,6 +33,7 @@
3233
#include "llvm/IR/DiagnosticPrinter.h"
3334
#include "llvm/IR/GlobalValue.h"
3435
#include "llvm/IR/LLVMContext.h"
36+
#include "llvm/IR/LegacyPassManager.h"
3537
#include "llvm/IR/Module.h"
3638
#include "llvm/IR/RemarkStreamer.h"
3739
#include "llvm/IRReader/IRReader.h"
@@ -326,6 +328,17 @@ namespace clang {
326328
CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
327329
Ctx.setDiagnosticsHotnessRequested(true);
328330

331+
// The parallel_for_work_group legalization pass can emit calls to
332+
// builtin functions. Definitions of those builtins can be provided in
333+
// LinkModule. We force the pass to legalize the code before the link
334+
// happens.
335+
if (LangOpts.SYCLIsDevice) {
336+
PrettyStackTraceString CrashInfo("Pre-linking SYCL passes");
337+
legacy::PassManager PreLinkingSyclPasses;
338+
PreLinkingSyclPasses.add(createSYCLLowerWGScopePass());
339+
PreLinkingSyclPasses.run(*getModule());
340+
}
341+
329342
// Link each LinkModule into our module.
330343
if (LinkInModules())
331344
return;

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ void CodeGenModule::createSYCLRuntime() {
240240
switch (getTriple().getArch()) {
241241
case llvm::Triple::spir:
242242
case llvm::Triple::spir64:
243+
case llvm::Triple::nvptx:
244+
case llvm::Triple::nvptx64:
243245
SYCLRuntime.reset(new CGSYCLRuntime(*this));
244246
break;
245247
default:

0 commit comments

Comments
 (0)