codeplaysoftware · steffenlarsen · Feb 6, 2020 · Feb 6, 2020 · Feb 18, 2020 · Feb 7, 2020
diff --git a/buildbot/configure.py b/buildbot/configure.py
@@ -11,30 +11,49 @@ def do_configure(args):
     sycl_dir = os.path.join(args.src_dir, "sycl")
     spirv_dir = os.path.join(args.src_dir, "llvm-spirv")
     ocl_header_dir = os.path.join(args.obj_dir, "OpenCL-Headers")
-    icd_loader_lib = ''
+    icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build")
+    llvm_targets_to_build = 'X86'  # extended with NVPTX when --cuda is given
+    llvm_enable_projects = 'clang;llvm-spirv;sycl;opencl-aot'  # libclc appended for --cuda
+    libclc_targets_to_build = ''  # only populated for --cuda builds
+    sycl_build_pi_cuda = 'OFF'  # switched to ON by --cuda
+    llvm_enable_assertions = 'OFF'  # switched to ON by --assertions; must default to OFF or the flag is a no-op
 
     if platform.system() == 'Linux':
-      icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "libOpenCL.so")
+        icd_loader_lib = os.path.join(icd_loader_lib, "libOpenCL.so")
     else:
-      icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "OpenCL.lib")
+        icd_loader_lib = os.path.join(icd_loader_lib, "OpenCL.lib")
+
+    if args.cuda:
+        llvm_targets_to_build += ';NVPTX'
+        llvm_enable_projects += ';libclc'
+        libclc_targets_to_build = 'nvptx64--;nvptx64--nvidiacl'
+        sycl_build_pi_cuda = 'ON'
+
+    if args.assertions:
+        llvm_enable_assertions = 'ON'
 
     install_dir = os.path.join(args.obj_dir, "install")
 
-    cmake_cmd = ["cmake",
-                 "-G", "Ninja",
-                 "-DCMAKE_BUILD_TYPE={}".format(args.build_type),
-                 "-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
-                 "-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
-                 "-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
-                 "-DLLVM_ENABLE_PROJECTS=clang;sycl;llvm-spirv;opencl-aot",
-                 "-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
-                 "-DOpenCL_LIBRARY={}".format(icd_loader_lib),
-                 "-DLLVM_BUILD_TOOLS=ON",
-                 "-DSYCL_ENABLE_WERROR=ON",
-                 "-DLLVM_ENABLE_ASSERTIONS=ON",
-                 "-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
-                 "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
-                 llvm_dir]
+    cmake_cmd = [
+        "cmake",
+        "-G", "Ninja",
+        "-DCMAKE_BUILD_TYPE={}".format(args.build_type),
+        "-DLLVM_ENABLE_ASSERTIONS={}".format(llvm_enable_assertions),
+        "-DLLVM_TARGETS_TO_BUILD={}".format(llvm_targets_to_build),
+        "-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
+        "-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
+        "-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
+        "-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
+        "-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
+        "-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
+        "-DOpenCL_LIBRARY={}".format(icd_loader_lib),
+        "-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
+        "-DLLVM_BUILD_TOOLS=ON",
+        "-DSYCL_ENABLE_WERROR=ON",
+        "-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
+        "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
+        llvm_dir
+    ]
 
     print(cmake_cmd)
 
@@ -63,6 +82,8 @@ def main():
     parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory")
     parser.add_argument("-t", "--build-type",
                         metavar="BUILD_TYPE", required=True, help="build type, debug or release")
+    parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
+    parser.add_argument("--assertions", action='store_true', help="build with assertions")
 
     args = parser.parse_args()
 
@@ -74,4 +95,3 @@ def main():
     ret = main()
     exit_code = 0 if ret else 1
     sys.exit(exit_code)
-
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -64,6 +64,9 @@ def warn_drv_unknown_cuda_version: Warning<
   "Unknown CUDA version %0. Assuming the latest supported version %1">,
   InGroup<CudaUnknownVersion>;
 def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
+def err_drv_no_sycl_libspirv : Error<
+  "cannot find `libspirv-nvptx64--nvidiacl.bc`. Provide path to libspirv library via "
+  "-fsycl-libspirv-path, or pass -fno-sycl-libspirv to build without linking with libspirv.">;
 def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
 def err_drv_invalid_thread_model_for_target : Error<
   "invalid thread model '%0' in '%1' for this target">;

diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -28,7 +28,7 @@ namespace clang {
     // Size of each of the diagnostic categories.
     enum {
       DIAG_SIZE_COMMON        =  300,
-      DIAG_SIZE_DRIVER        =  250, // 200 -> 250 for SYCL related diagnostics
+      DIAG_SIZE_DRIVER        =  210,
       DIAG_SIZE_FRONTEND      =  150,
       DIAG_SIZE_SERIALIZATION =  120,
       DIAG_SIZE_LEX           =  400,

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -1872,6 +1872,9 @@ def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
 def fsycl_help : Flag<["-"], "fsycl-help">, Alias<fsycl_help_EQ>,
   Flags<[DriverOption, CoreOption]>, AliasArgs<["all"]>, HelpText<"Emit help information "
   "from all of the offline compilation tools">;
+def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">,
+  Flags<[CC1Option, CoreOption]>, HelpText<"Path to libspirv library">;
+def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">, HelpText<"Disable check for libspirv">;
 def fsyntax_only : Flag<["-"], "fsyntax-only">,
   Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
 def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;

diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -57,7 +57,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                      .Default(32);
   }
 
-  TLSSupported = false;
+  // FIXME: Needed for compiling SYCL to PTX.
+  TLSSupported = Triple.getEnvironment() == llvm::Triple::SYCLDevice;
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
   UseAddrSpaceMapMangling = true;

diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
@@ -141,6 +141,12 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
     Opts.support("cl_khr_global_int32_extended_atomics");
     Opts.support("cl_khr_local_int32_base_atomics");
     Opts.support("cl_khr_local_int32_extended_atomics");
+    // PTX actually supports 64-bit operations even if the Nvidia OpenCL
+    // runtime does not report support for them.
+    // This is required for libclc to compile 64-bit atomic functions.
+    // FIXME: maybe we should have a way to control this?
+    Opts.support("cl_khr_int64_base_atomics");
+    Opts.support("cl_khr_int64_extended_atomics");
   }
 
   /// \returns If a target requires an address within a target specific address

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
@@ -842,9 +842,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
   PerFunctionPasses.add(
       createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
 
-  if (LangOpts.SYCLIsDevice)
-    PerFunctionPasses.add(createSYCLLowerWGScopePass());
-
   CreatePasses(PerModulePasses, PerFunctionPasses);
 
   legacy::PassManager CodeGenPasses;

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
@@ -755,6 +755,12 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
     return *FI;
 
   unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
+  // This is required so SYCL kernels are successfully processed by tools from CUDA. Kernels
+  // with a `spir_kernel` calling convention are ignored otherwise.
+  if (CC == llvm::CallingConv::SPIR_KERNEL && CGM.getTriple().isNVPTX() &&
+      getContext().getLangOpts().SYCLIsDevice) {
+    CC = llvm::CallingConv::C;
+  }
 
   // Construct the function info.  We co-allocate the ArgInfos.
   FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,

diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -10,6 +10,7 @@
 #include "CodeGenModule.h"
 #include "CoverageMappingGen.h"
 #include "MacroPPCallbacks.h"
+#include "SYCLLowerIR/LowerWGScope.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclCXX.h"
@@ -33,6 +34,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Linker/Linker.h"
@@ -326,6 +328,17 @@ namespace clang {
           CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
         Ctx.setDiagnosticsHotnessRequested(true);
 
+      // The parallel_for_work_group legalization pass can emit calls to
+      // builtin functions. Definitions of those builtins can be provided in
+      // LinkModule. We force the pass to legalize the code before the link
+      // happens.
+      if (LangOpts.SYCLIsDevice) {
+        PrettyStackTraceString CrashInfo("Pre-linking SYCL passes");
+        legacy::PassManager PreLinkingSyclPasses;
+        PreLinkingSyclPasses.add(createSYCLLowerWGScopePass());
+        PreLinkingSyclPasses.run(*getModule());
+      }
+
       // Link each LinkModule into our module.
       if (LinkInModules())
         return;

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -240,6 +240,8 @@ void CodeGenModule::createSYCLRuntime() {
   switch (getTriple().getArch()) {
   case llvm::Triple::spir:
   case llvm::Triple::spir64:
+  case llvm::Triple::nvptx:
+  case llvm::Triple::nvptx64:
     SYCLRuntime.reset(new CGSYCLRuntime(*this));
     break;
   default: