Skip to content

[PI][CUDA] Implementation of piEventSetCallback with tests #16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3939cf7
[SYCL][LIBCLC] Additional libclc builtins to support SYCL work
Feb 6, 2020
c1e1088
[SYCL] CMake and lit support for SYCL CUDA backend
Feb 6, 2020
856dfdc
[SYCL][CUDA] Add SYCL CUDA support to clang driver
Feb 18, 2020
e6bf942
[SYCL] Local Accessor Support for CUDA
Feb 7, 2020
9fa80ce
[SYCL][CUDA] Change __spirv_BuiltIn.. to functions
Feb 7, 2020
339ca5a
[SYCL][CUDA] Initial Implementation of the CUDA backend
Feb 21, 2020
a64df74
[SYCL] Update libclc install rules
Feb 3, 2020
63c5d73
[SYCL][CUDA] Inline cl namespace to simplify SYCL API usage
fwyzard Feb 3, 2020
f9c308a
Added missing flags for device-side builtins
Ruyk Feb 10, 2020
d5d56d2
[SYCL][CUDA] Removing unnecessary tool from the tree
Ruyk Feb 10, 2020
eb44fb1
[SYCL][PI] Fix kernel group info parameter conversion
Feb 12, 2020
d5b4cde
[SYCL] Changed CUDA unit tests to call through plugin
Feb 18, 2020
e4de6c1
[SYCL] Have default_selector consider SYCL_BE
Feb 14, 2020
5668962
[SYCL] Select GlobalPlugin based on SYCL_BE
Feb 17, 2020
f24b61c
[SYCL] Improve default device selection checks
Feb 17, 2020
607e48a
[SYCL] Formatting update for device_selector.cpp
Feb 18, 2020
77ef0af
[SYCL][CUDA] Refactor __SYCL_INLINE macro
fwyzard Feb 13, 2020
22f31bc
[SYCL][CUDA] Code style and cleanup to CUDA support
Feb 21, 2020
df1e9a0
[SYCL] Pass SYCL_BE=PI_OPENCL in check-sycl
Feb 20, 2020
80bc7f9
[SYCL][CUDA] Remove PI_CUDA specific details from clang
Feb 20, 2020
a56728f
[SYCL][CUDA] Disable linear_id/opencl-interop.cpp for cuda
Feb 20, 2020
3cf8625
[SYCL][CUDA] Further fixes to CUDA device selection
Feb 20, 2020
b7ed055
[SYCL] Enable asserts in all buildbot builds
Feb 21, 2020
836439a
[PI][CUDA] Implementation of piEventSetCallback with tests
nyalloc Dec 12, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
58 changes: 39 additions & 19 deletions buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,49 @@ def do_configure(args):
sycl_dir = os.path.join(args.src_dir, "sycl")
spirv_dir = os.path.join(args.src_dir, "llvm-spirv")
ocl_header_dir = os.path.join(args.obj_dir, "OpenCL-Headers")
icd_loader_lib = ''
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build")
llvm_targets_to_build = 'X86'
llvm_enable_projects = 'clang;llvm-spirv;sycl;opencl-aot'
libclc_targets_to_build = ''
sycl_build_pi_cuda = 'OFF'
llvm_enable_assertions = 'ON'

if platform.system() == 'Linux':
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "libOpenCL.so")
icd_loader_lib = os.path.join(icd_loader_lib, "libOpenCL.so")
else:
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "OpenCL.lib")
icd_loader_lib = os.path.join(icd_loader_lib, "OpenCL.lib")

if args.cuda:
llvm_targets_to_build += ';NVPTX'
llvm_enable_projects += ';libclc'
libclc_targets_to_build = 'nvptx64--;nvptx64--nvidiacl'
sycl_build_pi_cuda = 'ON'

if args.assertions:
llvm_enable_assertions = 'ON'

install_dir = os.path.join(args.obj_dir, "install")

cmake_cmd = ["cmake",
"-G", "Ninja",
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
"-DLLVM_ENABLE_PROJECTS=clang;sycl;llvm-spirv;opencl-aot",
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR=ON",
"-DLLVM_ENABLE_ASSERTIONS=ON",
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
llvm_dir]
cmake_cmd = [
"cmake",
"-G", "Ninja",
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
"-DLLVM_ENABLE_ASSERTIONS={}".format(llvm_enable_assertions),
"-DLLVM_TARGETS_TO_BUILD={}".format(llvm_targets_to_build),
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
"-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR=ON",
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
llvm_dir
]

print(cmake_cmd)

Expand Down Expand Up @@ -63,6 +82,8 @@ def main():
parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory")
parser.add_argument("-t", "--build-type",
metavar="BUILD_TYPE", required=True, help="build type, debug or release")
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
parser.add_argument("--assertions", action='store_true', help="build with assertions")

args = parser.parse_args()

Expand All @@ -74,4 +95,3 @@ def main():
ret = main()
exit_code = 0 if ret else 1
sys.exit(exit_code)

3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def warn_drv_unknown_cuda_version: Warning<
"Unknown CUDA version %0. Assuming the latest supported version %1">,
InGroup<CudaUnknownVersion>;
def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
def err_drv_no_sycl_libspirv : Error<
"cannot find `libspirv-nvptx64--nvidiacl.bc`. Provide path to libspirv library via "
"-fsycl-libspirv-path, or pass -fno-sycl-libspirv to build without linking with libspirv.">;
def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
def err_drv_invalid_thread_model_for_target : Error<
"invalid thread model '%0' in '%1' for this target">;
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/DiagnosticIDs.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace clang {
// Size of each of the diagnostic categories.
enum {
DIAG_SIZE_COMMON = 300,
DIAG_SIZE_DRIVER = 250, // 200 -> 250 for SYCL related diagnostics
DIAG_SIZE_DRIVER = 210,
DIAG_SIZE_FRONTEND = 150,
DIAG_SIZE_SERIALIZATION = 120,
DIAG_SIZE_LEX = 400,
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,9 @@ def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
def fsycl_help : Flag<["-"], "fsycl-help">, Alias<fsycl_help_EQ>,
Flags<[DriverOption, CoreOption]>, AliasArgs<["all"]>, HelpText<"Emit help information "
"from all of the offline compilation tools">;
def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">,
Flags<[CC1Option, CoreOption]>, HelpText<"Path to libspirv library">;
def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">, HelpText<"Disable check for libspirv">;
def fsyntax_only : Flag<["-"], "fsyntax-only">,
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
.Default(32);
}

TLSSupported = false;
// FIXME: Needed for compiling SYCL to PTX.
TLSSupported = Triple.getEnvironment() == llvm::Triple::SYCLDevice;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
UseAddrSpaceMapMangling = true;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
Opts.support("cl_khr_global_int32_extended_atomics");
Opts.support("cl_khr_local_int32_base_atomics");
Opts.support("cl_khr_local_int32_extended_atomics");
// PTX actually supports 64-bit operations even if the Nvidia OpenCL
// runtime does not report support for them.
// This is required for libclc to compile 64-bit atomic functions.
// FIXME: maybe we should have a way to control this?
Opts.support("cl_khr_int64_base_atomics");
Opts.support("cl_khr_int64_extended_atomics");
}

/// \returns If a target requires an address within a target specific address
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -842,9 +842,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
PerFunctionPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));

if (LangOpts.SYCLIsDevice)
PerFunctionPasses.add(createSYCLLowerWGScopePass());

CreatePasses(PerModulePasses, PerFunctionPasses);

legacy::PassManager CodeGenPasses;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,12 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
return *FI;

unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
// This is required so SYCL kernels are successfully processed by tools from CUDA. Kernels
// with a `spir_kernel` calling convention are ignored otherwise.
if (CC == llvm::CallingConv::SPIR_KERNEL && CGM.getTriple().isNVPTX() &&
getContext().getLangOpts().SYCLIsDevice) {
CC = llvm::CallingConv::C;
}

// Construct the function info. We co-allocate the ArgInfos.
FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/CodeGen/CodeGenAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
#include "MacroPPCallbacks.h"
#include "SYCLLowerIR/LowerWGScope.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
Expand All @@ -33,6 +34,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
Expand Down Expand Up @@ -326,6 +328,17 @@ namespace clang {
CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
Ctx.setDiagnosticsHotnessRequested(true);

// The parallel_for_work_group legalization pass can emit calls to
// builtin functions. Definitions of those builtins can be provided in
// LinkModule. We force the pass to legalize the code before the link
// happens.
if (LangOpts.SYCLIsDevice) {
PrettyStackTraceString CrashInfo("Pre-linking SYCL passes");
legacy::PassManager PreLinkingSyclPasses;
PreLinkingSyclPasses.add(createSYCLLowerWGScopePass());
PreLinkingSyclPasses.run(*getModule());
}

// Link each LinkModule into our module.
if (LinkInModules())
return;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ void CodeGenModule::createSYCLRuntime() {
switch (getTriple().getArch()) {
case llvm::Triple::spir:
case llvm::Triple::spir64:
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
SYCLRuntime.reset(new CGSYCLRuntime(*this));
break;
default:
Expand Down
Loading