Skip to content

Commit ff947d1

Browse files
Steffen Larsenaidan.belton
authored andcommitted
[SYCL][CUDA] Add support for MSVC
Signed-off-by: Steffen Larsen <[email protected]>
1 parent a0d9c40 commit ff947d1

File tree

15 files changed

+143
-67
lines changed

15 files changed

+143
-67
lines changed

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
7777
return CudaVersion::CUDA_110;
7878
if (raw_version < 11020)
7979
return CudaVersion::CUDA_111;
80+
if (raw_version < 11030)
81+
return CudaVersion::CUDA_112;
8082
return CudaVersion::LATEST;
8183
}
8284

@@ -131,7 +133,9 @@ CudaInstallationDetector::CudaInstallationDetector(
131133
SmallVector<Candidate, 4> Candidates;
132134

133135
// In decreasing order so we prefer newer versions to older versions.
134-
std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
136+
std::initializer_list<const char *> Versions = {
137+
"11.4", "11.3", "11.2", "11.1", "10.2", "10.1", "10.0",
138+
"9.2", "9.1", "9.0", "8.0", "7.5", "7.0"};
135139
auto &FS = D.getVFS();
136140

137141
if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
@@ -193,18 +197,27 @@ CudaInstallationDetector::CudaInstallationDetector(
193197
if (CheckLibDevice && !FS.exists(LibDevicePath))
194198
continue;
195199

196-
// On Linux, we have both lib and lib64 directories, and we need to choose
197-
// based on our triple. On MacOS, we have only a lib directory.
198-
//
199-
// It's sufficient for our purposes to be flexible: If both lib and lib64
200-
// exist, we choose whichever one matches our triple. Otherwise, if only
201-
// lib exists, we use it.
202-
if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
203-
LibPath = InstallPath + "/lib64";
204-
else if (FS.exists(InstallPath + "/lib"))
205-
LibPath = InstallPath + "/lib";
206-
else
207-
continue;
200+
if (HostTriple.isOSWindows()) {
201+
if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib/x64"))
202+
LibPath = InstallPath + "/lib/x64";
203+
else if (FS.exists(InstallPath + "/lib/Win32"))
204+
LibPath = InstallPath + "/lib/Win32";
205+
else
206+
continue;
207+
} else {
208+
// On Linux, we have both lib and lib64 directories, and we need to choose
209+
// based on our triple. On MacOS, we have only a lib directory.
210+
//
211+
// It's sufficient for our purposes to be flexible: If both lib and lib64
212+
// exist, we choose whichever one matches our triple. Otherwise, if only
213+
// lib exists, we use it.
214+
if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
215+
LibPath = InstallPath + "/lib64";
216+
else if (FS.exists(InstallPath + "/lib"))
217+
LibPath = InstallPath + "/lib";
218+
else
219+
continue;
220+
}
208221

209222
CudaVersionInfo VersionInfo = {"", CudaVersion::UNKNOWN};
210223
if (auto VersionFile = FS.getBufferForFile(InstallPath + "/version.txt"))
@@ -722,7 +735,14 @@ void CudaToolChain::addClangTargetOptions(
722735
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
723736
LibraryPaths.emplace_back(WithInstallPath.c_str());
724737

725-
std::string LibSpirvTargetName = "libspirv-nvptx64--nvidiacl.bc";
738+
// Select remangled libclc variant. 64-bit longs default, 32-bit longs on
739+
// Windows
740+
std::string LibSpirvTargetName =
741+
"remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc";
742+
if (HostTC.getTriple().isOSWindows())
743+
LibSpirvTargetName =
744+
"remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc";
745+
726746
for (StringRef LibraryPath : LibraryPaths) {
727747
SmallString<128> LibSpirvTargetFile(LibraryPath);
728748
llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);

libclc/cmake/modules/HandleInLLVMTree.cmake

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,25 @@
11
macro(configure_in_llvm_tree)
2-
set(LLVM_CLANG ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
3-
set(LLVM_AS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as)
4-
set(LLVM_LINK ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link)
5-
set(LLVM_OPT ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt)
2+
set(LLVM_CLANG ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX})
3+
set(LLVM_AS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX})
4+
set(LLVM_LINK ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX})
5+
set(LLVM_OPT ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX})
6+
set(LIBCLC_REMANGLER ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX})
67

7-
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
8-
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang "" )
9-
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
10-
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as)
11-
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as "" )
12-
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as)
13-
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link)
14-
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link "" )
15-
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link)
16-
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt)
17-
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt "" )
18-
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt)
8+
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX})
9+
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX} "" )
10+
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX})
11+
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX})
12+
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX} "" )
13+
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX})
14+
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX})
15+
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX} "" )
16+
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX})
17+
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX})
18+
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX} "" )
19+
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX})
20+
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX})
21+
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX} "" )
22+
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX})
1923

2024
# Assume all works well
2125
# We can't test the compilers as they haven't been built yet

libclc/utils/prepare-builtins.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,17 @@ int main(int argc, char **argv) {
7575
if (NamedMDNode *OCLVersion = M->getNamedMetadata("opencl.ocl.version"))
7676
M->eraseNamedMetadata(OCLVersion);
7777

78+
// Drop wchar_size module flag
79+
if (M->getModuleFlag("wchar_size")) {
80+
SmallVector<Module::ModuleFlagEntry, 4> ModuleFlags;
81+
M->getModuleFlagsMetadata(ModuleFlags);
82+
M->getModuleFlagsMetadata()->clearOperands();
83+
for (const Module::ModuleFlagEntry ModuleFlag : ModuleFlags)
84+
if (ModuleFlag.Key->getString() != "wchar_size")
85+
M->addModuleFlag(ModuleFlag.Behavior, ModuleFlag.Key->getString(),
86+
ModuleFlag.Val);
87+
}
88+
7889
// Set linkage of every external definition to linkonce_odr.
7990
for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
8091
if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)

sycl/include/CL/sycl/detail/pi.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
#pragma once
1515

1616
#include <CL/sycl/backend_types.hpp>
17-
#include <CL/sycl/detail/common.hpp>
1817
#include <CL/sycl/detail/export.hpp>
1918
#include <CL/sycl/detail/os_util.hpp>
2019
#include <CL/sycl/detail/pi.h>
2120

2221
#include <cassert>
2322
#include <cstdint>
23+
#include <memory>
2424
#include <sstream>
2525
#include <string>
2626
#include <vector>

sycl/plugins/cuda/CMakeLists.txt

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,19 @@ find_package(CUDA 10.1 REQUIRED)
99
# Make imported library global to use it within the project.
1010
add_library(cudadrv SHARED IMPORTED GLOBAL)
1111

12-
set_target_properties(
13-
cudadrv PROPERTIES
14-
IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY}
15-
INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}
16-
)
12+
if (WIN32)
13+
set_target_properties(
14+
cudadrv PROPERTIES
15+
IMPORTED_IMPLIB ${CUDA_CUDA_LIBRARY}
16+
INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}
17+
)
18+
else()
19+
set_target_properties(
20+
cudadrv PROPERTIES
21+
IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY}
22+
INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}
23+
)
24+
endif()
1725

1826
add_library(pi_cuda SHARED
1927
"${sycl_inc_dir}/CL/sycl/detail/pi.h"
@@ -37,6 +45,24 @@ target_link_libraries(pi_cuda
3745
cudadrv
3846
)
3947

48+
if (MSVC)
49+
# by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport)
50+
# which are individually tagged for all pi* symbols in pi.h
51+
target_compile_definitions(pi_cuda PRIVATE __SYCL_BUILD_SYCL_DLL)
52+
else()
53+
# we set the visibility of all symbols 'hidden' by default.
54+
# In pi.h file, we set exported symbols with visibility==default individually
55+
target_compile_options(pi_cuda PUBLIC -fvisibility=hidden)
56+
57+
# This script file is used to allow exporting pi* symbols only.
58+
# All other symbols are regarded as local (hidden)
59+
set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/../ld-version-script.txt")
60+
61+
# Filter symbols based on the scope defined in the script file,
62+
# and export pi* function symbols in the library.
63+
target_link_libraries(pi_cuda PRIVATE "-Wl,--version-script=${linker_script}")
64+
endif()
65+
4066
add_common_options(pi_cuda)
4167

4268
install(TARGETS pi_cuda

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -680,10 +680,10 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
680680
static pi_uint32 numPlatforms = 1;
681681
static _pi_platform platformId;
682682

683-
if (num_entries == 0 and platforms != nullptr) {
683+
if (num_entries == 0 && platforms != nullptr) {
684684
return PI_INVALID_VALUE;
685685
}
686-
if (platforms == nullptr and num_platforms == nullptr) {
686+
if (platforms == nullptr && num_platforms == nullptr) {
687687
return PI_INVALID_VALUE;
688688
}
689689

@@ -4480,7 +4480,7 @@ pi_result cuda_piextUSMFree(pi_context context, void *ptr) {
44804480
CU_POINTER_ATTRIBUTE_MEMORY_TYPE};
44814481
result = PI_CHECK_ERROR(cuPointerGetAttributes(
44824482
2, attributes, attribute_values, (CUdeviceptr)ptr));
4483-
assert(type == CU_MEMORYTYPE_DEVICE or type == CU_MEMORYTYPE_HOST);
4483+
assert(type == CU_MEMORYTYPE_DEVICE || type == CU_MEMORYTYPE_HOST);
44844484
if (is_managed || type == CU_MEMORYTYPE_DEVICE) {
44854485
// Memory allocated with cuMemAlloc and cuMemAllocManaged must be freed
44864486
// with cuMemFree
@@ -4684,7 +4684,7 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
46844684
}
46854685
result = PI_CHECK_ERROR(cuPointerGetAttribute(
46864686
&value, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)ptr));
4687-
assert(value == CU_MEMORYTYPE_DEVICE or value == CU_MEMORYTYPE_HOST);
4687+
assert(value == CU_MEMORYTYPE_DEVICE || value == CU_MEMORYTYPE_HOST);
46884688
if (value == CU_MEMORYTYPE_DEVICE) {
46894689
// pointer to device memory
46904690
return getInfo(param_value_size, param_value, param_value_size_ret,
@@ -4696,7 +4696,11 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
46964696
PI_MEM_TYPE_HOST);
46974697
}
46984698
// should never get here
4699+
#ifdef _MSC_VER
4700+
__assume(0);
4701+
#else
46994702
__builtin_unreachable();
4703+
#endif
47004704
return getInfo(param_value_size, param_value, param_value_size_ret,
47014705
PI_MEM_TYPE_UNKNOWN);
47024706
}

sycl/plugins/level_zero/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR)
5858
DEPENDEES install
5959
)
6060

61+
if (WIN32)
62+
# Copy DLL into binary directory
63+
ExternalProject_Add_Step(level-zero-loader llvmbininstall
64+
COMMAND ${CMAKE_COMMAND} -E copy_directory <INSTALL_DIR>/bin/ ${LLVM_BINARY_DIR}/bin
65+
COMMENT "Installing level-zero-loader into the LLVM binary directory"
66+
DEPENDEES install
67+
)
68+
endif()
69+
6170
install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install/"
6271
DESTINATION "."
6372
COMPONENT level-zero-loader

sycl/unittests/pi/TestGetPlugin.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88
#include <algorithm>
99
#include <detail/plugin.hpp>
1010
#include <functional>
11+
#include <optional>
1112

1213
namespace pi {
13-
inline cl::sycl::detail::plugin *initializeAndGet(cl::sycl::backend backend) {
14+
inline std::optional<cl::sycl::detail::plugin>
15+
initializeAndGet(cl::sycl::backend backend) {
1416
auto plugins = cl::sycl::detail::pi::initialize();
1517
auto it = std::find_if(plugins.begin(), plugins.end(),
1618
[=](cl::sycl::detail::plugin p) -> bool {
@@ -20,9 +22,9 @@ inline cl::sycl::detail::plugin *initializeAndGet(cl::sycl::backend backend) {
2022
std::string msg = GetBackendString(backend);
2123
msg += " PI plugin not found!";
2224
std::cerr << "Warning: " << msg << " Tests using it will be skipped.\n";
23-
return nullptr;
25+
return std::nullopt;
2426
}
25-
return &*it;
27+
return std::optional<cl::sycl::detail::plugin>(*it);
2628
}
2729

2830
inline std::vector<cl::sycl::detail::plugin> initializeAndRemoveInvalid() {

sycl/unittests/pi/cuda/test_base_objects.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ using namespace cl::sycl;
2424

2525
class CudaBaseObjectsTest : public ::testing::Test {
2626
protected:
27-
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
27+
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);
2828

2929
void SetUp() override {
3030
// skip the tests if the CUDA backend is not available
31-
if (!plugin) {
31+
if (!plugin.has_value()) {
3232
GTEST_SKIP();
3333
}
3434
}

sycl/unittests/pi/cuda/test_commands.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ using namespace cl::sycl;
2121
struct CudaCommandsTest : public ::testing::Test {
2222

2323
protected:
24-
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
24+
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);
2525

2626
pi_platform platform_;
2727
pi_device device_;
@@ -30,7 +30,7 @@ struct CudaCommandsTest : public ::testing::Test {
3030

3131
void SetUp() override {
3232
// skip the tests if the CUDA backend is not available
33-
if (!plugin) {
33+
if (!plugin.has_value()) {
3434
GTEST_SKIP();
3535
}
3636

@@ -65,7 +65,7 @@ struct CudaCommandsTest : public ::testing::Test {
6565
}
6666

6767
void TearDown() override {
68-
if (plugin) {
68+
if (plugin.has_value()) {
6969
plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
7070
plugin->call<detail::PiApiKind::piContextRelease>(context_);
7171
}

sycl/unittests/pi/cuda/test_device.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ using namespace cl::sycl;
2121
struct CudaDeviceTests : public ::testing::Test {
2222

2323
protected:
24-
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
24+
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);
2525

2626
pi_platform platform_;
2727
pi_device device_;
2828
pi_context context_;
2929

3030
void SetUp() override {
3131
// skip the tests if the CUDA backend is not available
32-
if (!plugin) {
32+
if (!plugin.has_value()) {
3333
GTEST_SKIP();
3434
}
3535

@@ -56,7 +56,7 @@ struct CudaDeviceTests : public ::testing::Test {
5656
}
5757

5858
void TearDown() override {
59-
if (plugin) {
59+
if (plugin.has_value()) {
6060
plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
6161
plugin->call<detail::PiApiKind::piContextRelease>(context_);
6262
}

sycl/unittests/pi/cuda/test_kernels.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ using namespace cl::sycl;
2424
struct CudaKernelsTest : public ::testing::Test {
2525

2626
protected:
27-
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
27+
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);
2828
pi_platform platform_;
2929
pi_device device_;
3030
pi_context context_;
3131
pi_queue queue_;
3232

3333
void SetUp() override {
3434
// skip the tests if the CUDA backend is not available
35-
if (!plugin) {
35+
if (!plugin.has_value()) {
3636
GTEST_SKIP();
3737
}
3838

@@ -65,7 +65,7 @@ struct CudaKernelsTest : public ::testing::Test {
6565
}
6666

6767
void TearDown() override {
68-
if (plugin) {
68+
if (plugin.has_value()) {
6969
plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
7070
plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
7171
plugin->call<detail::PiApiKind::piContextRelease>(context_);

0 commit comments

Comments
 (0)