Skip to content

Commit d4d9dc8

Browse files
committed
[CUDA] Added support for CUDA-8
Differential Revision: https://reviews.llvm.org/D24946
llvm-svn: 282610
1 parent fda9905 commit d4d9dc8

File tree

5 files changed

+55
-33
lines changed

5 files changed

+55
-33
lines changed

clang/lib/Driver/ToolChains.cpp

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,8 +1774,7 @@ void Generic_GCC::CudaInstallationDetector::init(
17741774
Args.getLastArgValue(options::OPT_cuda_path_EQ));
17751775
else {
17761776
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
1777-
// FIXME: Uncomment this once we can compile the cuda 8 headers.
1778-
// CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-8.0");
1777+
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-8.0");
17791778
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.5");
17801779
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.0");
17811780
}
@@ -1795,6 +1794,16 @@ void Generic_GCC::CudaInstallationDetector::init(
17951794
FS.exists(LibDevicePath)))
17961795
continue;
17971796

1797+
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
1798+
FS.getBufferForFile(InstallPath + "/version.txt");
1799+
if (!VersionFile) {
1800+
// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
1801+
// version.txt isn't present.
1802+
Version = CudaVersion::CUDA_70;
1803+
} else {
1804+
Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
1805+
}
1806+
17981807
std::error_code EC;
17991808
for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
18001809
!EC && LI != LE; LI = LI.increment(EC)) {
@@ -1807,46 +1816,35 @@ void Generic_GCC::CudaInstallationDetector::init(
18071816
StringRef GpuArch = FileName.slice(
18081817
LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
18091818
LibDeviceMap[GpuArch] = FilePath.str();
1810-
// Insert map entries for specific devices with this compute capability.
1811-
// NVCC's choice of libdevice library version is rather peculiar:
1812-
// http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#version-selection
1813-
// TODO: this will need to be updated once CUDA-8 is released.
1819+
// Insert map entries for specific devices with this compute
1820+
// capability. NVCC's choice of the libdevice library version is
1821+
// rather peculiar and depends on the CUDA version.
18141822
if (GpuArch == "compute_20") {
18151823
LibDeviceMap["sm_20"] = FilePath;
18161824
LibDeviceMap["sm_21"] = FilePath;
18171825
LibDeviceMap["sm_32"] = FilePath;
18181826
} else if (GpuArch == "compute_30") {
18191827
LibDeviceMap["sm_30"] = FilePath;
1820-
// compute_30 is the fallback libdevice variant for sm_30+,
1821-
// unless CUDA specifies different version for specific GPU
1822-
// arch.
1823-
LibDeviceMap["sm_50"] = FilePath;
1824-
LibDeviceMap["sm_52"] = FilePath;
1825-
LibDeviceMap["sm_53"] = FilePath;
1826-
// sm_6? are currently all aliases for sm_53 in LLVM and
1827-
// should use compute_30.
1828+
if (Version < CudaVersion::CUDA_80) {
1829+
LibDeviceMap["sm_50"] = FilePath;
1830+
LibDeviceMap["sm_52"] = FilePath;
1831+
LibDeviceMap["sm_53"] = FilePath;
1832+
}
18281833
LibDeviceMap["sm_60"] = FilePath;
18291834
LibDeviceMap["sm_61"] = FilePath;
18301835
LibDeviceMap["sm_62"] = FilePath;
18311836
} else if (GpuArch == "compute_35") {
18321837
LibDeviceMap["sm_35"] = FilePath;
18331838
LibDeviceMap["sm_37"] = FilePath;
18341839
} else if (GpuArch == "compute_50") {
1835-
// NVCC does not use compute_50 libdevice at all at the moment.
1836-
// The version that's shipped with CUDA-7.5 is a copy of compute_30.
1840+
if (Version >= CudaVersion::CUDA_80) {
1841+
LibDeviceMap["sm_50"] = FilePath;
1842+
LibDeviceMap["sm_52"] = FilePath;
1843+
LibDeviceMap["sm_53"] = FilePath;
1844+
}
18371845
}
18381846
}
18391847

1840-
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
1841-
FS.getBufferForFile(InstallPath + "/version.txt");
1842-
if (!VersionFile) {
1843-
// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
1844-
// version.txt isn't present.
1845-
Version = CudaVersion::CUDA_70;
1846-
} else {
1847-
Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
1848-
}
1849-
18501848
IsValid = true;
18511849
break;
18521850
}

clang/lib/Headers/__clang_cuda_runtime_wrapper.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
#include "cuda.h"
6363
#if !defined(CUDA_VERSION)
6464
#error "cuda.h did not define CUDA_VERSION"
65-
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 7050
65+
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
6666
#error "Unsupported CUDA version!"
6767
#endif
6868

@@ -113,6 +113,7 @@
113113
#undef __cxa_vec_ctor
114114
#undef __cxa_vec_cctor
115115
#undef __cxa_vec_dtor
116+
#undef __cxa_vec_new
116117
#undef __cxa_vec_new2
117118
#undef __cxa_vec_new3
118119
#undef __cxa_vec_delete2
@@ -135,6 +136,21 @@
135136
// the headers we're about to include.
136137
#define __host__ UNEXPECTED_HOST_ATTRIBUTE
137138

139+
// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.
140+
// Previous versions used to check whether they are defined or not.
141+
// The CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it
142+
// here to detect the switch.
143+
144+
#if defined(CU_DEVICE_INVALID)
145+
#if !defined(__USE_FAST_MATH__)
146+
#define __USE_FAST_MATH__ 0
147+
#endif
148+
149+
#if !defined(__CUDA_PREC_DIV)
150+
#define __CUDA_PREC_DIV 0
151+
#endif
152+
#endif
153+
138154
// device_functions.hpp and math_functions*.hpp use 'static
139155
// __forceinline__' (with no __device__) for definitions of device
140156
// functions. Temporarily redefine __forceinline__ to include
@@ -151,7 +167,7 @@
151167
// slow divides), so we need to scope our define carefully here.
152168
#pragma push_macro("__USE_FAST_MATH__")
153169
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
154-
#define __USE_FAST_MATH__
170+
#define __USE_FAST_MATH__ 1
155171
#endif
156172
#include "math_functions.hpp"
157173
#pragma pop_macro("__USE_FAST_MATH__")

clang/test/Driver/Inputs/CUDA_80/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc

Whitespace-only changes.

clang/test/Driver/cuda-detect.cu

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@
2222
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
2323
// RUN: | FileCheck %s -check-prefix COMMON \
2424
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20
25-
// sm_30, sm_5x and sm_6x map to compute_30
25+
// sm_30, sm_6x map to compute_30.
2626
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
2727
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
2828
// RUN: | FileCheck %s -check-prefix COMMON \
2929
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
30+
// sm_5x is a special case. Maps to compute_30 for cuda-7.x only.
3031
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
31-
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
32+
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
3233
// RUN: | FileCheck %s -check-prefix COMMON \
3334
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
3435
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
@@ -44,6 +45,12 @@
4445
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
4546
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \
4647
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
48+
// sm_5x -> compute_50 for CUDA-8.0 and newer.
49+
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
50+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
51+
// RUN: | FileCheck %s -check-prefix COMMON \
52+
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE50
53+
4754

4855
// Verify that -nocudainc prevents adding include path to CUDA headers.
4956
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
@@ -56,8 +63,8 @@
5663
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
5764

5865
// Verify that we get an error if there's no libdevice library to link with.
59-
// NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_30 for this purpose.
60-
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
66+
// NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_20 for this purpose.
67+
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_20 \
6168
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
6269
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
6370

@@ -81,7 +88,7 @@
8188
// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
8289
// NOCUDA-NOT: Found CUDA installation:
8390

84-
// MISSINGLIBDEVICE: error: cannot find libdevice for sm_30.
91+
// MISSINGLIBDEVICE: error: cannot find libdevice for sm_20.
8592

8693
// COMMON: "-triple" "nvptx-nvidia-cuda"
8794
// COMMON-SAME: "-fcuda-is-device"
@@ -90,6 +97,7 @@
9097
// LIBDEVICE20-SAME: libdevice.compute_20.10.bc
9198
// LIBDEVICE30-SAME: libdevice.compute_30.10.bc
9299
// LIBDEVICE35-SAME: libdevice.compute_35.10.bc
100+
// LIBDEVICE50-SAME: libdevice.compute_50.10.bc
93101
// NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc
94102
// LIBDEVICE-SAME: "-target-feature" "+ptx42"
95103
// NOLIBDEVICE-NOT: "-target-feature" "+ptx42"

0 commit comments

Comments
 (0)