Skip to content

Commit 0127f16

Browse files
jodelek and Sebastian Jodlowski authored
[CUDA] Add support for sm101 and sm120 target architectures (llvm#127187)
Add support for the sm_101 and sm_120 target architectures, together with their sm_101a and sm_120a variants. These targets require CUDA 12.8 or newer. --------- Co-authored-by: Sebastian Jodlowski <[email protected]>
1 parent 6342095 commit 0127f16

File tree

6 files changed

+43
-8
lines changed

6 files changed

+43
-8
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,14 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
2121
!strconcat(f, "|", newer.Features));
2222
}
2323

24+
let Features = "sm_120a" in def SM_120a : SMFeatures;
25+
let Features = "sm_101a" in def SM_101a : SMFeatures;
2426
let Features = "sm_100a" in def SM_100a : SMFeatures;
25-
26-
def SM_100 : SM<"100", [SM_100a]>;
27-
2827
let Features = "sm_90a" in def SM_90a : SMFeatures;
2928

29+
def SM_120 : SM<"120", [SM_120a]>;
30+
def SM_101 : SM<"101", [SM_101a, SM_120]>;
31+
def SM_100 : SM<"100", [SM_100a, SM_101]>;
3032
def SM_90 : SM<"90", [SM_90a, SM_100]>;
3133
def SM_89 : SM<"89", [SM_90]>;
3234
def SM_87 : SM<"87", [SM_89]>;

clang/include/clang/Basic/Cuda.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ enum class OffloadArch {
8282
SM_90a,
8383
SM_100,
8484
SM_100a,
85+
SM_101,
86+
SM_101a,
87+
SM_120,
88+
SM_120a,
8589
GFX600,
8690
GFX601,
8791
GFX602,

clang/lib/Basic/Cuda.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = {
100100
SM(90a), // Hopper
101101
SM(100), // Blackwell
102102
SM(100a), // Blackwell
103+
SM(101), // Blackwell
104+
SM(101a), // Blackwell
105+
SM(120), // Blackwell
106+
SM(120a), // Blackwell
103107
GFX(600), // gfx600
104108
GFX(601), // gfx601
105109
GFX(602), // gfx602
@@ -228,6 +232,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
228232
return CudaVersion::CUDA_120;
229233
case OffloadArch::SM_100:
230234
case OffloadArch::SM_100a:
235+
case OffloadArch::SM_101:
236+
case OffloadArch::SM_101a:
237+
case OffloadArch::SM_120:
238+
case OffloadArch::SM_120a:
231239
return CudaVersion::CUDA_128;
232240
default:
233241
llvm_unreachable("invalid enum");

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
185185

186186
if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
187187
// Set __CUDA_ARCH__ for the GPU specified.
188-
std::string CUDAArchCode = [this] {
188+
llvm::StringRef CUDAArchCode = [this] {
189189
switch (GPU) {
190190
case OffloadArch::GFX600:
191191
case OffloadArch::GFX601:
@@ -290,14 +290,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
290290
case OffloadArch::SM_100:
291291
case OffloadArch::SM_100a:
292292
return "1000";
293+
case OffloadArch::SM_101:
294+
case OffloadArch::SM_101a:
295+
return "1010";
296+
case OffloadArch::SM_120:
297+
case OffloadArch::SM_120a:
298+
return "1200";
293299
}
294300
llvm_unreachable("unhandled OffloadArch");
295301
}();
296302
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
297-
if (GPU == OffloadArch::SM_90a)
298-
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
299-
if (GPU == OffloadArch::SM_100a)
300-
Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
303+
switch(GPU) {
304+
case OffloadArch::SM_90a:
305+
case OffloadArch::SM_100a:
306+
case OffloadArch::SM_101a:
307+
case OffloadArch::SM_120a:
308+
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
309+
break;
310+
default:
311+
// Do nothing if this is not an enhanced architecture.
312+
break;
313+
}
301314
}
302315
}
303316

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
22782278
case OffloadArch::SM_90a:
22792279
case OffloadArch::SM_100:
22802280
case OffloadArch::SM_100a:
2281+
case OffloadArch::SM_101:
2282+
case OffloadArch::SM_101a:
2283+
case OffloadArch::SM_120:
2284+
case OffloadArch::SM_120a:
22812285
case OffloadArch::GFX600:
22822286
case OffloadArch::GFX601:
22832287
case OffloadArch::GFX602:

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@
2828
// CHECK-SAME: {{^}}, sm_90a
2929
// CHECK-SAME: {{^}}, sm_100
3030
// CHECK-SAME: {{^}}, sm_100a
31+
// CHECK-SAME: {{^}}, sm_101
32+
// CHECK-SAME: {{^}}, sm_101a
33+
// CHECK-SAME: {{^}}, sm_120
34+
// CHECK-SAME: {{^}}, sm_120a
3135
// CHECK-SAME: {{^}}, gfx600
3236
// CHECK-SAME: {{^}}, gfx601
3337
// CHECK-SAME: {{^}}, gfx602

0 commit comments

Comments
 (0)