Skip to content

Commit b84ffb9

Browse files
jodelek, Sebastian Jodlowski
authored and committed
[CUDA] Add support for sm101 and sm120 target architectures (#127187)
Add support for sm101 and sm120 target architectures. It requires CUDA 12.8.
---------
Co-authored-by: Sebastian Jodlowski <[email protected]>
(cherry picked from commit 0127f16)
1 parent b727a13 commit b84ffb9

File tree

6 files changed

+43
-8
lines changed

6 files changed

+43
-8
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,14 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
2121
!strconcat(f, "|", newer.Features));
2222
}
2323

24+
let Features = "sm_120a" in def SM_120a : SMFeatures;
25+
let Features = "sm_101a" in def SM_101a : SMFeatures;
2426
let Features = "sm_100a" in def SM_100a : SMFeatures;
25-
26-
def SM_100 : SM<"100", [SM_100a]>;
27-
2827
let Features = "sm_90a" in def SM_90a : SMFeatures;
2928

29+
def SM_120 : SM<"120", [SM_120a]>;
30+
def SM_101 : SM<"101", [SM_101a, SM_120]>;
31+
def SM_100 : SM<"100", [SM_100a, SM_101]>;
3032
def SM_90 : SM<"90", [SM_90a, SM_100]>;
3133
def SM_89 : SM<"89", [SM_90]>;
3234
def SM_87 : SM<"87", [SM_89]>;

clang/include/clang/Basic/Cuda.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ enum class OffloadArch {
8282
SM_90a,
8383
SM_100,
8484
SM_100a,
85+
SM_101,
86+
SM_101a,
87+
SM_120,
88+
SM_120a,
8589
GFX600,
8690
GFX601,
8791
GFX602,

clang/lib/Basic/Cuda.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = {
100100
SM(90a), // Hopper
101101
SM(100), // Blackwell
102102
SM(100a), // Blackwell
103+
SM(101), // Blackwell
104+
SM(101a), // Blackwell
105+
SM(120), // Blackwell
106+
SM(120a), // Blackwell
103107
GFX(600), // gfx600
104108
GFX(601), // gfx601
105109
GFX(602), // gfx602
@@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
230234
return CudaVersion::CUDA_120;
231235
case OffloadArch::SM_100:
232236
case OffloadArch::SM_100a:
237+
case OffloadArch::SM_101:
238+
case OffloadArch::SM_101a:
239+
case OffloadArch::SM_120:
240+
case OffloadArch::SM_120a:
233241
return CudaVersion::CUDA_128;
234242
default:
235243
llvm_unreachable("invalid enum");

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
176176

177177
if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
178178
// Set __CUDA_ARCH__ for the GPU specified.
179-
std::string CUDAArchCode = [this] {
179+
llvm::StringRef CUDAArchCode = [this] {
180180
switch (GPU) {
181181
case OffloadArch::GFX600:
182182
case OffloadArch::GFX601:
@@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
283283
case OffloadArch::SM_100:
284284
case OffloadArch::SM_100a:
285285
return "1000";
286+
case OffloadArch::SM_101:
287+
case OffloadArch::SM_101a:
288+
return "1010";
289+
case OffloadArch::SM_120:
290+
case OffloadArch::SM_120a:
291+
return "1200";
286292
}
287293
llvm_unreachable("unhandled OffloadArch");
288294
}();
289295
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
290-
if (GPU == OffloadArch::SM_90a)
291-
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
292-
if (GPU == OffloadArch::SM_100a)
293-
Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
296+
switch(GPU) {
297+
case OffloadArch::SM_90a:
298+
case OffloadArch::SM_100a:
299+
case OffloadArch::SM_101a:
300+
case OffloadArch::SM_120a:
301+
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
302+
break;
303+
default:
304+
// Do nothing if this is not an enhanced architecture.
305+
break;
306+
}
294307
}
295308
}
296309

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
22782278
case OffloadArch::SM_90a:
22792279
case OffloadArch::SM_100:
22802280
case OffloadArch::SM_100a:
2281+
case OffloadArch::SM_101:
2282+
case OffloadArch::SM_101a:
2283+
case OffloadArch::SM_120:
2284+
case OffloadArch::SM_120a:
22812285
case OffloadArch::GFX600:
22822286
case OffloadArch::GFX601:
22832287
case OffloadArch::GFX602:

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@
2828
// CHECK-SAME: {{^}}, sm_90a
2929
// CHECK-SAME: {{^}}, sm_100
3030
// CHECK-SAME: {{^}}, sm_100a
31+
// CHECK-SAME: {{^}}, sm_101
32+
// CHECK-SAME: {{^}}, sm_101a
33+
// CHECK-SAME: {{^}}, sm_120
34+
// CHECK-SAME: {{^}}, sm_120a
3135
// CHECK-SAME: {{^}}, gfx600
3236
// CHECK-SAME: {{^}}, gfx601
3337
// CHECK-SAME: {{^}}, gfx602

0 commit comments

Comments
 (0)