Skip to content

Commit 616979e

Browse files
authored
[NVPTX] Add support for PTX 8.6 and CUDA 12.6 (12.8) (#123398)
Add CUDA versions 12.7, 12.8, and 12.9, which support PTX 8.6 and later (this enables the use of Blackwell-specific instructions).
1 parent 9b6e8df commit 616979e

File tree

8 files changed

+31
-4
lines changed

8 files changed

+31
-4
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
#pragma push_macro("SM_90")
2929
#pragma push_macro("SM_90a")
3030
#pragma push_macro("SM_100")
31-
#define SM_100 "sm_100"
31+
#pragma push_macro("SM_100a")
32+
#define SM_100a "sm_100a"
33+
#define SM_100 "sm_100|" SM_100a
3234
#define SM_90a "sm_90a"
3335
#define SM_90 "sm_90|" SM_90a "|" SM_100
3436
#define SM_89 "sm_89|" SM_90
@@ -1091,6 +1093,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
10911093
#pragma pop_macro("SM_90")
10921094
#pragma pop_macro("SM_90a")
10931095
#pragma pop_macro("SM_100")
1096+
#pragma pop_macro("SM_100a")
10941097
#pragma pop_macro("PTX42")
10951098
#pragma pop_macro("PTX60")
10961099
#pragma pop_macro("PTX61")

clang/include/clang/Basic/Cuda.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,12 @@ enum class CudaVersion {
4444
CUDA_124,
4545
CUDA_125,
4646
CUDA_126,
47+
CUDA_127,
48+
CUDA_128,
49+
CUDA_129,
4750
FULLY_SUPPORTED = CUDA_123,
4851
PARTIALLY_SUPPORTED =
49-
CUDA_126, // Partially supported. Proceed with a warning.
52+
CUDA_129, // Partially supported. Proceed with a warning.
5053
NEW = 10000, // Too new. Issue a warning, but allow using it.
5154
};
5255
const char *CudaVersionToString(CudaVersion V);
@@ -80,6 +83,7 @@ enum class OffloadArch {
8083
SM_90,
8184
SM_90a,
8285
SM_100,
86+
SM_100a,
8387
GFX600,
8488
GFX601,
8589
GFX602,

clang/lib/Basic/Cuda.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
4444
CUDA_ENTRY(12, 4),
4545
CUDA_ENTRY(12, 5),
4646
CUDA_ENTRY(12, 6),
47+
CUDA_ENTRY(12, 7),
48+
CUDA_ENTRY(12, 8),
49+
CUDA_ENTRY(12, 9),
4750
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
4851
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
4952
};
@@ -98,6 +101,7 @@ static const OffloadArchToStringMap arch_names[] = {
98101
SM(90), // Hopper
99102
SM(90a), // Hopper
100103
SM(100), // Blackwell
104+
SM(100a), // Blackwell
101105
GFX(600), // gfx600
102106
GFX(601), // gfx601
103107
GFX(602), // gfx602
@@ -227,8 +231,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
227231
case OffloadArch::SM_90a:
228232
return CudaVersion::CUDA_120;
229233
case OffloadArch::SM_100:
230-
return CudaVersion::NEW; // TODO: use specific CUDA version once it's
231-
// public.
234+
case OffloadArch::SM_100a:
235+
return CudaVersion::CUDA_127;
232236
default:
233237
llvm_unreachable("invalid enum");
234238
}

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,13 +285,16 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
285285
case OffloadArch::SM_90a:
286286
return "900";
287287
case OffloadArch::SM_100:
288+
case OffloadArch::SM_100a:
288289
return "1000";
289290
}
290291
llvm_unreachable("unhandled OffloadArch");
291292
}();
292293
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
293294
if (GPU == OffloadArch::SM_90a)
294295
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
296+
if (GPU == OffloadArch::SM_100a)
297+
Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
295298
}
296299
}
297300

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2277,6 +2277,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
22772277
case OffloadArch::SM_90:
22782278
case OffloadArch::SM_90a:
22792279
case OffloadArch::SM_100:
2280+
case OffloadArch::SM_100a:
22802281
case OffloadArch::GFX600:
22812282
case OffloadArch::GFX601:
22822283
case OffloadArch::GFX602:

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,12 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
8989
return CudaVersion::CUDA_125;
9090
if (raw_version < 12070)
9191
return CudaVersion::CUDA_126;
92+
if (raw_version < 12080)
93+
return CudaVersion::CUDA_127;
94+
if (raw_version < 12090)
95+
return CudaVersion::CUDA_128;
96+
if (raw_version < 12100)
97+
return CudaVersion::CUDA_129;
9298
return CudaVersion::NEW;
9399
}
94100

@@ -682,6 +688,9 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
682688
case CudaVersion::CUDA_##CUDA_VER: \
683689
PtxFeature = "+ptx" #PTX_VER; \
684690
break;
691+
CASE_CUDA_VERSION(129, 87);
692+
CASE_CUDA_VERSION(128, 87);
693+
CASE_CUDA_VERSION(127, 86);
685694
CASE_CUDA_VERSION(126, 85);
686695
CASE_CUDA_VERSION(125, 85);
687696
CASE_CUDA_VERSION(124, 84);

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
// CHECK-SAME: {{^}}, sm_90
2828
// CHECK-SAME: {{^}}, sm_90a
2929
// CHECK-SAME: {{^}}, sm_100
30+
// CHECK-SAME: {{^}}, sm_100a
3031
// CHECK-SAME: {{^}}, gfx600
3132
// CHECK-SAME: {{^}}, gfx601
3233
// CHECK-SAME: {{^}}, gfx602

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
3939
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
4040

4141
def SM90a: FeatureSM<"90a", 901>;
42+
def SM100a: FeatureSM<"100a", 1001>;
4243

4344
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
4445
70, 71, 72, 73, 74, 75, 76, 77, 78,
@@ -74,6 +75,7 @@ def : Proc<"sm_89", [SM89, PTX78]>;
7475
def : Proc<"sm_90", [SM90, PTX78]>;
7576
def : Proc<"sm_90a", [SM90a, PTX80]>;
7677
def : Proc<"sm_100", [SM100, PTX86]>;
78+
def : Proc<"sm_100a", [SM100a, PTX86]>;
7779

7880
def NVPTXInstrInfo : InstrInfo {
7981
}

0 commit comments

Comments
 (0)