Skip to content

Commit 30a06e8

Browse files
[CUDA] Add support for CUDA-12.6 and sm_100 (#112028)
This is a copy of #97402 (with minor updates), which is now ready to land. --------- Co-authored-by: Sergey Kozub <[email protected]>
1 parent c2c4db8 commit 30a06e8

File tree

9 files changed

+32
-5
lines changed

9 files changed

+32
-5
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,8 @@ CUDA/HIP Language Changes
620620

621621
CUDA Support
622622
^^^^^^^^^^^^
623+
- Clang now supports CUDA SDK up to 12.6
624+
- Added support for sm_100
623625

624626
AIX Support
625627
^^^^^^^^^^^

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@
2727
#pragma push_macro("SM_89")
2828
#pragma push_macro("SM_90")
2929
#pragma push_macro("SM_90a")
30+
#pragma push_macro("SM_100")
31+
#define SM_100 "sm_100"
3032
#define SM_90a "sm_90a"
31-
#define SM_90 "sm_90|" SM_90a
33+
#define SM_90 "sm_90|" SM_90a "|" SM_100
3234
#define SM_89 "sm_89|" SM_90
3335
#define SM_87 "sm_87|" SM_89
3436
#define SM_86 "sm_86|" SM_87
@@ -63,7 +65,9 @@
6365
#pragma push_macro("PTX83")
6466
#pragma push_macro("PTX84")
6567
#pragma push_macro("PTX85")
66-
#define PTX85 "ptx85"
68+
#pragma push_macro("PTX86")
69+
#define PTX86 "ptx86"
70+
#define PTX85 "ptx85|" PTX86
6771
#define PTX84 "ptx84|" PTX85
6872
#define PTX83 "ptx83|" PTX84
6973
#define PTX82 "ptx82|" PTX83
@@ -1086,6 +1090,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
10861090
#pragma pop_macro("SM_89")
10871091
#pragma pop_macro("SM_90")
10881092
#pragma pop_macro("SM_90a")
1093+
#pragma pop_macro("SM_100")
10891094
#pragma pop_macro("PTX42")
10901095
#pragma pop_macro("PTX60")
10911096
#pragma pop_macro("PTX61")
@@ -1108,3 +1113,4 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
11081113
#pragma pop_macro("PTX83")
11091114
#pragma pop_macro("PTX84")
11101115
#pragma pop_macro("PTX85")
1116+
#pragma pop_macro("PTX86")

clang/include/clang/Basic/Cuda.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@ enum class CudaVersion {
4343
CUDA_123,
4444
CUDA_124,
4545
CUDA_125,
46+
CUDA_126,
4647
FULLY_SUPPORTED = CUDA_123,
4748
PARTIALLY_SUPPORTED =
48-
CUDA_125, // Partially supported. Proceed with a warning.
49+
CUDA_126, // Partially supported. Proceed with a warning.
4950
NEW = 10000, // Too new. Issue a warning, but allow using it.
5051
};
5152
const char *CudaVersionToString(CudaVersion V);
@@ -78,6 +79,7 @@ enum class OffloadArch {
7879
SM_89,
7980
SM_90,
8081
SM_90a,
82+
SM_100,
8183
GFX600,
8284
GFX601,
8385
GFX602,

clang/lib/Basic/Cuda.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
4343
CUDA_ENTRY(12, 3),
4444
CUDA_ENTRY(12, 4),
4545
CUDA_ENTRY(12, 5),
46+
CUDA_ENTRY(12, 6),
4647
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
4748
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
4849
};
@@ -96,6 +97,7 @@ static const OffloadArchToStringMap arch_names[] = {
9697
SM(89), // Ada Lovelace
9798
SM(90), // Hopper
9899
SM(90a), // Hopper
100+
SM(100), // Blackwell
99101
GFX(600), // gfx600
100102
GFX(601), // gfx601
101103
GFX(602), // gfx602
@@ -221,6 +223,9 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
221223
return CudaVersion::CUDA_118;
222224
case OffloadArch::SM_90a:
223225
return CudaVersion::CUDA_120;
226+
case OffloadArch::SM_100:
227+
return CudaVersion::NEW; // TODO: use specific CUDA version once it's
228+
// public.
224229
default:
225230
llvm_unreachable("invalid enum");
226231
}

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
281281
case OffloadArch::SM_90:
282282
case OffloadArch::SM_90a:
283283
return "900";
284+
case OffloadArch::SM_100:
285+
return "1000";
284286
}
285287
llvm_unreachable("unhandled OffloadArch");
286288
}();

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2274,6 +2274,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
22742274
case OffloadArch::SM_89:
22752275
case OffloadArch::SM_90:
22762276
case OffloadArch::SM_90a:
2277+
case OffloadArch::SM_100:
22772278
case OffloadArch::GFX600:
22782279
case OffloadArch::GFX601:
22792280
case OffloadArch::GFX602:

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
8787
return CudaVersion::CUDA_124;
8888
if (raw_version < 12060)
8989
return CudaVersion::CUDA_125;
90+
if (raw_version < 12070)
91+
return CudaVersion::CUDA_126;
9092
return CudaVersion::NEW;
9193
}
9294

@@ -669,6 +671,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
669671
case CudaVersion::CUDA_##CUDA_VER: \
670672
PtxFeature = "+ptx" #PTX_VER; \
671673
break;
674+
CASE_CUDA_VERSION(126, 85);
672675
CASE_CUDA_VERSION(125, 85);
673676
CASE_CUDA_VERSION(124, 84);
674677
CASE_CUDA_VERSION(123, 83);
@@ -691,6 +694,10 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
691694
CASE_CUDA_VERSION(91, 61);
692695
CASE_CUDA_VERSION(90, 60);
693696
#undef CASE_CUDA_VERSION
697+
// TODO: Use specific CUDA version once it's public.
698+
case clang::CudaVersion::NEW:
699+
PtxFeature = "+ptx86";
700+
break;
694701
default:
695702
PtxFeature = "+ptx42";
696703
}

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
// CHECK-SAME: {{^}}, sm_89
2727
// CHECK-SAME: {{^}}, sm_90
2828
// CHECK-SAME: {{^}}, sm_90a
29+
// CHECK-SAME: {{^}}, sm_100
2930
// CHECK-SAME: {{^}}, gfx600
3031
// CHECK-SAME: {{^}}, gfx601
3132
// CHECK-SAME: {{^}}, gfx602

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ class FeaturePTX<int version>:
3535
"Use PTX version " # version>;
3636

3737
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
38-
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
38+
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100] in
3939
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
4040

4141
def SM90a: FeatureSM<"90a", 901>;
4242

4343
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
4444
70, 71, 72, 73, 74, 75, 76, 77, 78,
45-
80, 81, 82, 83, 84, 85] in
45+
80, 81, 82, 83, 84, 85, 86] in
4646
def PTX#version: FeaturePTX<version>;
4747

4848
//===----------------------------------------------------------------------===//
@@ -73,6 +73,7 @@ def : Proc<"sm_87", [SM87, PTX74]>;
7373
def : Proc<"sm_89", [SM89, PTX78]>;
7474
def : Proc<"sm_90", [SM90, PTX78]>;
7575
def : Proc<"sm_90a", [SM90a, PTX80]>;
76+
def : Proc<"sm_100", [SM100, PTX86]>;
7677

7778
def NVPTXInstrInfo : InstrInfo {
7879
}

0 commit comments

Comments
 (0)