release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) #127918


Merged: 1 commit into llvm:release/20.x on Feb 21, 2025

Conversation

@llvmbot (Member) commented Feb 19, 2025

Backport 0127f16

Requested by: @Artem-B

@llvmbot llvmbot added this to the LLVM 20.X Release milestone Feb 19, 2025
@llvmbot (Member, Author) commented Feb 19, 2025

@Artem-B What do you think about merging this PR to the release branch?

@llvmbot llvmbot requested a review from Artem-B February 19, 2025 22:50
@llvmbot llvmbot added the clang, clang:frontend, clang:codegen, and clang:openmp labels Feb 19, 2025
@llvmbot (Member, Author) commented Feb 19, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: None (llvmbot)

Changes

Backport 0127f16

Requested by: @Artem-B


Full diff: https://github.com/llvm/llvm-project/pull/127918.diff

6 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsNVPTX.td (+5-3)
  • (modified) clang/include/clang/Basic/Cuda.h (+4)
  • (modified) clang/lib/Basic/Cuda.cpp (+8)
  • (modified) clang/lib/Basic/Targets/NVPTX.cpp (+18-5)
  • (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+4)
  • (modified) clang/test/Misc/target-invalid-cpu-note/nvptx.c (+4)
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 9d24a992563a4..b550fff8567df 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,12 +21,14 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
                         !strconcat(f, "|", newer.Features));
 }
 
+let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_101a" in def SM_101a : SMFeatures;
 let Features = "sm_100a" in def SM_100a : SMFeatures;
-
-def SM_100 : SM<"100", [SM_100a]>;
-
 let Features = "sm_90a" in def SM_90a : SMFeatures;
 
+def SM_120 : SM<"120", [SM_120a]>;
+def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_100 : SM<"100", [SM_100a, SM_101]>;
 def SM_90 : SM<"90", [SM_90a, SM_100]>;
 def SM_89 : SM<"89", [SM_90]>;
 def SM_87 : SM<"87", [SM_89]>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..5c909a8e9ca11 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -82,6 +82,10 @@ enum class OffloadArch {
   SM_90a,
   SM_100,
   SM_100a,
+  SM_101,
+  SM_101a,
+  SM_120,
+  SM_120a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..79cac0ec119dd 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = {
     SM(90a),                         // Hopper
     SM(100),                         // Blackwell
     SM(100a),                        // Blackwell
+    SM(101),                         // Blackwell
+    SM(101a),                        // Blackwell
+    SM(120),                         // Blackwell
+    SM(120a),                        // Blackwell
     GFX(600),  // gfx600
     GFX(601),  // gfx601
     GFX(602),  // gfx602
@@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
     return CudaVersion::CUDA_120;
   case OffloadArch::SM_100:
   case OffloadArch::SM_100a:
+  case OffloadArch::SM_101:
+  case OffloadArch::SM_101a:
+  case OffloadArch::SM_120:
+  case OffloadArch::SM_120a:
     return CudaVersion::CUDA_128;
   default:
     llvm_unreachable("invalid enum");
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index a03f4983b9d03..9be12cbe7ac19 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
 
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
     // Set __CUDA_ARCH__ for the GPU specified.
-    std::string CUDAArchCode = [this] {
+    llvm::StringRef CUDAArchCode = [this] {
       switch (GPU) {
       case OffloadArch::GFX600:
       case OffloadArch::GFX601:
@@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
       case OffloadArch::SM_100:
       case OffloadArch::SM_100a:
         return "1000";
+      case OffloadArch::SM_101:
+      case OffloadArch::SM_101a:
+         return "1010";
+      case OffloadArch::SM_120:
+      case OffloadArch::SM_120a:
+         return "1200";
       }
       llvm_unreachable("unhandled OffloadArch");
     }();
     Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
-    if (GPU == OffloadArch::SM_90a)
-      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
-    if (GPU == OffloadArch::SM_100a)
-      Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
+    switch(GPU) {
+      case OffloadArch::SM_90a:
+      case OffloadArch::SM_100a:
+      case OffloadArch::SM_101a:
+      case OffloadArch::SM_120a:
+        Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
+        break;
+      default:
+        // Do nothing if this is not an enhanced architecture.
+        break;
+    }
   }
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..dc417880a50e9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
       case OffloadArch::SM_90a:
       case OffloadArch::SM_100:
       case OffloadArch::SM_100a:
+      case OffloadArch::SM_101:
+      case OffloadArch::SM_101a:
+      case OffloadArch::SM_120:
+      case OffloadArch::SM_120a:
       case OffloadArch::GFX600:
       case OffloadArch::GFX601:
       case OffloadArch::GFX602:
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 3afcdf8c9fe5c..d8e4d7e63e234 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -28,6 +28,10 @@
 // CHECK-SAME: {{^}}, sm_90a
 // CHECK-SAME: {{^}}, sm_100
 // CHECK-SAME: {{^}}, sm_100a
+// CHECK-SAME: {{^}}, sm_101
+// CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_120
+// CHECK-SAME: {{^}}, sm_120a
 // CHECK-SAME: {{^}}, gfx600
 // CHECK-SAME: {{^}}, gfx601
 // CHECK-SAME: {{^}}, gfx602

@jodelek (Contributor) commented Feb 21, 2025

@Artem-B, just curious - is there anything additional that needs to happen before you can approve this?

@Artem-B (Member) left a comment

I was the one proposing to merge this change, so I assumed that it's the release maintainers who'd need to stamp it.

I am all for merging it.

@jodelek (Contributor) commented Feb 21, 2025

Do you know who is the person I should bother?

@efriedma-quic (Collaborator) commented:

The process is that the patch is first reviewed by someone familiar with the code. They approve the patch and describe how the fix meets the release branch patch requirements (https://llvm.org/docs/HowToReleaseLLVM.html#release-patch-rules).

Once it's approved, the release manager will look at the patch and either merge it or request changes. You don't need to specifically ping the release manager; they track all the pending pull requests.

@Artem-B (Member) commented Feb 21, 2025

> patch is first reviewed by someone familiar with the code.

That would be me, as I am the maintainer of CUDA code and had reviewed the original PR.

> They approve the patch, and describe how the fix meets the release branch patch requirements (https://llvm.org/docs/HowToReleaseLLVM.html#release-patch-rules).

This patch fits item #3 on the rule list: "completion of features that were started before the branch was created."

These changes allow clang users to compile CUDA code with just-released cuda-12.8 which adds these new GPU variants.


Add support for sm101 and sm120 target architectures. It requires CUDA
12.8.

---------

Co-authored-by: Sebastian Jodlowski <[email protected]>
(cherry picked from commit 0127f16)
@tstellar tstellar merged commit b84ffb9 into llvm:release/20.x Feb 21, 2025
7 of 9 checks passed

@Artem-B (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.

@Artem-B (Member) commented Feb 21, 2025

# CUDA
- Clang now supports CUDA compilation with CUDA SDK up to v12.8
- Clang can now target sm_100, sm_101, and sm_120 GPUs (Blackwell)
