-
Notifications
You must be signed in to change notification settings - Fork 14.3k
release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) #127918
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@Artem-B What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: None (llvmbot) ChangesBackport 0127f16 Requested by: @Artem-B Full diff: https://github.com/llvm/llvm-project/pull/127918.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 9d24a992563a4..b550fff8567df 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,12 +21,14 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
!strconcat(f, "|", newer.Features));
}
+let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_101a" in def SM_101a : SMFeatures;
let Features = "sm_100a" in def SM_100a : SMFeatures;
-
-def SM_100 : SM<"100", [SM_100a]>;
-
let Features = "sm_90a" in def SM_90a : SMFeatures;
+def SM_120 : SM<"120", [SM_120a]>;
+def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_100 : SM<"100", [SM_100a, SM_101]>;
def SM_90 : SM<"90", [SM_90a, SM_100]>;
def SM_89 : SM<"89", [SM_90]>;
def SM_87 : SM<"87", [SM_89]>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..5c909a8e9ca11 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -82,6 +82,10 @@ enum class OffloadArch {
SM_90a,
SM_100,
SM_100a,
+ SM_101,
+ SM_101a,
+ SM_120,
+ SM_120a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..79cac0ec119dd 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = {
SM(90a), // Hopper
SM(100), // Blackwell
SM(100a), // Blackwell
+ SM(101), // Blackwell
+ SM(101a), // Blackwell
+ SM(120), // Blackwell
+ SM(120a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
@@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
return CudaVersion::CUDA_128;
default:
llvm_unreachable("invalid enum");
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index a03f4983b9d03..9be12cbe7ac19 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
// Set __CUDA_ARCH__ for the GPU specified.
- std::string CUDAArchCode = [this] {
+ llvm::StringRef CUDAArchCode = [this] {
switch (GPU) {
case OffloadArch::GFX600:
case OffloadArch::GFX601:
@@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
return "1000";
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
+ return "1010";
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
+ return "1200";
}
llvm_unreachable("unhandled OffloadArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
- if (GPU == OffloadArch::SM_90a)
- Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
- if (GPU == OffloadArch::SM_100a)
- Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
+ switch(GPU) {
+ case OffloadArch::SM_90a:
+ case OffloadArch::SM_100a:
+ case OffloadArch::SM_101a:
+ case OffloadArch::SM_120a:
+ Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
+ break;
+ default:
+ // Do nothing if this is not an enhanced architecture.
+ break;
+ }
}
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..dc417880a50e9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 3afcdf8c9fe5c..d8e4d7e63e234 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -28,6 +28,10 @@
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, sm_100a
+// CHECK-SAME: {{^}}, sm_101
+// CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_120
+// CHECK-SAME: {{^}}, sm_120a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
|
@Artem-B, just curious - is there anything additional that needs to happen before you can approve this? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was the one proposing to merge this change, so I assumed that it's the release maintainers who'd need to stamp it.
I am all for merging it.
Do you know who is the person I should bother? |
The process is that the patch is first reviewed by someone familiar with the code. They approve the patch, and describe how the fix meets the release branch patch requirements (https://llvm.org/docs/HowToReleaseLLVM.html#release-patch-rules). Once it's approved, the release manager will look at the patch, and either merge or request changes. You don't need to specifically ping the release manager; they track all the pending pull requests. |
That would be me, as I am the maintainer of CUDA code and had reviewed the original PR.
This patch fits item #3 on the rule list: "or completion of features that were started before the branch was created." These changes allow clang users to compile CUDA code with the just-released CUDA 12.8, which adds these new GPU variants. |
Add support for sm101 and sm120 target architectures. It requires CUDA 12.8. --------- Co-authored-by: Sebastian Jodlowski <[email protected]> (cherry picked from commit 0127f16)
@Artem-B (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
|
Backport 0127f16
Requested by: @Artem-B