Skip to content

[NVPTX] Add support for PTX 8.6 and CUDA 12.6 (12.8) #123398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
#pragma push_macro("SM_90")
#pragma push_macro("SM_90a")
#pragma push_macro("SM_100")
#define SM_100 "sm_100"
#pragma push_macro("SM_100a")
#define SM_100a "sm_100a"
#define SM_100 "sm_100|" SM_100a
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a "|" SM_100
#define SM_89 "sm_89|" SM_90
Expand Down Expand Up @@ -1091,6 +1093,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("SM_90")
#pragma pop_macro("SM_90a")
#pragma pop_macro("SM_100")
#pragma pop_macro("SM_100a")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
Expand Down
6 changes: 5 additions & 1 deletion clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,12 @@ enum class CudaVersion {
CUDA_124,
CUDA_125,
CUDA_126,
CUDA_127,
CUDA_128,
CUDA_129,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
CUDA_126, // Partially supported. Proceed with a warning.
CUDA_129, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
Expand Down Expand Up @@ -80,6 +83,7 @@ enum class OffloadArch {
SM_90,
SM_90a,
SM_100,
SM_100a,
GFX600,
GFX601,
GFX602,
Expand Down
8 changes: 6 additions & 2 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 4),
CUDA_ENTRY(12, 5),
CUDA_ENTRY(12, 6),
CUDA_ENTRY(12, 7),
CUDA_ENTRY(12, 8),
CUDA_ENTRY(12, 9),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
Expand Down Expand Up @@ -98,6 +101,7 @@ static const OffloadArchToStringMap arch_names[] = {
SM(90), // Hopper
SM(90a), // Hopper
SM(100), // Blackwell
SM(100a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
Expand Down Expand Up @@ -227,8 +231,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
case OffloadArch::SM_90a:
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
return CudaVersion::NEW; // TODO: use specific CUDA version once it's
// public.
case OffloadArch::SM_100a:
return CudaVersion::CUDA_127;
default:
llvm_unreachable("invalid enum");
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,13 +285,16 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_90a:
return "900";
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
return "1000";
}
llvm_unreachable("unhandled OffloadArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
if (GPU == OffloadArch::SM_90a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
if (GPU == OffloadArch::SM_100a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
}
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2277,6 +2277,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_125;
if (raw_version < 12070)
return CudaVersion::CUDA_126;
if (raw_version < 12080)
return CudaVersion::CUDA_127;
if (raw_version < 12090)
return CudaVersion::CUDA_128;
if (raw_version < 12100)
return CudaVersion::CUDA_129;
return CudaVersion::NEW;
}

Expand Down Expand Up @@ -682,6 +688,9 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
CASE_CUDA_VERSION(129, 87);
Copy link
Member

@Artem-B Artem-B Jan 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we forgot to add PTX87 to the builtins in clang/include/clang/Basic/BuiltinsNVPTX.def.

Right now cc1 ends up running with -target-features +ptx87, but the target builtins only enumerate up to ptx86, so the compilation fails with:

In file included from trunk/include/__clang_cuda_runtime_wrapper.h:473:
trunk/include/__clang_cuda_intrinsics.h:179:40: error: '__nvvm_shfl_sync_down_i32' needs target feature ptx60|ptx61|ptx62|ptx63|ptx64|ptx65|ptx70|ptx71|ptx72|ptx73|ptx74|ptx75|ptx76|ptx77|ptx78|ptx80|ptx81|ptx82|ptx83|ptx84|ptx85|ptx86
  179 | __MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
      |                                        ^

CASE_CUDA_VERSION(128, 87);
CASE_CUDA_VERSION(127, 86);
CASE_CUDA_VERSION(126, 85);
CASE_CUDA_VERSION(125, 85);
CASE_CUDA_VERSION(124, 84);
Expand Down
1 change: 1 addition & 0 deletions clang/test/Misc/target-invalid-cpu-note/nvptx.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
// CHECK-SAME: {{^}}, sm_90
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, sm_100a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;

def SM90a: FeatureSM<"90a", 901>;
def SM100a: FeatureSM<"100a", 1001>;

foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78,
Expand Down Expand Up @@ -74,6 +75,7 @@ def : Proc<"sm_89", [SM89, PTX78]>;
def : Proc<"sm_90", [SM90, PTX78]>;
def : Proc<"sm_90a", [SM90a, PTX80]>;
def : Proc<"sm_100", [SM100, PTX86]>;
def : Proc<"sm_100a", [SM100a, PTX86]>;

def NVPTXInstrInfo : InstrInfo {
}
Expand Down
Loading