Skip to content

Commit 0a0bae1

Browse files
committed
[CUDA] plumb through new sm_90-specific builtins.
Differential Revision: https://reviews.llvm.org/D151168
1 parent 3d4964f commit 0a0bae1

File tree

3 files changed

+169
-0
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,31 @@ BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
9999
BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
100100
BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
101101

102+
// Builtins added for sm_90: the cluster-related __nvvm_read_ptx_sreg_* readers
// (clusterid, nclusterid, cluster_ctaid, cluster_nctaid in x/y/z/w, plus
// cluster_ctarank / cluster_nctarank) and __nvvm_is_explicit_cluster.
// Every entry below is gated on AND(SM_90, PTX78).
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
103+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
104+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
105+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))
106+
107+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
108+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
109+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
110+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))
111+
112+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
113+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
114+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
115+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))
116+
117+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
118+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
119+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
120+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))
121+
122+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
123+
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))
124+
125+
TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))
126+
102127
BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
103128
BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
104129
BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
@@ -865,6 +890,7 @@ BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
865890
BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
866891
BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
867892
BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
893+
TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78))
868894

869895
// Builtins to support WMMA instructions on sm_70
870896
TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
@@ -988,6 +1014,11 @@ TARGET_BUILTIN(__nvvm_abs_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
9881014
TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
9891015
TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
9901016

1017+
TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78))
1018+
TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78))
1019+
TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78))
1020+
TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
1021+
9911022
#undef BUILTIN
9921023
#undef TARGET_BUILTIN
9931024
#pragma pop_macro("AND")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18885,6 +18885,83 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
1888518885
return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
1888618886
Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
1888718887
16);
18888+
case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
18889+
return Builder.CreateCall(
18890+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
18891+
case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
18892+
return Builder.CreateCall(
18893+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
18894+
case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
18895+
return Builder.CreateCall(
18896+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
18897+
case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
18898+
return Builder.CreateCall(
18899+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
18900+
case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
18901+
return Builder.CreateCall(
18902+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
18903+
case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
18904+
return Builder.CreateCall(
18905+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
18906+
case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
18907+
return Builder.CreateCall(
18908+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
18909+
case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
18910+
return Builder.CreateCall(
18911+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
18912+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
18913+
return Builder.CreateCall(
18914+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
18915+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
18916+
return Builder.CreateCall(
18917+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
18918+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
18919+
return Builder.CreateCall(
18920+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
18921+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
18922+
return Builder.CreateCall(
18923+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
18924+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
18925+
return Builder.CreateCall(
18926+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
18927+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
18928+
return Builder.CreateCall(
18929+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
18930+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
18931+
return Builder.CreateCall(
18932+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
18933+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
18934+
return Builder.CreateCall(
18935+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
18936+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
18937+
return Builder.CreateCall(
18938+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
18939+
case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
18940+
return Builder.CreateCall(
18941+
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
18942+
case NVPTX::BI__nvvm_is_explicit_cluster:
18943+
return Builder.CreateCall(
18944+
CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
18945+
case NVPTX::BI__nvvm_isspacep_shared_cluster:
18946+
return Builder.CreateCall(
18947+
CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
18948+
EmitScalarExpr(E->getArg(0)));
18949+
case NVPTX::BI__nvvm_mapa:
18950+
return Builder.CreateCall(
18951+
CGM.getIntrinsic(Intrinsic::nvvm_mapa),
18952+
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
18953+
case NVPTX::BI__nvvm_mapa_shared_cluster:
18954+
return Builder.CreateCall(
18955+
CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
18956+
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
18957+
case NVPTX::BI__nvvm_getctarank:
18958+
return Builder.CreateCall(
18959+
CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
18960+
EmitScalarExpr(E->getArg(0)));
18961+
case NVPTX::BI__nvvm_getctarank_shared_cluster:
18962+
return Builder.CreateCall(
18963+
CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
18964+
EmitScalarExpr(E->getArg(0)));
1888818965
default:
1888918966
return nullptr;
1889018967
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx78" "-target-cpu" "sm_90" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
2+
3+
// CHECK: define{{.*}} void @_Z6kernelPlPvj(
4+
// Codegen test kernel for the sm_90 builtins. Each builtin is called exactly
// once and its result is written to the next element of `out`; the FileCheck
// directive immediately above each statement names the LLVM intrinsic call
// that statement is expected to produce. The directives are matched in file
// order, so statements and their directives must stay paired and in sequence.
//   out - destination for every builtin result
//   ptr - generic pointer argument for the pointer-taking builtins
//   u   - integer second argument for the two mapa builtins
__attribute__((global)) void kernel(long *out, void *ptr, unsigned u) {
5+
// Running index into `out`; bumped once per builtin call.
int i = 0;
6+
// CHECK: call i1 @llvm.nvvm.isspacep.shared.cluster
7+
out[i++] = __nvvm_isspacep_shared_cluster(ptr);
8+
9+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
10+
out[i++] = __nvvm_read_ptx_sreg_clusterid_x();
11+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
12+
out[i++] = __nvvm_read_ptx_sreg_clusterid_y();
13+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
14+
out[i++] = __nvvm_read_ptx_sreg_clusterid_z();
15+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
16+
out[i++] = __nvvm_read_ptx_sreg_clusterid_w();
17+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
18+
out[i++] = __nvvm_read_ptx_sreg_nclusterid_x();
19+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
20+
out[i++] = __nvvm_read_ptx_sreg_nclusterid_y();
21+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
22+
out[i++] = __nvvm_read_ptx_sreg_nclusterid_z();
23+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
24+
out[i++] = __nvvm_read_ptx_sreg_nclusterid_w();
25+
26+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
27+
out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_x();
28+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
29+
out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_y();
30+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
31+
out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_z();
32+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.w()
33+
out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_w();
34+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
35+
out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_x();
36+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
37+
out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_y();
38+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
39+
out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_z();
40+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.w()
41+
out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_w();
42+
43+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
44+
out[i++] = __nvvm_read_ptx_sreg_cluster_ctarank();
45+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
46+
out[i++] = __nvvm_read_ptx_sreg_cluster_nctarank();
47+
// CHECK: call i1 @llvm.nvvm.is_explicit_cluster()
48+
out[i++] = __nvvm_is_explicit_cluster();
49+
50+
// Same address as `ptr`, retyped as an address_space(3) pointer so it can be
// passed to the *_shared_cluster builtins below.
auto * sptr = (__attribute__((address_space(3))) void *)ptr;
51+
// CHECK: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}})
52+
out[i++] = (long) __nvvm_mapa(ptr, u);
53+
// CHECK: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
54+
out[i++] = (long) __nvvm_mapa_shared_cluster(sptr, u);
55+
// CHECK: call i32 @llvm.nvvm.getctarank(ptr {{.*}})
56+
out[i++] = __nvvm_getctarank(ptr);
57+
// CHECK: call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) {{.*}})
58+
out[i++] = __nvvm_getctarank_shared_cluster(sptr);
59+
60+
// CHECK: ret void
61+
}

0 commit comments

Comments
 (0)