Skip to content

Commit 8d817f6

Browse files
authored
[LLVM][NVPTX]: Add aligned versions of cluster barriers (#77940)
1 parent 8e8c954 commit 8d817f6

File tree

3 files changed

+31
-0
lines changed

3 files changed

+31
-0
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,14 @@ let TargetPrefix = "nvvm" in {
13721372
def int_nvvm_barrier_cluster_wait :
13731373
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
13741374

1375+
// '.aligned' variants of the barrier.cluster.* intrinsics above (arrive, arrive.relaxed, wait); same empty signature and attributes.
1376+
def int_nvvm_barrier_cluster_arrive_aligned :
1377+
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1378+
def int_nvvm_barrier_cluster_arrive_relaxed_aligned :
1379+
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1380+
def int_nvvm_barrier_cluster_wait_aligned :
1381+
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1382+
13751383
// Membar
13761384
def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
13771385
Intrinsic<[], [], [IntrNoCallback]>;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
132132
"barrier.sync \t$id, $cnt;",
133133
[(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
134134
Requires<[hasPTX<60>, hasSM<30>]>;
135+
135136
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
136137
list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
137138
NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
@@ -145,6 +146,15 @@ def barrier_cluster_arrive_relaxed:
145146
def barrier_cluster_wait:
146147
INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
147148

149+
// Instruction definitions for the '.aligned' cluster barrier intrinsics; each emits "barrier.cluster.<variant>.aligned;".
150+
def barrier_cluster_arrive_aligned:
151+
INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
152+
def barrier_cluster_arrive_relaxed_aligned:
153+
INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
154+
int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
155+
def barrier_cluster_wait_aligned:
156+
INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;
157+
148158
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
149159
bit offset_imm, bit mask_imm, bit threadmask_imm>
150160
: NVPTXInst<(outs), (ins), "?", []> {

llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,16 @@ define void @test_barrier_cluster() {
133133
ret void
134134
}
135135

136+
; CHECK-LABEL: test_barrier_cluster_aligned(
137+
define void @test_barrier_cluster_aligned() {
138+
; CHECK: barrier.cluster.arrive.aligned;
139+
call void @llvm.nvvm.barrier.cluster.arrive.aligned()
140+
; CHECK: barrier.cluster.arrive.relaxed.aligned;
141+
call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
142+
; CHECK: barrier.cluster.wait.aligned;
143+
call void @llvm.nvvm.barrier.cluster.wait.aligned()
144+
ret void
145+
}
136146

137147
declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
138148
declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
@@ -153,4 +163,7 @@ declare i1 @llvm.nvvm.is_explicit_cluster()
153163
declare void @llvm.nvvm.barrier.cluster.arrive()
154164
declare void @llvm.nvvm.barrier.cluster.arrive.relaxed()
155165
declare void @llvm.nvvm.barrier.cluster.wait()
166+
declare void @llvm.nvvm.barrier.cluster.arrive.aligned()
167+
declare void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
168+
declare void @llvm.nvvm.barrier.cluster.wait.aligned()
156169
declare void @llvm.nvvm.fence.sc.cluster()

0 commit comments

Comments
 (0)