Skip to content

[LLVM][NVPTX]: Add aligned versions of cluster barriers #77940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,14 @@ let TargetPrefix = "nvvm" in {
def int_nvvm_barrier_cluster_wait :
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;

// 'aligned' versions of the above barrier.cluster.* intrinsics.
// Each one takes no operands and produces no result, and carries the same
// IntrConvergent and IntrNoCallback properties as
// int_nvvm_barrier_cluster_wait above.
// NOTE(review): in PTX, '.aligned' presumably requires every thread in the
// warp to execute the same barrier instruction -- confirm against the PTX ISA
// documentation before relying on this.
def int_nvvm_barrier_cluster_arrive_aligned :
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
def int_nvvm_barrier_cluster_arrive_relaxed_aligned :
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
def int_nvvm_barrier_cluster_wait_aligned :
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;

// Membar
def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
Intrinsic<[], [], [IntrNoCallback]>;
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
"barrier.sync \t$id, $cnt;",
[(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
Requires<[hasPTX<60>, hasSM<30>]>;

class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
Expand All @@ -145,6 +146,15 @@ def barrier_cluster_arrive_relaxed:
def barrier_cluster_wait:
INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// 'aligned' versions of the cluster barrier intrinsics.
// Each def instantiates INT_BARRIER_CLUSTER, which emits
// "barrier.cluster.<variant>;" and matches the given intrinsic with no
// operands.
def barrier_cluster_arrive_aligned:
INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
// Unlike its two siblings, which use the class-default predicates
// [hasPTX<78>, hasSM<90>], the relaxed form is gated on hasPTX<80>.
def barrier_cluster_arrive_relaxed_aligned:
INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait_aligned:
INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;

class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
bit offset_imm, bit mask_imm, bit threadmask_imm>
: NVPTXInst<(outs), (ins), "?", []> {
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,16 @@ define void @test_barrier_cluster() {
ret void
}

; Calls each of the three aligned cluster-barrier intrinsics once and expects
; the matching barrier.cluster.*.aligned instruction in the output, in the
; same order as the calls.
; CHECK-LABEL: test_barrier_cluster_aligned(
define void @test_barrier_cluster_aligned() {
; CHECK: barrier.cluster.arrive.aligned;
call void @llvm.nvvm.barrier.cluster.arrive.aligned()
; CHECK: barrier.cluster.arrive.relaxed.aligned;
call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
; CHECK: barrier.cluster.wait.aligned;
call void @llvm.nvvm.barrier.cluster.wait.aligned()
ret void
}

declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
Expand All @@ -153,4 +163,7 @@ declare i1 @llvm.nvvm.is_explicit_cluster()
declare void @llvm.nvvm.barrier.cluster.arrive()
declare void @llvm.nvvm.barrier.cluster.arrive.relaxed()
declare void @llvm.nvvm.barrier.cluster.wait()
declare void @llvm.nvvm.barrier.cluster.arrive.aligned()
declare void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
declare void @llvm.nvvm.barrier.cluster.wait.aligned()
declare void @llvm.nvvm.fence.sc.cluster()