Skip to content

Commit 43531e7

Browse files
authored
[LLVM][NVPTX] Add cp.async.bulk.commit/wait intrinsics (#78698)
This patch adds NVVM intrinsics and NVPTX codegen for the bulk variants of the async-copy commit/wait instructions. lit tests are added to verify the generated PTX. PTX Doc link: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-commit-group Signed-off-by: Durgadoss R <[email protected]>
1 parent 42b1603 commit 43531e7

File tree

3 files changed

+54
-0
lines changed

3 files changed

+54
-0
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,16 @@ def int_nvvm_cp_async_wait_all :
14541454
ClangBuiltin<"__nvvm_cp_async_wait_all">,
14551455
Intrinsic<[],[],[]>;
14561456

1457+
// cp.async.bulk variants of the commit/wait group
1458+
def int_nvvm_cp_async_bulk_commit_group :
1459+
Intrinsic<[],[],[]>;
1460+
1461+
def int_nvvm_cp_async_bulk_wait_group :
1462+
Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1463+
1464+
def int_nvvm_cp_async_bulk_wait_group_read :
1465+
Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1466+
14571467
// mbarrier
14581468
def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">,
14591469
Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,22 @@ def CP_ASYNC_WAIT_ALL :
411411
[(int_nvvm_cp_async_wait_all)]>,
412412
Requires<[hasPTX<70>, hasSM<80>]>;
413413

414+
// cp.async.bulk variants of the commit/wait group
415+
def CP_ASYNC_BULK_COMMIT_GROUP :
416+
NVPTXInst<(outs), (ins), "cp.async.bulk.commit_group;",
417+
[(int_nvvm_cp_async_bulk_commit_group)]>,
418+
Requires<[hasPTX<80>, hasSM<90>]>;
419+
420+
def CP_ASYNC_BULK_WAIT_GROUP :
421+
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;",
422+
[(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
423+
Requires<[hasPTX<80>, hasSM<90>]>;
424+
425+
def CP_ASYNC_BULK_WAIT_GROUP_READ :
426+
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;",
427+
[(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
428+
Requires<[hasPTX<80>, hasSM<90>]>;
429+
414430
//-----------------------------------
415431
// MBarrier Functions
416432
//-----------------------------------

llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,31 @@ define void @test_barrier_cluster_aligned() {
144144
ret void
145145
}
146146

147+
; CHECK-LABEL: test_cp_async_bulk_commit_group(
148+
define void @test_cp_async_bulk_commit_group() {
149+
; CHECK: cp.async.bulk.commit_group;
150+
call void @llvm.nvvm.cp.async.bulk.commit.group()
151+
ret void
152+
}
153+
154+
; CHECK-LABEL: test_cp_async_bulk_wait_group(
155+
define void @test_cp_async_bulk_wait_group() {
156+
; CHECK: cp.async.bulk.wait_group 8;
157+
call void @llvm.nvvm.cp.async.bulk.wait.group(i32 8)
158+
; CHECK: cp.async.bulk.wait_group 0;
159+
call void @llvm.nvvm.cp.async.bulk.wait.group(i32 0)
160+
ret void
161+
}
162+
163+
; CHECK-LABEL: test_cp_async_bulk_wait_group_read(
164+
define void @test_cp_async_bulk_wait_group_read() {
165+
; CHECK: cp.async.bulk.wait_group.read 8;
166+
call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 8)
167+
; CHECK: cp.async.bulk.wait_group.read 0;
168+
call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 0)
169+
ret void
170+
}
171+
147172
declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
148173
declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
149174
declare ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r);
@@ -167,3 +192,6 @@ declare void @llvm.nvvm.barrier.cluster.arrive.aligned()
167192
declare void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
168193
declare void @llvm.nvvm.barrier.cluster.wait.aligned()
169194
declare void @llvm.nvvm.fence.sc.cluster()
195+
declare void @llvm.nvvm.cp.async.bulk.commit.group()
196+
declare void @llvm.nvvm.cp.async.bulk.wait.group(i32)
197+
declare void @llvm.nvvm.cp.async.bulk.wait.group.read(i32)

0 commit comments

Comments
 (0)