Skip to content

File tree

5 files changed

+72
-0
lines changed

5 files changed

+72
-0
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4805,6 +4805,21 @@ def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
48054805
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
48064806
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
48074807

4808+
//
4809+
// WGMMA instructions
4810+
//
4811+
// wgmma.fence.sync.aligned;
4812+
def int_nvvm_wgmma_fence_sync_aligned
4813+
: Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.fence.sync.aligned">;
4814+
4815+
// wgmma.commit_group.sync.aligned;
4816+
def int_nvvm_wgmma_commit_group_sync_aligned
4817+
: Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.commit_group.sync.aligned">;
4818+
4819+
// wgmma.wait_group.sync.aligned N;
4820+
def int_nvvm_wgmma_wait_group_sync_aligned
4821+
: Intrinsic<[], [llvm_i32_ty], [IntrConvergent, ImmArg<ArgIndex<0>>], "llvm.nvvm.wgmma.wait_group.sync.aligned">;
4822+
48084823
//
48094824
// WMMA instructions
48104825
//

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7484,4 +7484,16 @@ defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_align
74847484

74857485
} // isConvergent
74867486

7487+
//
7488+
// WGMMA instructions
7489+
//
7490+
def INT_NVVM_WGMMA_FENCE_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.fence.sync.aligned;",
7491+
[(int_nvvm_wgmma_fence_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>;
7492+
7493+
def INT_NVVM_WGMMA_COMMIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.commit_group.sync.aligned;",
7494+
[(int_nvvm_wgmma_commit_group_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>;
7495+
7496+
def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins i32imm:$n), "wgmma.wait_group.sync.aligned \t$n;",
7497+
[(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>, Requires<[hasSM90a, hasPTX<80>]>;
7498+
74877499
def INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>;
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | FileCheck %s
2+
; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | %ptxas-verify -arch=sm_90a %}
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
declare void @llvm.nvvm.wgmma.commit_group.sync.aligned()
7+
8+
define void @test_wgmma_commit_group_sync_aligned() {
9+
; CHECK-LABEL: test_wgmma_commit_group_sync_aligned(
10+
; CHECK: // %bb.0:
11+
; CHECK-NEXT: wgmma.commit_group.sync.aligned;
12+
; CHECK-NEXT: ret;
13+
call void @llvm.nvvm.wgmma.commit_group.sync.aligned()
14+
ret void
15+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | FileCheck %s
2+
; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | %ptxas-verify -arch=sm_90a %}
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
declare void @llvm.nvvm.wgmma.fence.sync.aligned()
7+
8+
define void @test_wgmma_fence_sync_aligned() {
9+
; CHECK-LABEL: test_wgmma_fence_sync_aligned(
10+
; CHECK: // %bb.0:
11+
; CHECK-NEXT: wgmma.fence.sync.aligned;
12+
; CHECK-NEXT: ret;
13+
call void @llvm.nvvm.wgmma.fence.sync.aligned()
14+
ret void
15+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | FileCheck %s
2+
; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | %ptxas-verify -arch=sm_90a %}
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
declare void @llvm.nvvm.wgmma.wait_group.sync.aligned(i32)
7+
8+
define void @test_wgmma_wait_group_sync_aligned() {
9+
; CHECK-LABEL: test_wgmma_wait_group_sync_aligned(
10+
; CHECK: // %bb.0:
11+
; CHECK-NEXT: wgmma.wait_group.sync.aligned 10;
12+
; CHECK-NEXT: ret;
13+
call void @llvm.nvvm.wgmma.wait_group.sync.aligned(i32 10)
14+
ret void
15+
}

0 commit comments

Comments
 (0)