Skip to content

Commit 86cb0bd

Browse files
authored
[MLIR][NVVM] [NFC] Update test cmd-lines and doc links (#128207)
For the NVVM Dialect tests under Target/LLVMIR/nvvm/ dir, we verify the lowering to the intrinsics using mlir-translate. Remove the -verify-diagnostics option from the cmd-line for these tests since all the verifier checks are tested through the nvvmir-invalid.mlir file. Similarly, remove the split-input-file option which is not relevant here. Update a few remaining links in the NVVMOps.td file. All the reference links follow the same style now. Rename the tcgen05-barriers.mlir file to tcgen05-commit.mlir and move the wait/fence tests to a separate file. Signed-off-by: Durgadoss R <[email protected]>
1 parent 9d19105 commit 86cb0bd

File tree

10 files changed

+37
-45
lines changed

10 files changed

+37
-45
lines changed

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,8 +2648,7 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
26482648
the amount specified by `nCols` and writes the destination
26492649
address to the `addr` argument. The `nCols` operand specifies the
26502650
number of columns to be allocated and it must be a power-of-two.
2651-
[For more information, refer to the PTX ISA]
2652-
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
2651+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
26532652
}];
26542653

26552654
let arguments = (ins
@@ -2679,8 +2678,7 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc"> {
26792678
specified by `tmemAddr`, which must be from a previous tensor
26802679
memory allocation. The `nCols` operand specifies the number
26812680
of columns to be de-allocated, and it must be a power-of-two.
2682-
[For more information, refer to the PTX ISA]
2683-
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
2681+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
26842682
}];
26852683

26862684
let arguments = (ins LLVM_PointerTensor:$taddr, I32:$nCols,
@@ -2708,8 +2706,7 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
27082706
of the executing thread is relinquishing the right to allocate
27092707
Tensor Memory. So, it is illegal for a CTA to perform `tcgen05.alloc`
27102708
after any of its constituent threads execute `tcgen05.relinquish_alloc_permit`.
2711-
[For more information, refer to the PTX ISA]
2712-
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
2709+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
27132710
}];
27142711

27152712
let arguments = (ins
@@ -2733,8 +2730,7 @@ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence"> {
27332730
The `tcgen05.fence<after>` orders all subsequent async tcgen05 operations
27342731
with respect to the prior tcgen05 and execution ordering operations.
27352732

2736-
[For more information refer to the PTX ISA]
2737-
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
2733+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
27382734
}];
27392735

27402736
let arguments = (ins Tcgen05FenceKindAttr:$kind);
@@ -2756,8 +2752,7 @@ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait"> {
27562752
have completed. Similarly, the `tcgen05.wait<store>` causes the executing
27572753
thread to block until all prior `tcgen05.st` operations issued by the
27582754
executing thread have completed.
2759-
[For more information refer PTX ISA]
2760-
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
2755+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
27612756
}];
27622757

27632758
let arguments = (ins Tcgen05WaitKindAttr:$kind);
@@ -2782,8 +2777,7 @@ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit"> {
27822777
when present, specifies the destination CTAs in the cluster such
27832778
that each bit position in the 16-bit `multicastMask` operand
27842779
corresponds to the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.
2785-
[For more information refer PTX ISA]
2786-
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
2780+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
27872781
}];
27882782

27892783
let arguments = (ins

mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
22

33
// CHECK-LABEL: @convert_float_to_tf32_rna
44
llvm.func @convert_float_to_tf32_rna(%src : f32) -> i32 {

mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// RUN: mlir-opt -split-input-file -verify-diagnostics %s
2-
// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-LLVM
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --check-prefix=CHECK-LLVM
32

43
// CHECK-LABEL: @llvm_nvvm_tcgen05_alloc
54
llvm.func @llvm_nvvm_tcgen05_alloc(%addr : !llvm.ptr, %ncols : i32) {

mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir renamed to mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,4 @@
1-
// RUN: mlir-opt -split-input-file -verify-diagnostics %s
2-
// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-LLVM
3-
4-
// CHECK-LABEL: @llvm_nvvm_tcgen05_fence
5-
llvm.func @llvm_nvvm_tcgen05_fence() {
6-
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.before.thread.sync()
7-
nvvm.tcgen05.fence #nvvm.tcgen05_fence<before>
8-
9-
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.after.thread.sync()
10-
nvvm.tcgen05.fence #nvvm.tcgen05_fence<after>
11-
12-
llvm.return
13-
}
14-
15-
// CHECK-LABEL: @llvm_nvvm_tcgen05_wait
16-
llvm.func @llvm_nvvm_tcgen05_wait() {
17-
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.ld()
18-
nvvm.tcgen05.wait #nvvm.tcgen05_wait<load>
19-
20-
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.st()
21-
nvvm.tcgen05.wait #nvvm.tcgen05_wait<store>
22-
23-
llvm.return
24-
}
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --check-prefix=CHECK-LLVM
252

263
// CHECK-LABEL: @llvm_nvvm_tcgen05_commit_generic
274
llvm.func @llvm_nvvm_tcgen05_commit_generic(%barrier : !llvm.ptr, %cta_mask : i16) {

mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
22

33
// CHECK-LABEL: @nvvm_tcgen05_cp_128x256b
44
llvm.func @nvvm_tcgen05_cp_128x256b(%taddr : !llvm.ptr<6>, %smem_desc : i64) {
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --check-prefix=CHECK-LLVM
2+
3+
// CHECK-LABEL: @llvm_nvvm_tcgen05_fence
4+
llvm.func @llvm_nvvm_tcgen05_fence() {
5+
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.before.thread.sync()
6+
nvvm.tcgen05.fence #nvvm.tcgen05_fence<before>
7+
8+
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.after.thread.sync()
9+
nvvm.tcgen05.fence #nvvm.tcgen05_fence<after>
10+
11+
llvm.return
12+
}
13+
14+
// CHECK-LABEL: @llvm_nvvm_tcgen05_wait
15+
llvm.func @llvm_nvvm_tcgen05_wait() {
16+
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.ld()
17+
nvvm.tcgen05.wait #nvvm.tcgen05_wait<load>
18+
19+
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.st()
20+
nvvm.tcgen05.wait #nvvm.tcgen05_wait<store>
21+
22+
llvm.return
23+
}

mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
22

33
// CHECK-LABEL: @llvm_nvvm_tcgen05_shift
44
llvm.func @llvm_nvvm_tcgen05_shift(%taddr : !llvm.ptr<6>) {

mlir/test/Target/LLVMIR/nvvm/tma_bulk_copy.mlir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// RUN: mlir-opt -split-input-file -verify-diagnostics %s
2-
// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
32

43
// CHECK-LABEL: @llvm_nvvm_cp_async_bulk_global_to_shared_cluster
54
llvm.func @llvm_nvvm_cp_async_bulk_global_to_shared_cluster(%dst : !llvm.ptr<3>, %src : !llvm.ptr<1>, %mbar : !llvm.ptr<3>, %size : i32, %mc : i16, %ch : i64) {

mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
22

33
// CHECK-LABEL: @tma_prefetch_1d
44
llvm.func @tma_prefetch_1d(%tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) {

mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-translate -mlir-to-llvmir -split-input-file --verify-diagnostics %s | FileCheck %s
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
22

33
// CHECK-LABEL: define void @tma_store_reduce_1d(
44
llvm.func @tma_store_reduce_1d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) {

0 commit comments

Comments
 (0)