Skip to content

Commit 8031a08

Browse files
committed
[MLIR] Run the TMA test for sm_90
TMA support was introduced to MLIR; however, it needed the `ptxas` compiler. Recent work (D154117) introduced that. This change runs the existing integration test. Reviewed By: fmorac. Differential Revision: https://reviews.llvm.org/D159347
1 parent e6971cb commit 8031a08

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

mlir/test/Integration/GPU/CUDA/sm90/tmaload.mlir

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
// RUN: -convert-func-to-llvm \
1111
// RUN: -canonicalize \
1212
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm{use-opaque-pointers=1},lower-affine,convert-scf-to-cf,convert-vector-to-llvm,convert-math-to-llvm,expand-strided-metadata,lower-affine,convert-index-to-llvm{index-bitwidth=32},convert-arith-to-llvm,reconcile-unrealized-casts,gpu-to-cubin{chip=sm_90 features=+ptx80 dump-ptx}))' \
13-
// RUN: 2&>1 | FileCheck %s --check-prefixes=CHECK-PTX
13+
// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-PTX
1414

1515
// CHECK-PTX: mbarrier.init.shared.b64
1616
// CHECK-PTX: mbarrier.arrive.expect_tx.shared.b64
@@ -19,6 +19,31 @@
1919
// CHECK-PTX: mbarrier.arrive.expect_tx.shared.b64
2020
// CHECK-PTX: mbarrier.try_wait.parity.shared.b64
2121

22+
// RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
23+
// RUN: -gpu-kernel-outlining \
24+
// RUN: -convert-nvvm-to-llvm \
25+
// RUN: -convert-nvgpu-to-nvvm \
26+
// RUN: -convert-scf-to-cf \
27+
// RUN: -convert-vector-to-llvm \
28+
// RUN: -convert-index-to-llvm=index-bitwidth=32 \
29+
// RUN: -convert-arith-to-llvm \
30+
// RUN: -finalize-memref-to-llvm='use-opaque-pointers=1' \
31+
// RUN: -convert-func-to-llvm \
32+
// RUN: -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
33+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-index-to-llvm{index-bitwidth=32},canonicalize,cse))' \
34+
// RUN: | mlir-opt --gpu-to-llvm --gpu-module-to-binary -canonicalize -cse -reconcile-unrealized-casts \
35+
// RUN: | mlir-cpu-runner \
36+
// RUN: --shared-libs=%mlir_cuda_runtime \
37+
// RUN: --shared-libs=%mlir_runner_utils \
38+
// RUN: --entry-point-result=void \
39+
// RUN: | FileCheck %s
40+
41+
42+
// CHECK: [GPU] TMA BEFORE lhs[45][7] 0.000000
43+
// CHECK: [GPU] TMA BEFORE rhs[7][0] 0.000000
44+
// CHECK: [GPU] TMA LOADED lhs[45][7] 7.000000
45+
// CHECK: [GPU] TMA LOADED rhs[7][0] 3.000000
46+
2247
module @mymod {
2348
memref.global "private" @bufferLhsGlobal : memref<64x8xf32, 3>
2449
memref.global "private" @bufferRhsGlobal : memref<8x128xf32, 3>
@@ -87,4 +112,4 @@ module @mymod {
87112
}
88113
return
89114
}
90-
}
115+
}

0 commit comments

Comments
 (0)