[MLIR] Cleanup Pass Pipeline in sm_90 Integration Tests (#67416)

grypp · web-flow · commit f4fb03937ad6 · 2023-09-26T14:21:22.000+02:00
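MLIR has begun supporting many features of NVIDIA's sm_90 architecture,
and new integration tests have been added for it. Although the tests
worked well, their `mlir-opt` pass pipelines contained redundancies.
This PR cleans up the unnecessary passes: it removes the duplicate
`-convert-nvgpu-to-nvvm` and `-convert-scf-to-cf` invocations and the
repeated `-canonicalize -cse` pairs, and drops
`-convert-linalg-to-loops`, `-lower-affine`, and `-convert-math-to-llvm`
from the TMA load tests.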
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
@@ -1,20 +1,16 @@
-// RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
-// RUN:         -convert-linalg-to-loops \
-// RUN:         -gpu-kernel-outlining \
-// RUN:         -convert-vector-to-scf  \
-// RUN:         -lower-affine \
-// RUN:         -convert-scf-to-cf \
-// RUN:         -convert-nvvm-to-llvm \
-// RUN:         -convert-nvgpu-to-nvvm \
-// RUN:         -convert-scf-to-cf  \
-// RUN:         -convert-vector-to-llvm \
-// RUN:         -convert-math-to-llvm \
-// RUN:         -convert-index-to-llvm=index-bitwidth=32 \
-// RUN:         -convert-arith-to-llvm \
-// RUN:         -finalize-memref-to-llvm='use-opaque-pointers=1' \
-// RUN:         -convert-func-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
+// RUN: mlir-opt %s \
+// RUN:    -convert-nvgpu-to-nvvm \
+// RUN:    -gpu-kernel-outlining \
+// RUN:    -convert-vector-to-scf  \
+// RUN:    -convert-scf-to-cf \
+// RUN:    -convert-nvvm-to-llvm \
+// RUN:    -convert-vector-to-llvm \
+// RUN:    -convert-index-to-llvm=index-bitwidth=32 \
+// RUN:    -convert-arith-to-llvm \
+// RUN:    -finalize-memref-to-llvm='use-opaque-pointers=1' \
+// RUN:    -convert-func-to-llvm \
+// RUN:    -canonicalize -cse \
+// RUN:    -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
 // RUN:  | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-index-to-llvm{index-bitwidth=32},canonicalize,cse))' \
 // RUN:  | mlir-opt --gpu-to-llvm --gpu-module-to-binary=format=%gpu_compilation_format -canonicalize -cse -reconcile-unrealized-casts \
 // RUN:  | mlir-cpu-runner \
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
@@ -1,30 +1,17 @@
-// RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
-// RUN:         -convert-linalg-to-loops \
-// RUN:         -canonicalize -cse \
-// RUN:         -gpu-kernel-outlining \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-vector-to-scf  \
-// RUN:         -canonicalize -cse \
-// RUN:         -lower-affine \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-scf-to-cf \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-nvvm-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-nvgpu-to-nvvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-scf-to-cf  \
-// RUN:         -convert-vector-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-math-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -lower-affine \
-// RUN:         -convert-index-to-llvm=index-bitwidth=32 \
-// RUN:         -convert-arith-to-llvm \
-// RUN:         -finalize-memref-to-llvm='use-opaque-pointers=1' \
-// RUN:         -convert-func-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
+// RUN: mlir-opt %s \
+// RUN:    -convert-nvgpu-to-nvvm \
+// RUN:    -canonicalize -cse \
+// RUN:    -gpu-kernel-outlining \
+// RUN:    -convert-vector-to-scf  \
+// RUN:    -convert-scf-to-cf \
+// RUN:    -convert-nvvm-to-llvm \
+// RUN:    -convert-vector-to-llvm \
+// RUN:    -convert-index-to-llvm=index-bitwidth=32 \
+// RUN:    -convert-arith-to-llvm \
+// RUN:    -finalize-memref-to-llvm='use-opaque-pointers=1' \
+// RUN:    -convert-func-to-llvm \
+// RUN:    -canonicalize -cse \
+// RUN:    -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
 // RUN:  | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-index-to-llvm{index-bitwidth=32},canonicalize,cse))' \
 // RUN:  | mlir-opt --gpu-to-llvm --gpu-module-to-binary -canonicalize -cse -reconcile-unrealized-casts \
 // RUN:  | mlir-cpu-runner \
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
@@ -1,7 +1,6 @@
 // RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
 // RUN:         -gpu-kernel-outlining \
 // RUN:         -convert-nvvm-to-llvm \
-// RUN:         -convert-nvgpu-to-nvvm \
 // RUN:         -convert-scf-to-cf  \
 // RUN:         -convert-vector-to-llvm \
 // RUN:         -convert-index-to-llvm=index-bitwidth=32 \
@@ -25,7 +24,6 @@
 // RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
 // RUN:         -gpu-kernel-outlining \
 // RUN:         -convert-nvvm-to-llvm \
-// RUN:         -convert-nvgpu-to-nvvm \
 // RUN:         -convert-scf-to-cf  \
 // RUN:         -convert-vector-to-llvm \
 // RUN:         -convert-index-to-llvm=index-bitwidth=32 \
@@ -41,7 +39,6 @@
 // RUN:   --entry-point-result=void \
 // RUN:  | FileCheck %s
 
-
 // CHECK: [GPU] TMA BEFORE lhs[45][7] 0.000000
 // CHECK: [GPU] TMA BEFORE rhs[7][0] 0.000000
 // CHECK: [GPU] TMA LOADED lhs[45][7] 7.000000