more tests

Hardcode84 · Hardcode84 · commit 8f7b4a40ff44 · 2024-02-01T15:16:03.000+01:00
diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
@@ -24,6 +24,32 @@ func.func @fuse_empty_loops() {
 
 // -----
 
+func.func @fuse_ops_between(%A: f32, %B: f32) -> f32 {
+  %c2 = arith.constant 2 : index
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.reduce
+  }
+  %res = arith.addf %A, %B : f32
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.reduce
+  }
+  return %res : f32
+}
+// CHECK-LABEL: func @fuse_ops_between
+// CHECK-DAG:    [[C0:%.*]] = arith.constant 0 : index
+// CHECK-DAG:    [[C1:%.*]] = arith.constant 1 : index
+// CHECK-DAG:    [[C2:%.*]] = arith.constant 2 : index
+// CHECK:        %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
+// CHECK:        scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK-SAME:       to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
+// CHECK:          scf.reduce
+// CHECK:        }
+// CHECK-NOT:    scf.parallel
+
+// -----
+
 func.func @fuse_two(%A: memref<2x2xf32>, %B: memref<2x2xf32>) {
   %c2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
@@ -754,3 +780,36 @@ func.func @reductions_use_res_inside(%A: memref<2x2xf32>, %B: memref<2x2xf32>) -
 // CHECK-LABEL: func @reductions_use_res_inside
 // CHECK:      scf.parallel
 // CHECK:      scf.parallel
+
+// -----
+
+func.func @reductions_use_res_between(%A: memref<2x2xf32>, %B: memref<2x2xf32>) -> (f32, f32, f32) {
+  %c2 = arith.constant 2 : index
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %init1 = arith.constant 1.0 : f32
+  %init2 = arith.constant 2.0 : f32
+  %res1 = scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) init(%init1) -> f32 {
+    %A_elem = memref.load %A[%i, %j] : memref<2x2xf32>
+    scf.reduce(%A_elem : f32) {
+    ^bb0(%lhs: f32, %rhs: f32):
+      %1 = arith.addf %lhs, %rhs : f32
+      scf.reduce.return %1 : f32
+    }
+  }
+  %res3 = arith.addf %res1, %init2 : f32
+  %res2 = scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) init(%init2) -> f32 {
+    %B_elem = memref.load %B[%i, %j] : memref<2x2xf32>
+    scf.reduce(%B_elem : f32) {
+    ^bb0(%lhs: f32, %rhs: f32):
+      %1 = arith.mulf %lhs, %rhs : f32
+      scf.reduce.return %1 : f32
+    }
+  }
+  return %res1, %res2, %res3 : f32, f32, f32
+}
+
+// instruction in between the loops uses the first loop result
+// CHECK-LABEL: func @reductions_use_res_between
+// CHECK:      scf.parallel
+// CHECK:      scf.parallel