@@ -977,3 +977,93 @@ module {
// CHECK-DAG: %[[T3:.+]] = arith.addf %[[T2]], %[[B1]]
// CHECK: linalg.yield %[[T3]] : f32
// CHECK: return %[[GENERIC]]
+
+ // -----
+
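+ // A linalg.generic that gathers elements from %arg0 at indices taken from a
+ // collapsed i64 operand, with its result consumed by a tensor.reshape.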
+ #map = affine_map<()[s0, s1] -> (s0 * s1)>
+ #map1 = affine_map<(d0, d1, d2) -> (d0)>
+ #map2 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+ module {
+   func.func @no_fusion(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?xi64>, %arg2: tensor<?x?xi64>, %arg3: tensor<?x?xi64>) -> tensor<?x?x?x?xf32> {
+     %c1 = arith.constant 1 : index
+     %c0 = arith.constant 0 : index
+     %c2 = arith.constant 2 : index
+     %dim = tensor.dim %arg1, %c0 : tensor<?x?xi64>
+     %dim_0 = tensor.dim %arg1, %c1 : tensor<?x?xi64>
+     %0 = arith.index_cast %dim : index to i64
+     %1 = arith.index_cast %dim_0 : index to i64
+     %collapsed = tensor.collapse_shape %arg3 [[0, 1]] : tensor<?x?xi64> into tensor<?xi64>
+     %dim_1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
+     %dim_2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
+     %2 = affine.apply #map()[%dim, %dim_0]
+     %3 = tensor.empty(%2, %dim_1, %dim_2) : tensor<?x?x?xf32>
+     %4 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed : tensor<?xi64>) outs(%3 : tensor<?x?x?xf32>) {
+     ^bb0(%in: i64, %out: f32):
+       %7 = arith.index_cast %in : i64 to index
+       %8 = linalg.index 1 : index
+       %9 = linalg.index 2 : index
+       %extracted = tensor.extract %arg0[%7, %8, %9] : tensor<?x?x?xf32>
+       linalg.yield %extracted : f32
+     } -> tensor<?x?x?xf32>
+     %5 = arith.index_cast %dim_1 : index to i64
+     %6 = arith.index_cast %dim_2 : index to i64
+     %from_elements = tensor.from_elements %0, %1, %5, %6 : tensor<4xi64>
+     %reshape = tensor.reshape %4(%from_elements) : (tensor<?x?x?xf32>, tensor<4xi64>) -> tensor<?x?x?x?xf32>
+     return %reshape : tensor<?x?x?x?xf32>
+   }
+ }
+
+ // -----
+
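+ // Same payload as above, with checks: the collapse_shape on the index operand
+ // folds away by expanding the generic op to 4-D, while the trailing
+ // tensor.reshape is not fused, so a collapse_shape/reshape pair remains on
+ // the result.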
+ #map = affine_map<()[s0, s1] -> (s0 * s1)>
+ #map1 = affine_map<(d0, d1, d2) -> (d0)>
+ #map2 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+ // CHECK-DAG: #[[$MAP_0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
+ // CHECK-DAG: #[[$MAP_1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
+ // CHECK-LABEL: func.func @no_fuse_expand_collapsed_generic_input(
+ // CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: tensor<?x?x?xf32>,
+ // CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: tensor<?x?xi64>,
+ // CHECK-SAME: %[[VAL_2:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: tensor<?x?xi64>,
+ // CHECK-SAME: %[[VAL_3:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: tensor<?x?xi64>)
+ func.func @no_fuse_expand_collapsed_generic_input(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?xi64>, %arg2: tensor<?x?xi64>, %arg3: tensor<?x?xi64>) -> tensor<?x?x?x?xf32> {
+ // CHECK: %[[EXPANDED:.*]] = tensor.expand_shape %{{.+}} {{\[\[}}0, 1], [2], [3]] output_shape {{\[}}%{{.+}}, %{{.+}}, %{{.+}}, %{{.+}}] : tensor<?x?x?xf32> into tensor<?x?x?x?xf32>
+ // CHECK: %[[OUT:.*]] = tensor.empty(%{{.+}}, %{{.+}}, %{{.+}}, %{{.+}}) : tensor<?x?x?x?xf32>
+ // CHECK: %[[VAL_4:.*]] = linalg.generic {indexing_maps = [#[[$MAP_0]], #[[$MAP_1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[VAL_3]] : tensor<?x?xi64>) outs(%[[OUT]] : tensor<?x?x?x?xf32>) {
+ // CHECK: ^bb0(%[[VAL_5:.*]]: i64, %[[VAL_6:.*]]: f32):
+ // CHECK: %[[OFFSETS:.*]] = arith.index_cast %[[VAL_5]] : i64 to index
+ // CHECK: %[[SIZES:.*]] = linalg.index 2 : index
+ // CHECK: %[[STRIDES:.*]] = linalg.index 3 : index
+ // CHECK: %[[EXTRACT:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[OFFSETS]], %[[SIZES]], %[[STRIDES]]] : tensor<?x?x?xf32>
+ // CHECK: linalg.yield %[[EXTRACT]] : f32
+ // CHECK: } -> tensor<?x?x?x?xf32>
+ // CHECK: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[VAL_4]] {{\[\[}}0, 1], [2], [3]] : tensor<?x?x?x?xf32> into tensor<?x?x?xf32>
+ // CHECK: %[[SHAPE:.*]] = tensor.from_elements
+ // CHECK: %[[RESULT:.*]] = tensor.reshape %[[COLLAPSED]](%[[SHAPE]]) : (tensor<?x?x?xf32>, tensor<4xi64>) -> tensor<?x?x?x?xf32>
+ // CHECK: return %[[RESULT]] : tensor<?x?x?x?xf32>
+ // CHECK: }
+   %c1 = arith.constant 1 : index
+   %c0 = arith.constant 0 : index
+   %c2 = arith.constant 2 : index
+   %dim = tensor.dim %arg1, %c0 : tensor<?x?xi64>
+   %dim_0 = tensor.dim %arg1, %c1 : tensor<?x?xi64>
+   %0 = arith.index_cast %dim : index to i64
+   %1 = arith.index_cast %dim_0 : index to i64
+   %collapsed = tensor.collapse_shape %arg3 [[0, 1]] : tensor<?x?xi64> into tensor<?xi64>
+   %dim_1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
+   %dim_2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
+   %2 = affine.apply #map()[%dim, %dim_0]
+   %3 = tensor.empty(%2, %dim_1, %dim_2) : tensor<?x?x?xf32>
+   %4 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed : tensor<?xi64>) outs(%3 : tensor<?x?x?xf32>) {
+   ^bb0(%in: i64, %out: f32):
+     %7 = arith.index_cast %in : i64 to index
+     %8 = linalg.index 1 : index
+     %9 = linalg.index 2 : index
+     %extracted = tensor.extract %arg0[%7, %8, %9] : tensor<?x?x?xf32>
+     linalg.yield %extracted : f32
+   } -> tensor<?x?x?xf32>
+   %5 = arith.index_cast %dim_1 : index to i64
+   %6 = arith.index_cast %dim_2 : index to i64
+   %from_elements = tensor.from_elements %0, %1, %5, %6 : tensor<4xi64>
+   %reshape = tensor.reshape %4(%from_elements) : (tensor<?x?x?xf32>, tensor<4xi64>) -> tensor<?x?x?x?xf32>
+   return %reshape : tensor<?x?x?x?xf32>
+ }