@@ -85,31 +85,32 @@ module attributes {transform.with_named_sequence} {
85
85
// CHECK: scf.yield %[[S9]]
86
86
// CHECK: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]]
87
87
// CHECK: %[[COLLAPSED_6:.*]] = tensor.collapse_shape %[[S4]] {{\[}}[0, 1], [2, 3, 4], [5]]
88
+ // CHECK: %[[S7:.*]] = tensor.empty()
88
89
// CHECK: %[[S6:.*]] = linalg.batch_matmul
89
90
// CHECK: %[[EXPANDED:.*]] = tensor.expand_shape %[[S6]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 2, 2, 2, 2]
90
- // CHECK: %[[S7:.*]] = tensor.empty() : tensor<2x8x8x2xf32>
91
- // CHECK: %[[S8:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[S7]])
91
+ // CHECK: %[[S8:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[ARG2]])
92
92
// CHECK: %[[S9:.*]] = scf.for %[[ARG5:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG6:.*]] = %[[ARG4]])
93
93
// CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[EXPANDED]][0, 0, %[[ARG3]], %[[ARG5]], 0, 0] [6, 6, 1, 1, 2, 2] [1, 1, 1, 1, 1, 1]
94
94
// CHECK: %[[S10:.*]] = affine.apply #[[$MAP0]](%[[ARG3]])
95
95
// CHECK: %[[S11:.*]] = affine.apply #[[$MAP0]](%[[ARG5]])
96
- // CHECK: %[[EXTRACTED_SLICE_7:.*]] = tensor.extract_slice %[[ARG2 ]][0, %[[S10]], %[[S11]], 0] [2, 4, 4, 2] [1, 1, 1, 1]
96
+ // CHECK: %[[EXTRACTED_SLICE_7:.*]] = tensor.extract_slice %[[ARG6]][0, %[[S10]], %[[S11]], 0] [2, 4, 4, 2] [1, 1, 1, 1]
97
97
// CHECK: %[[S12:.*]] = scf.for %[[ARG7:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG8:.*]] = %[[EXTRACTED_SLICE_7]])
98
98
// CHECK: %[[S15:.*]] = scf.for %[[ARG9:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG10:.*]] = %[[ARG8]])
99
99
// CHECK: %[[EXTRACTED_SLICE_8:.*]] = tensor.extract_slice %[[EXTRACTED_SLICE]][0, 0, 0, 0, %[[ARG7]], %[[ARG9]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1]
100
+ // CHECK: %[[S25:.*]] = tensor.extract_slice %[[ARG10]][%[[ARG7]], 0, 0, %[[ARG9]]] [1, 4, 4, 1] [1, 1, 1, 1]
100
101
// CHECK: %[[S16:.*]] = tensor.empty() : tensor<4x6xf32>
101
102
// CHECK: %[[S17:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S16]] : tensor<4x6xf32>) -> tensor<4x6xf32>
102
103
// CHECK: %[[S18:.*]] = linalg.matmul ins(%[[CST_1]], %[[EXTRACTED_SLICE_8]] : tensor<4x6xf32>, tensor<6x6xf32>) outs(%[[S17]] : tensor<4x6xf32>) -> tensor<4x6xf32>
103
104
// CHECK: %[[S19:.*]] = tensor.empty() : tensor<4x4xf32>
104
105
// CHECK: %[[S20:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S19]] : tensor<4x4xf32>) -> tensor<4x4xf32>
105
106
// CHECK: %[[S21:.*]] = linalg.matmul ins(%[[S18]], %[[CST_0]] : tensor<4x6xf32>, tensor<6x4xf32>) outs(%[[S20]] : tensor<4x4xf32>) -> tensor<4x4xf32>
106
- // CHECK: %[[S22:.*]] = tensor.empty() : tensor<4x4xf32>
107
- // CHECK: %[[S23:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S22]] : tensor<4x4xf32>) {
108
- // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32):
109
- // CHECK: linalg.yield %[[IN]] : f32
107
+ // CHECK: %[[S23:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]], %[[S21]] : f32, tensor<4x4xf32>) outs(%[[S25]] : tensor<4x4xf32>) {
108
+ // CHECK: ^bb0(%[[IN1:.*]]: f32, %[[IN2:.*]]: f32, %[[OUT:.*]]: f32):
109
+ // CHECK: %[[VAL_90:.*]] = arith.mulf %[[IN1]], %[[IN2]] : f32
110
+ // CHECK: %[[VAL_91:.*]] = arith.addf %[[VAL_90]], %[[OUT]] : f32
111
+ // CHECK: linalg.yield %[[VAL_91]] : f32
110
112
// CHECK: } -> tensor<4x4xf32>
111
- // CHECK: %[[S24:.*]] = linalg.mul ins(%[[S23]], %[[S21]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S22]] : tensor<4x4xf32>) -> tensor<4x4xf32>
112
- // CHECK: %[[INSERTED_SLICE_9:.*]] = tensor.insert_slice %[[S24]] into %[[ARG10]][%[[ARG7]], 0, 0, %[[ARG9]]] [1, 4, 4, 1] [1, 1, 1, 1]
113
+ // CHECK: %[[INSERTED_SLICE_9:.*]] = tensor.insert_slice %[[S23]] into %[[ARG10]][%[[ARG7]], 0, 0, %[[ARG9]]] [1, 4, 4, 1] [1, 1, 1, 1]
113
114
// CHECK: scf.yield %[[INSERTED_SLICE_9]]
114
115
// CHECK: scf.yield %[[S15]]
115
116
// CHECK: %[[S13:.*]] = affine.apply #[[$MAP0]](%[[ARG3]])
@@ -218,32 +219,33 @@ module attributes {transform.with_named_sequence} {
218
219
// CHECK: scf.yield %[[S9]]
219
220
// CHECK: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]]
220
221
// CHECK: %[[COLLAPSED_7:.*]] = tensor.collapse_shape %[[S4]] {{\[}}[0, 1], [2, 3, 4], [5]]
222
+ // CHECK: %[[S7:.*]] = tensor.empty()
221
223
// CHECK: %[[S6:.*]] = linalg.batch_matmul
222
224
// CHECK: %[[EXPANDED:.*]] = tensor.expand_shape %[[S6]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2]
223
225
// CHECK: %[[PADDED_8:.*]] = tensor.pad %[[ARG2]] low[0, 0, 0, 0] high[0, 3, 3, 0]
224
- // CHECK: %[[S7:.*]] = tensor.empty() : tensor<2x12x12x2xf32>
225
- // CHECK: %[[S8:.*]] = scf.for %[[ARG4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] iter_args(%[[ARG5:.*]] = %[[S7]])
226
+ // CHECK: %[[S8:.*]] = scf.for %[[ARG4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] iter_args(%[[ARG5:.*]] = %[[PADDED_8]])
226
227
// CHECK: %[[S9:.*]] = scf.for %[[ARG6:.*]] = %[[C0]] to %[[C3]] step %[[C1]] iter_args(%[[ARG7:.*]] = %[[ARG5]])
227
228
// CHECK: %[[EXTRACTED_SLICE_9:.*]] = tensor.extract_slice %[[EXPANDED]][0, 0, %[[ARG4]], %[[ARG6]], 0, 0] [6, 6, 1, 1, 2, 2] [1, 1, 1, 1, 1, 1]
228
229
// CHECK: %[[S10:.*]] = affine.apply #[[$MAP0]](%[[ARG4]])
229
230
// CHECK: %[[S11:.*]] = affine.apply #[[$MAP0]](%[[ARG6]])
230
- // CHECK: %[[EXTRACTED_SLICE_10:.*]] = tensor.extract_slice %[[PADDED_8 ]][0, %[[S10]], %[[S11]], 0] [2, 4, 4, 2] [1, 1, 1, 1]
231
+ // CHECK: %[[EXTRACTED_SLICE_10:.*]] = tensor.extract_slice %[[ARG7]][0, %[[S10]], %[[S11]], 0] [2, 4, 4, 2] [1, 1, 1, 1]
231
232
// CHECK: %[[S12:.*]] = scf.for %[[ARG8:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG9:.*]] = %[[EXTRACTED_SLICE_10]])
232
233
// CHECK: %[[S15:.*]] = scf.for %[[ARG10:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG11:.*]] = %[[ARG9]])
233
234
// CHECK: %[[EXTRACTED_SLICE_11:.*]] = tensor.extract_slice %[[EXTRACTED_SLICE_9]][0, 0, 0, 0, %[[ARG8]], %[[ARG10]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1]
235
+ // CHECK: %[[S26:.*]] = tensor.extract_slice %[[ARG11]][%[[ARG8]], 0, 0, %[[ARG10]]] [1, 4, 4, 1] [1, 1, 1, 1]
234
236
// CHECK: %[[S17:.*]] = tensor.empty() : tensor<4x6xf32>
235
237
// CHECK: %[[S18:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S17]] : tensor<4x6xf32>) -> tensor<4x6xf32>
236
238
// CHECK: %[[S19:.*]] = linalg.matmul ins(%[[CST_1]], %[[EXTRACTED_SLICE_11]] : tensor<4x6xf32>, tensor<6x6xf32>) outs(%[[S18]] : tensor<4x6xf32>) -> tensor<4x6xf32>
237
239
// CHECK: %[[S20:.*]] = tensor.empty() : tensor<4x4xf32>
238
240
// CHECK: %[[S21:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S20]] : tensor<4x4xf32>) -> tensor<4x4xf32>
239
241
// CHECK: %[[S22:.*]] = linalg.matmul ins(%[[S19]], %[[CST_0]] : tensor<4x6xf32>, tensor<6x4xf32>) outs(%[[S21]] : tensor<4x4xf32>) -> tensor<4x4xf32>
240
- // CHECK: %[[S23:.*]] = tensor.empty() : tensor<4x4xf32>
241
- // CHECK: %[[S24:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S23]] : tensor<4x4xf32>) {
242
- // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32):
243
- // CHECK: linalg.yield %[[IN]] : f32
242
+ // CHECK: %[[S24:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]], %[[S22]] : f32, tensor<4x4xf32>) outs(%[[S26]] : tensor<4x4xf32>) {
243
+ // CHECK: ^bb0(%[[IN1:.*]]: f32, %[[IN2:.*]]: f32, %[[OUT:.*]]: f32):
244
+ // CHECK: %[[VAL_104:.*]] = arith.mulf %[[IN1]], %[[IN2]] : f32
245
+ // CHECK: %[[VAL_105:.*]] = arith.addf %[[VAL_104]], %[[OUT]] : f32
246
+ // CHECK: linalg.yield %[[VAL_105]] : f32
244
247
// CHECK: } -> tensor<4x4xf32>
245
- // CHECK: %[[S25:.*]] = linalg.mul ins(%[[S24]], %[[S22]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S23]] : tensor<4x4xf32>) -> tensor<4x4xf32>
246
- // CHECK: %[[INSERTED_SLICE_12:.*]] = tensor.insert_slice %[[S25]] into %[[ARG11]][%[[ARG8]], 0, 0, %[[ARG10]]] [1, 4, 4, 1] [1, 1, 1, 1]
248
+ // CHECK: %[[INSERTED_SLICE_12:.*]] = tensor.insert_slice %[[S24]] into %[[ARG11]][%[[ARG8]], 0, 0, %[[ARG10]]] [1, 4, 4, 1] [1, 1, 1, 1]
247
249
// CHECK: scf.yield %[[INSERTED_SLICE_12]]
248
250
// CHECK: scf.yield %[[S15]] : tensor<2x4x4x2xf32>
249
251
// CHECK: %[[S13:.*]] = affine.apply #[[$MAP0]](%[[ARG4]])
@@ -330,16 +332,17 @@ module attributes {transform.with_named_sequence} {
330
332
// CHECK: %[[S6:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[ARG2]])
331
333
// CHECK: %[[S7:.*]] = scf.for %[[ARG5:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG6:.*]] = %[[ARG4]])
332
334
// CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[EXPANDED]][0, 0, 0, 0, %[[ARG3]], %[[ARG5]]] [6, 1, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1]
335
+ // CHECK: %[[S15:.*]] = tensor.extract_slice %[[ARG6]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 4, 1, 1] [1, 1, 1, 1]
333
336
// CHECK: %[[S9:.*]] = tensor.empty() : tensor<4x1xf32>
334
337
// CHECK: %[[S10:.*]] = linalg.fill ins(%[[CST_3]] : f32) outs(%[[S9]] : tensor<4x1xf32>) -> tensor<4x1xf32>
335
338
// CHECK: %[[S11:.*]] = linalg.matmul ins(%[[CST_0]], %[[EXTRACTED_SLICE]] : tensor<4x6xf32>, tensor<6x1xf32>) outs(%[[S10]] : tensor<4x1xf32>) -> tensor<4x1xf32>
336
- // CHECK: %[[S12:.*]] = tensor.empty() : tensor<4x1xf32>
337
- // CHECK: %[[S13:.*]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S12]] : tensor<4x1xf32>) {
338
- // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32):
339
- // CHECK: linalg.yield %[[IN]] : f32
339
+ // CHECK: %[[S13:.*]] = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%[[CST]], %[[S11]] : f32, tensor<4x1xf32>) outs(%[[S15]] : tensor<4x1xf32>) {
340
+ // CHECK: ^bb0(%[[IN1:.*]]: f32, %[[IN2:.*]]: f32, %[[OUT:.*]]: f32):
341
+ // CHECK: %[[VAL_57:.*]] = arith.mulf %[[IN1]], %[[IN2]] : f32
342
+ // CHECK: %[[VAL_58:.*]] = arith.addf %[[VAL_57]], %[[OUT]] : f32
343
+ // CHECK: linalg.yield %[[VAL_58]] : f32
340
344
// CHECK: } -> tensor<4x1xf32>
341
- // CHECK: %[[S14:.*]] = linalg.mul ins(%[[S13]], %[[S11]] : tensor<4x1xf32>, tensor<4x1xf32>) outs(%[[S12]] : tensor<4x1xf32>) -> tensor<4x1xf32>
342
- // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S14]] into %[[ARG6]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 4, 1, 1] [1, 1, 1, 1]
345
+ // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S13]] into %[[ARG6]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 4, 1, 1] [1, 1, 1, 1]
343
346
// CHECK: scf.yield %[[INSERTED_SLICE]]
344
347
// CHECK: scf.yield %[[S7]]
345
348
// CHECK: return %[[S6]]
0 commit comments