@@ -23,9 +23,8 @@ module attributes {transform.with_named_sequence} {
}
}

- // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
- // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
- // CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 5)>
+ // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 5)>
+ // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK: func @reduction_tile(%[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG1:.+]]: tensor<?xf32>
// CHECK-DAG: %[[I:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
@@ -37,21 +36,21 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor<?x5xf32>
// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor<?x5xf32>) -> tensor<?x5xf32>
// CHECK: %[[L:.*]] = scf.for %[[K:.*]] = %[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor<?x5xf32>) {
- // CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]]
+ // CHECK: %[[PS:.*]] = affine.min #[[MAP0]](%[[K]])[%[[D1]]]
// CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[EXT:.*]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor<?x5xf32> to tensor<?x?xf32>
- // CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor<?x?xf32>) outs(%[[EXT]] : tensor<?x?xf32>) {
+ // CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP1]], #[[MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor<?x?xf32>) outs(%[[EXT]] : tensor<?x?xf32>) {
// CHECK: arith.mulf
// CHECK: arith.addf
// CHECK: linalg.yield
// CHECK: } -> tensor<?x?xf32>
// CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor<?x?xf32> into tensor<?x5xf32>
// CHECK: scf.yield %[[INS]] : tensor<?x5xf32>
// CHECK: }
- // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor<?x5xf32>) outs(%[[ARG1]] : tensor<?xf32>) {
+ // CHECK: %[[R:.*]] = linalg.reduce ins(%[[L]] : tensor<?x5xf32>) outs(%[[ARG1]] : tensor<?xf32>) dimensions = [1]
// CHECK: arith.addf
// CHECK: linalg.yield
- // CHECK: } -> tensor<?xf32>
+ // CHECK: }
// CHECK: return %[[R]] : tensor<?xf32>

// -----
@@ -81,7 +80,6 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 5)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1) -> (d1, d0)>
- // CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1) -> (d1)>
// CHECK: func @reduction_tile_transpose
// CHECK: tensor.empty(%{{.*}}) : tensor<5x?xf32>
// CHECK: linalg.fill {{.*}} : tensor<5x?xf32>) -> tensor<5x?xf32>
@@ -91,7 +89,7 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[INS:.*]] = tensor.insert_slice %[[R]] into %[[ARG3]][0, 0] [%[[D0]], %[[D1]]] [1, 1] : tensor<?x?xf32> into tensor<5x?xf32>
// CHECK: scf.yield {{.*}} : tensor<5x?xf32>
// CHECK: }
- // CHECK: linalg.generic
+ // CHECK: linalg.reduce
// CHECK: return

// -----
@@ -150,10 +148,11 @@ module attributes {transform.with_named_sequence} {
// CHECK: tensor.parallel_insert_slice %[[PARTIAL]] into %[[ARG3]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor<?xf32> into tensor<?x5xf32>
// CHECK: }
// CHECK: }
- // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor<?x5xf32>) outs(%[[ARG1]] : tensor<?xf32>) {
+ // CHECK: %[[R:.*]] = linalg.reduce ins(%[[L]] : tensor<?x5xf32>) outs(%[[ARG1]] : tensor<?xf32>) dimensions = [1]
+ // CHECK: {
// CHECK: arith.addf
// CHECK: linalg.yield
- // CHECK: } -> tensor<?xf32>
+ // CHECK: }
// CHECK: return %[[R]] : tensor<?xf32>

// -----
@@ -177,8 +176,6 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-(d0 * (s0 ceildiv 5)) + s0, s0 ceildiv 5)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (0, d0)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (d0 * (s0 ceildiv 5))>
- // CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
- // CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// CHECK: func @matmul_tile_parallel(%[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG1:.+]]: tensor<?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>
// CHECK-DAG: %[[I:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
@@ -203,10 +200,10 @@ module attributes {transform.with_named_sequence} {
// CHECK: tensor.parallel_insert_slice %[[PARTIAL]] into %[[ARG3]][0, 0, %[[IV]]] [%[[D0]], %[[D2]], 1] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x5xf32>
// CHECK: }
// CHECK: }
- // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[L]] : tensor<?x?x5xf32>) outs(%[[ARG2]] : tensor<?x?xf32>) {
+ // CHECK: %[[R:.*]] = linalg.reduce ins(%[[L]] : tensor<?x?x5xf32>) outs(%[[ARG2]] : tensor<?x?xf32>) dimensions = [2]
// CHECK: arith.addf
// CHECK: linalg.yield
- // CHECK: } -> tensor<?x?xf32>
+ // CHECK: }
// CHECK: return %[[R]] : tensor<?x?xf32>

// -----
@@ -270,10 +267,10 @@ module attributes {transform.with_named_sequence} {
// CHECK: tensor.parallel_insert_slice %[[CARRY]] into %[[ARG3]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor<?xf32> into tensor<?x5xf32>
// CHECK: }
// CHECK: }
- // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor<?x5xf32>) outs(%[[ARG1]] : tensor<?xf32>) {
+ // CHECK: %[[R:.*]] = linalg.reduce ins(%[[L]] : tensor<?x5xf32>) outs(%[[ARG1]] : tensor<?xf32>) dimensions = [1]
// CHECK: arith.addf
// CHECK: linalg.yield
- // CHECK: } -> tensor<?xf32>
+ // CHECK: }
// CHECK: return %[[R]] : tensor<?xf32>

// -----
@@ -307,7 +304,7 @@ module attributes {transform.with_named_sequence} {
// CHECK: iterator_types = ["parallel", "reduction"]
transform.print %2 {name = " expecting parallel reduction" } : !transform.any_op
// CHECK: expecting parallel reduction
- // CHECK-NEXT: linalg.generic
+ // CHECK-NEXT: linalg.reduce
// CHECK: iterator_types = ["parallel", "reduction"]
transform.print %3 {name = " expecting parallel reduction" } : !transform.any_op
transform.yield
@@ -401,7 +398,7 @@ module {
// CHECK: %[[OUT:.*]] = linalg.generic {indexing_maps = [{{.*}}, {{.*}}, {{.*}}], iterator_types = ["parallel", "parallel", "parallel"]} ins(%{{.*}}, %{{.*}}: tensor<2x64xf32>, tensor<4096x2x64xf32>) outs(%{{.*}}: tensor<4096x2x64xf32>)
// CHECK: scf.yield %[[OUT]] : tensor<4096x2x64xf32>
// CHECK: scf.yield %[[L1]] : tensor<4096x2x64xf32>
- // CHECK: %[[OUT2:.*]] = linalg.generic {indexing_maps = [{{.*}}, {{.*}}], iterator_types = ["parallel", "reduction", "reduction"]} ins(%{{.*}} : tensor<4096x2x64xf32>) outs(%{{.*}} : tensor<4096xf32>)
+ // CHECK: %[[OUT2:.*]] = linalg.reduce ins(%{{.*}} : tensor<4096x2x64xf32>) outs(%{{.*}} : tensor<4096xf32>)
// CHECK: return %[[OUT2]] : tensor<4096xf32>

// -----
@@ -445,6 +442,6 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[UPDATED]]#0 into %[[SUM]]
// CHECK: %[[INSERT2:.+]] = tensor.insert_slice %[[UPDATED]]#1 into %[[MAX]]
// CHECK: scf.yield %[[INSERT1]], %[[INSERT2]]
- // CHECK: linalg.generic
+ // CHECK: linalg.reduce
// CHECK: arith.addf
// CHECK: arith.maximumf