@@ -142,124 +142,6 @@ func @add_mul_scalar_fusion(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tenso
142
142
143
143
// -----
144
144
145
- // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)>
146
- // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
147
-
148
- #map0 = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
149
- func @generic_op_reshape_producer_fusion (%arg0 : tensor <?x?x?xf32 >,
150
- %arg1 : tensor <?x?x4 x?xf32 >) ->
151
- tensor <?x?x4 x?xf32 >
152
- {
153
- %0 = linalg.tensor_reshape %arg0 [affine_map <(i , j , k , l ) -> (i )>,
154
- affine_map <(i , j , k , l ) -> (j , k )>,
155
- affine_map <(i , j , k , l ) -> (l )>] :
156
- tensor <?x?x?xf32 > into tensor <?x?x4 x?xf32 >
157
- %1 = linalg.generic {
158
- indexing_maps = [#map0 , #map0 , #map0 ],
159
- iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ]}
160
- ins (%0 , %arg1 : tensor <?x?x4 x?xf32 >, tensor <?x?x4 x?xf32 >) {
161
- ^bb0 (%arg3: f32 , %arg4: f32 ): // no predecessors
162
- %1 = mulf %arg3 , %arg4 : f32
163
- linalg.yield %1 : f32
164
- } -> tensor <?x?x4 x?xf32 >
165
- return %1 : tensor <?x?x4 x?xf32 >
166
- }
167
-
168
- // CHECK-LABEL: func @generic_op_reshape_producer_fusion
169
- // CHECK: linalg.generic
170
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]]
171
- // CHECK-NOT: linalg.generic
172
-
173
-
174
- // -----
175
-
176
- // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
177
- // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)>
178
-
179
- #map0 = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
180
- func @generic_op_reshape_consumer_fusion (%arg0 : tensor <?x?x4 x5 xf32 >,
181
- %arg1 : tensor <?x?x4 x5 xf32 >) ->
182
- tensor <?x?xf32 >
183
- {
184
- %0 = linalg.generic {
185
- indexing_maps = [#map0 , #map0 , #map0 ],
186
- iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ]}
187
- ins (%arg0 , %arg1 : tensor <?x?x4 x5 xf32 >, tensor <?x?x4 x5 xf32 >) {
188
- ^bb0 (%arg3: f32 , %arg4: f32 ): // no predecessors
189
- %1 = mulf %arg3 , %arg4 : f32
190
- linalg.yield %1 : f32
191
- } -> tensor <?x?x4 x5 xf32 >
192
- %1 = linalg.tensor_reshape %0 [affine_map <(i , j , k , l ) -> (i )>,
193
- affine_map <(i , j , k , l ) -> (j , k , l )>] :
194
- tensor <?x?x4 x5 xf32 > into tensor <?x?xf32 >
195
- return %1 : tensor <?x?xf32 >
196
- }
197
-
198
- // CHECK-LABEL: func @generic_op_reshape_consumer_fusion
199
- // CHECK: linalg.generic
200
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP1]]]
201
- // CHECK-NOT: linalg.generic
202
-
203
- // -----
204
-
205
- #map0 = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
206
- func @generic_op_reshape_consumer_nofusion (%arg0 : tensor <?x?x?x5 xf32 >,
207
- %arg1 : tensor <?x?x?x5 xf32 >) ->
208
- tensor <?x?xf32 >
209
- {
210
- %0 = linalg.generic {
211
- indexing_maps = [#map0 , #map0 , #map0 ],
212
- iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ]}
213
- ins (%arg0 , %arg1 : tensor <?x?x?x5 xf32 >, tensor <?x?x?x5 xf32 >) {
214
- ^bb0 (%arg3: f32 , %arg4: f32 ): // no predecessors
215
- %1 = mulf %arg3 , %arg4 : f32
216
- linalg.yield %1 : f32
217
- } -> tensor <?x?x?x5 xf32 >
218
- %1 = linalg.tensor_reshape %0 [affine_map <(i , j , k , l ) -> (i )>,
219
- affine_map <(i , j , k , l ) -> (j , k , l )>] :
220
- tensor <?x?x?x5 xf32 > into tensor <?x?xf32 >
221
- return %1 : tensor <?x?xf32 >
222
- }
223
-
224
- // CHECK-LABEL: func @generic_op_reshape_consumer_nofusion
225
- // CHECK: linalg.tensor_reshape
226
-
227
- // -----
228
-
229
- #map0 = affine_map <(d0 , d1 ) -> (d0 , d1 )>
230
- #map1 = affine_map <(d0 , d1 , d2 ) -> (d0 , d1 )>
231
- #map2 = affine_map <(d0 , d1 , d2 ) -> (d2 )>
232
-
233
- func @generic_op_reshape_consumer_expanding (%arg0: tensor <264 x4 xf32 >)
234
- -> tensor <8 x33 x4 xf32 > {
235
- %cst = constant dense <2.000000e+00 > : tensor <264 x4 xf32 >
236
- %0 = linalg.generic {
237
- indexing_maps = [#map0 , #map0 , #map0 ],
238
- iterator_types = [" parallel" , " parallel" ]}
239
- ins (%arg0 , %cst : tensor <264 x4 xf32 >, tensor <264 x4 xf32 >) {
240
- ^bb0 (%arg1: f32 , %arg2: f32 ): // no predecessors
241
- %2 = mulf %arg1 , %arg2 : f32
242
- linalg.yield %2 : f32
243
- } -> tensor <264 x4 xf32 >
244
- %1 = linalg.tensor_reshape %0 [#map1 , #map2 ] :
245
- tensor <264 x4 xf32 > into tensor <8 x33 x4 xf32 >
246
- return %1 : tensor <8 x33 x4 xf32 >
247
- }
248
-
249
- // The reshape op in `%arg0` is folded into the indexing map of generic op.
250
- // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0 * 33 + d1, d2)>
251
- // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
252
- // CHECK: func @generic_op_reshape_consumer_expanding
253
- // CHECK-NOT: linalg.tensor_reshape
254
- // CHECK: %[[CST:.*]] = constant {{.*}} : f32
255
- // CHECK: linalg.generic
256
- // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
257
- // CHECK-SAME: tensor<264x4xf32>
258
- // CHECK: -> tensor<8x33x4xf32>
259
- // CHECK-NOT: linalg.tensor_reshape
260
-
261
- // -----
262
-
263
145
#map0 = affine_map <(d0 , d1 , d2 ) -> (d0 )>
264
146
#map1 = affine_map <(d0 , d1 , d2 ) -> (d0 , d1 , d2 )>
265
147
func @generic_op_constant_fusion (%arg0 : tensor <5 x?x?xf32 >) -> tensor <5 x?x?xf32 >
@@ -499,159 +381,3 @@ func @indexed_generic_op_fusion(%arg0: tensor<?x?xi32>) {
499
381
// CHECK: %[[VAL4:.+]] = subi %[[VAL3]], %[[SUB_OPERAND2]] : i32
500
382
// CHECK: linalg.yield %[[VAL4]] : i32
501
383
// CHECK-NOT: linalg.indexed_generic
502
-
503
- // -----
504
-
505
- // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)>
506
- // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
507
-
508
- #map0 = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
509
- func @indexed_generic_op_reshape_producer_fusion (%arg0 : tensor <?x?x?xi32 >)
510
- -> tensor <?x?x4 x?xi32 > {
511
- %0 = linalg.tensor_reshape %arg0 [affine_map <(i , j , k , l ) -> (i )>,
512
- affine_map <(i , j , k , l ) -> (j , k )>,
513
- affine_map <(i , j , k , l ) -> (l )>] :
514
- tensor <?x?x?xi32 > into tensor <?x?x4 x?xi32 >
515
- %1 = linalg.indexed_generic {
516
- indexing_maps = [#map0 , #map0 ],
517
- iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ] }
518
- ins (%0 : tensor <?x?x4 x?xi32 >) {
519
- ^bb0 (%arg2: index , %arg3: index , %arg4: index , %arg5: index , %arg6: i32 ): // no predecessors
520
- %2 = index_cast %arg2 : index to i32
521
- %3 = addi %arg6 , %2 : i32
522
- linalg.yield %3 : i32
523
- } -> tensor <?x?x4 x?xi32 >
524
- return %1 : tensor <?x?x4 x?xi32 >
525
- }
526
-
527
- // CHECK-LABEL: func @indexed_generic_op_reshape_producer_fusion
528
- // CHECK-NOT: linalg.tensor_reshape
529
- // CHECK: linalg.indexed_generic
530
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
531
- // CHECK-NOT: linalg.tensor_reshape
532
-
533
- // -----
534
-
535
- // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
536
- // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)>
537
-
538
- #map0 = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
539
- func @indexed_generic_op_reshape_consumer_fusion (%arg0 : tensor <?x?x4 x5 xi32 >)
540
- -> tensor <?x?xi32 > {
541
- %0 = linalg.indexed_generic {
542
- indexing_maps = [#map0 , #map0 ],
543
- iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ] }
544
- ins (%arg0 : tensor <?x?x4 x5 xi32 >) {
545
- ^bb0 (%arg2: index , %arg3: index , %arg4: index , %arg5: index , %arg6: i32 ): // no predecessors
546
- %2 = index_cast %arg2 : index to i32
547
- %3 = addi %arg6 , %2 : i32
548
- linalg.yield %3 : i32
549
- } -> tensor <?x?x4 x5 xi32 >
550
- %1 = linalg.tensor_reshape %0 [affine_map <(i , j , k , l ) -> (i )>,
551
- affine_map <(i , j , k , l ) -> (j , k , l )>] :
552
- tensor <?x?x4 x5 xi32 > into tensor <?x?xi32 >
553
- return %1 : tensor <?x?xi32 >
554
- }
555
-
556
- // CHECK-LABEL: func @indexed_generic_op_reshape_consumer_fusion
557
- // CHECK-NOT: linalg.tensor_reshape
558
- // CHECK: linalg.indexed_generic
559
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
560
- // CHECK-NOT: linalg.tensor_reshape
561
-
562
- // -----
563
-
564
- // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1 + d2 * 7)>
565
- // CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
566
-
567
- #map0 = affine_map <(d0 , d1 , d2 ) -> (d0 )>
568
- #map1 = affine_map <(d0 , d1 , d2 ) -> (d1 , d2 )>
569
- #map2 = affine_map <(d0 , d1 , d2 ) -> (d0 , d2 , d1 )>
570
- #map3 = affine_map <(d0 , d1 , d2 ) -> (d0 , d1 , d2 )>
571
- func @generic_op_021_permultation_reshape_producer_fusion (%arg0 : tensor <3 x35 xf32 >) -> tensor <3 x7 x5 xf32 > {
572
- %0 = linalg.tensor_reshape %arg0 [#map0 , #map1 ] : tensor <3 x35 xf32 > into tensor <3 x5 x7 xf32 >
573
- %1 = linalg.generic {index ing_maps = [#map2 , #map3 ], iterator_types = [" parallel" , " parallel" , " parallel" ]} ins (%0 : tensor <3 x5 x7 xf32 >) {
574
- ^bb0 (%arg2: f32 ): // no predecessors
575
- linalg.yield %arg2 : f32
576
- } -> tensor <3 x7 x5 xf32 >
577
- return %1 : tensor <3 x7 x5 xf32 >
578
- }
579
-
580
- // CHECK-LABEL: func @generic_op_021_permultation_reshape_producer_fusion
581
- // CHECK-NOT: linalg.tensor_reshape
582
- // CHECK: linalg.generic
583
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
584
- // CHECK-NOT: linalg.tensor_reshape
585
-
586
- // -----
587
-
588
- // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (d2, d0 * 7 + d1)>
589
- // CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
590
-
591
- #map0 = affine_map <(d0 , d1 , d2 ) -> (d0 )>
592
- #map1 = affine_map <(d0 , d1 , d2 ) -> (d1 , d2 )>
593
- #map2 = affine_map <(d0 , d1 , d2 ) -> (d1 , d2 , d0 )>
594
- #map3 = affine_map <(d0 , d1 , d2 ) -> (d0 , d1 , d2 )>
595
- func @generic_op_120_permultation_reshape_producer_fusion (%arg0 : tensor <3 x35 xf32 >) -> tensor <5 x7 x3 xf32 > {
596
- %0 = linalg.tensor_reshape %arg0 [#map0 , #map1 ] : tensor <3 x35 xf32 > into tensor <3 x5 x7 xf32 >
597
- %1 = linalg.generic {index ing_maps = [#map2 , #map3 ], iterator_types = [" parallel" , " parallel" , " parallel" ]} ins (%0 : tensor <3 x5 x7 xf32 >) {
598
- ^bb0 (%arg2: f32 ): // no predecessors
599
- linalg.yield %arg2 : f32
600
- } -> tensor <5 x7 x3 xf32 >
601
- return %1 : tensor <5 x7 x3 xf32 >
602
- }
603
-
604
- // CHECK-LABEL: func @generic_op_120_permultation_reshape_producer_fusion
605
- // CHECK-NOT: linalg.tensor_reshape
606
- // CHECK: linalg.generic
607
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
608
- // CHECK-NOT: linalg.tensor_reshape
609
-
610
- // -----
611
-
612
- // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)>
613
- // CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
614
-
615
- #map0 = affine_map <(d0 , d1 , d2 ) -> (d0 )>
616
- #map1 = affine_map <(d0 , d1 , d2 ) -> (d1 , d2 )>
617
- #map2 = affine_map <(d0 , d1 , d2 ) -> (d1 , d0 , d2 )>
618
- #map3 = affine_map <(d0 , d1 , d2 ) -> (d0 , d1 , d2 )>
619
- func @generic_op_102_permultation_reshape_producer_fusion (%arg0 : tensor <3 x35 xf32 >) -> tensor <5 x3 x7 xf32 > {
620
- %0 = linalg.tensor_reshape %arg0 [#map0 , #map1 ] : tensor <3 x35 xf32 > into tensor <3 x5 x7 xf32 >
621
- %1 = linalg.generic {index ing_maps = [#map2 , #map3 ], iterator_types = [" parallel" , " parallel" , " parallel" ]} ins (%0 : tensor <3 x5 x7 xf32 >) {
622
- ^bb0 (%arg2: f32 ): // no predecessors
623
- linalg.yield %arg2 : f32
624
- } -> tensor <5 x3 x7 xf32 >
625
- return %1 : tensor <5 x3 x7 xf32 >
626
- }
627
-
628
- // CHECK-LABEL: func @generic_op_102_permultation_reshape_producer_fusion
629
- // CHECK-NOT: linalg.tensor_reshape
630
- // CHECK: linalg.generic
631
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
632
- // CHECK-NOT: linalg.tensor_reshape
633
-
634
- // -----
635
-
636
- // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
637
- // CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)>
638
-
639
-
640
- #map0 = affine_map <(d0 , d1 , d2 ) -> (d0 , d1 , d2 )>
641
- #map1 = affine_map <(d0 , d1 , d2 ) -> (d1 , d0 , d2 )>
642
- #map2 = affine_map <(d0 , d1 , d2 ) -> (d0 )>
643
- #map3 = affine_map <(d0 , d1 , d2 ) -> (d1 , d2 )>
644
- func @generic_op_102_permultation_reshape_consumer_fusion (%arg0 : tensor <3 x5 x7 xf32 >) -> tensor <5 x21 xf32 > {
645
- %0 = linalg.generic {index ing_maps = [#map0 , #map1 ], iterator_types = [" parallel" , " parallel" , " parallel" ]} ins (%arg0 : tensor <3 x5 x7 xf32 >) {
646
- ^bb0 (%arg2: f32 ): // no predecessors
647
- linalg.yield %arg2 : f32
648
- } -> tensor <5 x3 x7 xf32 >
649
- %1 = linalg.tensor_reshape %0 [#map2 , #map3 ] : tensor <5 x3 x7 xf32 > into tensor <5 x21 xf32 >
650
- return %1 : tensor <5 x21 xf32 >
651
- }
652
-
653
- // CHECK-LABEL: func @generic_op_102_permultation_reshape_consumer_fusion
654
- // CHECK-NOT: linalg.tensor_reshape
655
- // CHECK: linalg.generic
656
- // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
657
- // CHECK-NOT: linalg.tensor_reshape
0 commit comments