8
8
// CHECK-SAME: %[[MEMREF4:[a-zA-Z0-9]*]]: memref<?x?xf32>,
9
9
// CHECK-SAME: %[[MEMREF5:[a-zA-Z0-9]*]]: memref<?x?xf32>,
10
10
// CHECK-SAME: %[[VAL:[a-zA-Z0-9]*]]: index,
11
- // CHECK-SAME: %[[LB:[a-zA-Z0-9]*]]: index,
12
- // CHECK-SAME: %[[UB:[a-zA-Z0-9]*]]: index,
13
11
// CHECK-SAME: %[[STEP:[a-zA-Z0-9]*]]: index,
14
12
// CHECK-SAME: %[[CMP:[a-zA-Z0-9]*]]: i1
15
13
func.func @hoist_vector_transfer_pairs (
16
14
%memref0: memref <?x?xf32 >, %memref1: memref <?x?xf32 >, %memref2: memref <?x?xf32 >,
17
15
%memref3: memref <?x?xf32 >, %memref4: memref <?x?xf32 >, %memref5: memref <?x?xf32 >,
18
- %val: index , %lb : index , %ub : index , %step: index , %cmp: i1 ) {
16
+ %val: index , %step: index , %cmp: i1 ) {
17
+ %lb = arith.constant 0 : index
18
+ %ub = arith.constant 16 : index
19
19
%c0 = arith.constant 0 : index
20
20
%cst = arith.constant 0.0 : f32
21
21
22
22
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<1xf32>
23
- // CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>) {
23
+ // CHECK: scf.for %[[I:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>) {
24
24
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<2xf32>
25
- // CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>, vector<2xf32>) {
25
+ // CHECK: scf.for %[[J:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>, vector<2xf32>) {
26
26
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<3xf32>
27
27
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<4xf32>
28
28
// CHECK: "some_crippling_use"(%[[MEMREF4]]) : (memref<?x?xf32>) -> ()
@@ -92,15 +92,15 @@ module attributes {transform.with_named_sequence} {
92
92
// CHECK-SAME: %[[MEMREF2:[a-zA-Z0-9]*]]: memref<?x?xf32>,
93
93
// CHECK-SAME: %[[MEMREF3:[a-zA-Z0-9]*]]: memref<?x?xf32>,
94
94
// CHECK-SAME: %[[VAL:[a-zA-Z0-9]*]]: index,
95
- // CHECK-SAME: %[[LB:[a-zA-Z0-9]*]]: index,
96
- // CHECK-SAME: %[[UB:[a-zA-Z0-9]*]]: index,
97
95
// CHECK-SAME: %[[STEP:[a-zA-Z0-9]*]]: index,
98
96
// CHECK-SAME: %[[RANDOM:[a-zA-Z0-9]*]]: index,
99
97
// CHECK-SAME: %[[CMP:[a-zA-Z0-9]*]]: i1
100
98
func.func @hoist_vector_transfer_pairs_disjoint (
101
99
%memref0: memref <?x?xf32 >, %memref1: memref <?x?xf32 >,
102
- %memref2: memref <?x?xf32 >, %memref3: memref <?x?xf32 >, %val: index , %lb : index , %ub : index ,
100
+ %memref2: memref <?x?xf32 >, %memref3: memref <?x?xf32 >, %val: index ,
103
101
%step: index , %random_index : index , %cmp: i1 ) {
102
+ %lb = arith.constant 0 : index
103
+ %ub = arith.constant 16 : index
104
104
%c0 = arith.constant 0 : index
105
105
%c1 = arith.constant 1 : index
106
106
%c3 = arith.constant 3 : index
@@ -110,9 +110,9 @@ func.func @hoist_vector_transfer_pairs_disjoint(
110
110
// CHECK: vector.transfer_read %[[MEMREF2]]{{.*}} : memref<?x?xf32>, vector<3xf32>
111
111
// CHECK: vector.transfer_read %[[MEMREF3]]{{.*}} : memref<?x?xf32>, vector<4xf32>
112
112
// CHECK: vector.transfer_read %[[MEMREF3]]{{.*}} : memref<?x?xf32>, vector<4xf32>
113
- // CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) ->
113
+ // CHECK: scf.for %[[I:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) ->
114
114
// CHECK-SAME: (vector<3xf32>, vector<3xf32>, vector<4xf32>, vector<4xf32>) {
115
- // CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) ->
115
+ // CHECK: scf.for %[[J:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) ->
116
116
// CHECK-SAME: (vector<3xf32>, vector<3xf32>, vector<4xf32>, vector<4xf32>) {
117
117
// CHECK: vector.transfer_read %[[MEMREF1]]{{.*}} : memref<?x?xf32>, vector<2xf32>
118
118
// CHECK: vector.transfer_read %[[MEMREF1]]{{.*}} : memref<?x?xf32>, vector<2xf32>
@@ -308,6 +308,62 @@ module attributes {transform.with_named_sequence} {
308
308
309
309
// -----
310
310
311
+ // CHECK-LABEL: func.func @no_hoisting_zero_trip_loop
312
+ func.func @no_hoisting_zero_trip_loop (%arg0: memref <20 xi32 >, %arg1: memref <20 xi32 >, %lb: index , %ub: index ) {
313
+ %c0_i32 = arith.constant 0 : i32
314
+ %c0 = arith.constant 0 : index
315
+ %c1 = arith.constant 1 : index
316
+ // Nothing is known about %lb and %ub, so the loop may execute zero times: do not hoist.
317
+
318
+ // CHECK: scf.for {{.*}} {
319
+ // CHECK-NEXT: vector.transfer_read
320
+ // CHECK-NEXT: vector.transfer_write
321
+ scf.for %arg2 = %lb to %ub step %c1 {
322
+ %read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
323
+ vector.transfer_write %read , %arg1 [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <20 xi32 >
324
+ }
325
+
326
+ // %lb_0 = min(%lb, 8) is at most 8, and %ub_0 = max(%ub, 4) is at least 4.
327
+ // Since %lb_0 could be greater than or equal to %ub_0 (a zero-trip loop), do not hoist.
328
+ %lb_0 = affine.min affine_map <(d0 ) -> (d0 , 8 )>(%lb )
329
+ %ub_0 = affine.max affine_map <(d0 ) -> (d0 , 4 )>(%ub )
330
+
331
+ // CHECK: scf.for {{.*}} {
332
+ // CHECK-NEXT: vector.transfer_read
333
+ // CHECK-NEXT: vector.transfer_write
334
+ scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
335
+ %read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
336
+ vector.transfer_write %read , %arg1 [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <20 xi32 >
337
+ }
338
+
339
+ // %lb_1 = min(%lb, 4) is at most 4, and %ub_1 = max(%ub, 8) is at least 8.
340
+ // Since %lb_1 is guaranteed to be less than %ub_1, the loop executes at least once and hoisting is possible.
341
+ %lb_1 = affine.min affine_map <(d0 ) -> (d0 , 4 )>(%lb )
342
+ %ub_1 = affine.max affine_map <(d0 ) -> (d0 , 8 )>(%ub )
343
+
344
+ // CHECK: vector.transfer_read
345
+ // CHECK: scf.for {{.*}} {
346
+ // CHECK-NEXT: "prevent.dce"
347
+ scf.for %arg2 = %lb_1 to %ub_1 step %c1 {
348
+ %read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
349
+ " prevent.dce" (%read ) : (vector <4 xi32 >) ->()
350
+ vector.transfer_write %read , %arg1 [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <20 xi32 >
351
+ }
352
+ return
353
+ }
354
+
355
+ module attributes {transform.with_named_sequence } {
356
+ transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
357
+ %0 = transform.structured.match ops {[" func.func" ]} in %arg1
358
+ : (!transform.any_op ) -> !transform.any_op
359
+ transform.structured.hoist_redundant_vector_transfers %0
360
+ : (!transform.any_op ) -> !transform.any_op
361
+ transform.yield
362
+ }
363
+ }
364
+
365
+ // -----
366
+
311
367
// Regression test - `vector.transfer_read` below should not be hoisted.
312
368
// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
313
369
// (read by `vector.transfer_read`) alias.
@@ -436,7 +492,7 @@ module attributes {transform.with_named_sequence} {
436
492
// CHECK: #[[$MAP4:.+]] = affine_map<()[s0] -> (s0 + 4)>
437
493
438
494
// CHECK-LABEL: func.func @hoist_vector_transfer_pairs_disjoint_dynamic
439
- // CHECK-SAME: (%[[BUFFER:.+]]: memref<?x?xf32>, %{{.+}}: index, %{{.+}}: index, %{{.+}}: index, % [[I0:.+]]: index)
495
+ // CHECK-SAME: (%[[BUFFER:.+]]: memref<?x?xf32>, %{{.+}}: index, %[[I0:.+]]: index)
440
496
441
497
// CHECK: %[[PLUS1:.+]] = affine.apply #[[$MAP1]]()[%[[I0]]]
442
498
// CHECK: %[[PLUS4:.+]] = affine.apply #[[$MAP4]]()[%[[I0]]]
@@ -451,7 +507,9 @@ module attributes {transform.with_named_sequence} {
451
507
// CHECK: vector.transfer_write %{{.+}}, %[[BUFFER]][%[[I0]], %[[I0]]]
452
508
453
509
func.func @hoist_vector_transfer_pairs_disjoint_dynamic (
454
- %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index ) {
510
+ %buffer: memref <?x?xf32 >, %step: index , %i0 : index ) {
511
+ %lb = arith.constant 0 : index
512
+ %ub = arith.constant 16 : index
455
513
%cst = arith.constant 0.0 : f32
456
514
%i1 = affine.apply affine_map <(d0 ) -> (d0 + 1 )>(%i0 )
457
515
%i2 = affine.apply affine_map <(d0 ) -> (d0 + 4 )>(%i0 )
@@ -494,7 +552,9 @@ module attributes {transform.with_named_sequence} {
494
552
// CHECK-COUNT-2: vector.transfer_write
495
553
496
554
func.func @hoist_vector_transfer_pairs_overlapping_dynamic (
497
- %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index ) {
555
+ %buffer: memref <?x?xf32 >, %step: index , %i0 : index ) {
556
+ %lb = arith.constant 0 : index
557
+ %ub = arith.constant 16 : index
498
558
%cst = arith.constant 0.0 : f32
499
559
%i1 = affine.apply affine_map <(d0 ) -> (d0 + 3 )>(%i0 )
500
560
@@ -534,7 +594,9 @@ module attributes {transform.with_named_sequence} {
534
594
// CHECK: return
535
595
536
596
func.func @hoist_vector_transfer_pairs_disjoint_dynamic (
537
- %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index , %i1 : index ) {
597
+ %buffer: memref <?x?xf32 >, %step: index , %i0 : index , %i1 : index ) {
598
+ %lb = arith.constant 0 : index
599
+ %ub = arith.constant 16 : index
538
600
%cst = arith.constant 0.0 : f32
539
601
%i2 = affine.apply affine_map <(d0 ) -> ((d0 floordiv 32 ) * 16 )>(%i1 )
540
602
%i3 = affine.apply affine_map <(d0 ) -> ((d0 floordiv 32 ) * 16 + 8 )>(%i1 )
@@ -571,7 +633,7 @@ module attributes {transform.with_named_sequence} {
571
633
// Test hoisting of vector.extract/vector.broadcast pairs
572
634
573
635
// CHECK-LABEL: func.func @hoist_vector_broadcasts
574
- // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, % [[VEC:.+]]: vector<3x4xf32>) -> vector<3x4xf32> {
636
+ // CHECK-SAME: (%{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>) -> vector<3x4xf32> {
575
637
// CHECK: %[[EXTRACT:.+]] = vector.extract %[[VEC]][0] : vector<4xf32> from vector<3x4xf32>
576
638
// CHECK-NEXT: %[[LOOP:.+]] = scf.for {{.*}} {
577
639
// CHECK-NEXT: %[[USE:.+]] = "some_use"({{.*}}) : (vector<4xf32>) -> vector<4xf32>
@@ -580,7 +642,9 @@ module attributes {transform.with_named_sequence} {
580
642
// CHECK-NEXT: %[[BCAST:.+]] = vector.broadcast %[[LOOP]] : vector<4xf32> to vector<3x4xf32>
581
643
// CHECK-NEXT: return %[[BCAST]] : vector<3x4xf32>
582
644
583
- func.func @hoist_vector_broadcasts (%lb : index , %ub : index , %step : index , %vec : vector <3 x4 xf32 >) -> vector <3 x4 xf32 > {
645
+ func.func @hoist_vector_broadcasts (%step : index , %vec : vector <3 x4 xf32 >) -> vector <3 x4 xf32 > {
646
+ %lb = arith.constant 0 : index
647
+ %ub = arith.constant 16 : index
584
648
%bcast_vec = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec ) -> vector <3 x4 xf32 > {
585
649
%extract = vector.extract %iarg [0 ] : vector <4 xf32 > from vector <3 x4 xf32 >
586
650
%use = " some_use" (%extract ) : (vector <4 xf32 >) -> vector <4 xf32 >
@@ -605,7 +669,7 @@ module attributes {transform.with_named_sequence} {
605
669
// Test hoisting of vector.extract/vector.broadcast pairs with dynamic position
606
670
607
671
// CHECK-LABEL: func.func @hoist_vector_broadcasts_dynamic
608
- // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, % [[VEC:.+]]: vector<3x4xf32>, %[[POS:.+]]: index) -> vector<3x4xf32> {
672
+ // CHECK-SAME: (%{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>, %[[POS:.+]]: index) -> vector<3x4xf32> {
609
673
// CHECK: %[[EXTRACT:.+]] = vector.extract %[[VEC]][%[[POS]]] : vector<4xf32> from vector<3x4xf32>
610
674
// CHECK-NEXT: %[[LOOP:.+]] = scf.for {{.*}} {
611
675
// CHECK-NEXT: %[[USE:.+]] = "some_use"({{.*}}) : (vector<4xf32>) -> vector<4xf32>
@@ -614,7 +678,9 @@ module attributes {transform.with_named_sequence} {
614
678
// CHECK-NEXT: %[[BCAST:.+]] = vector.broadcast %[[LOOP]] : vector<4xf32> to vector<3x4xf32>
615
679
// CHECK-NEXT: return %[[BCAST]] : vector<3x4xf32>
616
680
617
- func.func @hoist_vector_broadcasts_dynamic (%lb : index , %ub : index , %step : index , %vec : vector <3 x4 xf32 >, %pos: index ) -> vector <3 x4 xf32 > {
681
+ func.func @hoist_vector_broadcasts_dynamic (%step : index , %vec : vector <3 x4 xf32 >, %pos: index ) -> vector <3 x4 xf32 > {
682
+ %lb = arith.constant 0 : index
683
+ %ub = arith.constant 16 : index
618
684
%bcast_vec = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec ) -> vector <3 x4 xf32 > {
619
685
%extract = vector.extract %iarg [%pos ] : vector <4 xf32 > from vector <3 x4 xf32 >
620
686
%use = " some_use" (%extract ) : (vector <4 xf32 >) -> vector <4 xf32 >
@@ -639,7 +705,7 @@ module attributes {transform.with_named_sequence} {
639
705
// Test hoisting of vector.extract/vector.broadcast pairs with multiple iter_args
640
706
641
707
// CHECK-LABEL: func.func @hoist_vector_broadcasts_multiple
642
- // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, % [[VEC1:.+]]: vector<3x4xf32>,
708
+ // CHECK-SAME: (%{{.+}}: index, %[[VEC1:.+]]: vector<3x4xf32>,
643
709
// CHECK-SAME: %[[VEC2:.+]]: vector<3x5xf32>) -> (vector<3x4xf32>, vector<3x5xf32>) {
644
710
// CHECK-DAG: %[[EXTRACT1:.+]] = vector.extract %[[VEC1]][0] : vector<4xf32> from vector<3x4xf32>
645
711
// CHECK-DAG: %[[EXTRACT2:.+]] = vector.extract %[[VEC2]][1] : vector<5xf32> from vector<3x5xf32>
@@ -652,7 +718,9 @@ module attributes {transform.with_named_sequence} {
652
718
// CHECK-DAG: %[[BCAST2:.+]] = vector.broadcast %[[LOOP]]#1 : vector<5xf32> to vector<3x5xf32>
653
719
// CHECK-NEXT: return %[[BCAST1]], %[[BCAST2]] : vector<3x4xf32>, vector<3x5xf32>
654
720
655
- func.func @hoist_vector_broadcasts_multiple (%lb : index , %ub : index , %step : index , %vec1 : vector <3 x4 xf32 >, %vec2 : vector <3 x5 xf32 >) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
721
+ func.func @hoist_vector_broadcasts_multiple (%step : index , %vec1 : vector <3 x4 xf32 >, %vec2 : vector <3 x5 xf32 >) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
722
+ %lb = arith.constant 0 : index
723
+ %ub = arith.constant 16 : index
656
724
%bcast_vec:2 = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec1 , %iarg2 = %vec2 ) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
657
725
%extract1 = vector.extract %iarg [0 ] : vector <4 xf32 > from vector <3 x4 xf32 >
658
726
%extract2 = vector.extract %iarg2 [1 ] : vector <5 xf32 > from vector <3 x5 xf32 >
0 commit comments