8
8
// CHECK-SAME: %[[MEMREF4:[a-zA-Z0-9]*]]: memref<?x?xf32>,
9
9
// CHECK-SAME: %[[MEMREF5:[a-zA-Z0-9]*]]: memref<?x?xf32>,
10
10
// CHECK-SAME: %[[VAL:[a-zA-Z0-9]*]]: index,
11
+ // CHECK-SAME: %[[LB:[a-zA-Z0-9]*]]: index,
12
+ // CHECK-SAME: %[[UB:[a-zA-Z0-9]*]]: index,
11
13
// CHECK-SAME: %[[STEP:[a-zA-Z0-9]*]]: index,
12
14
// CHECK-SAME: %[[CMP:[a-zA-Z0-9]*]]: i1
13
15
func.func @hoist_vector_transfer_pairs (
14
16
%memref0: memref <?x?xf32 >, %memref1: memref <?x?xf32 >, %memref2: memref <?x?xf32 >,
15
17
%memref3: memref <?x?xf32 >, %memref4: memref <?x?xf32 >, %memref5: memref <?x?xf32 >,
16
- %val: index , %step: index , %cmp: i1 ) {
17
- %lb = arith.constant 0 : index
18
- %ub = arith.constant 16 : index
18
+ %val: index , %lb : index , %ub : index , %step: index , %cmp: i1 ) {
19
19
%c0 = arith.constant 0 : index
20
20
%cst = arith.constant 0.0 : f32
21
21
22
22
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<1xf32>
23
- // CHECK: scf.for %[[I:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>) {
23
+ // CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>) {
24
24
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<2xf32>
25
- // CHECK: scf.for %[[J:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>, vector<2xf32>) {
25
+ // CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>, vector<2xf32>) {
26
26
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<3xf32>
27
27
// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<4xf32>
28
28
// CHECK: "some_crippling_use"(%[[MEMREF4]]) : (memref<?x?xf32>) -> ()
@@ -92,15 +92,15 @@ module attributes {transform.with_named_sequence} {
92
92
// CHECK-SAME: %[[MEMREF2:[a-zA-Z0-9]*]]: memref<?x?xf32>,
93
93
// CHECK-SAME: %[[MEMREF3:[a-zA-Z0-9]*]]: memref<?x?xf32>,
94
94
// CHECK-SAME: %[[VAL:[a-zA-Z0-9]*]]: index,
95
+ // CHECK-SAME: %[[LB:[a-zA-Z0-9]*]]: index,
96
+ // CHECK-SAME: %[[UB:[a-zA-Z0-9]*]]: index,
95
97
// CHECK-SAME: %[[STEP:[a-zA-Z0-9]*]]: index,
96
98
// CHECK-SAME: %[[RANDOM:[a-zA-Z0-9]*]]: index,
97
99
// CHECK-SAME: %[[CMP:[a-zA-Z0-9]*]]: i1
98
100
func.func @hoist_vector_transfer_pairs_disjoint (
99
101
%memref0: memref <?x?xf32 >, %memref1: memref <?x?xf32 >,
100
- %memref2: memref <?x?xf32 >, %memref3: memref <?x?xf32 >, %val: index ,
102
+ %memref2: memref <?x?xf32 >, %memref3: memref <?x?xf32 >, %val: index , %lb : index , %ub : index ,
101
103
%step: index , %random_index : index , %cmp: i1 ) {
102
- %lb = arith.constant 0 : index
103
- %ub = arith.constant 16 : index
104
104
%c0 = arith.constant 0 : index
105
105
%c1 = arith.constant 1 : index
106
106
%c3 = arith.constant 3 : index
@@ -110,9 +110,9 @@ func.func @hoist_vector_transfer_pairs_disjoint(
110
110
// CHECK: vector.transfer_read %[[MEMREF2]]{{.*}} : memref<?x?xf32>, vector<3xf32>
111
111
// CHECK: vector.transfer_read %[[MEMREF3]]{{.*}} : memref<?x?xf32>, vector<4xf32>
112
112
// CHECK: vector.transfer_read %[[MEMREF3]]{{.*}} : memref<?x?xf32>, vector<4xf32>
113
- // CHECK: scf.for %[[I:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) ->
113
+ // CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) ->
114
114
// CHECK-SAME: (vector<3xf32>, vector<3xf32>, vector<4xf32>, vector<4xf32>) {
115
- // CHECK: scf.for %[[J:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) ->
115
+ // CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) ->
116
116
// CHECK-SAME: (vector<3xf32>, vector<3xf32>, vector<4xf32>, vector<4xf32>) {
117
117
// CHECK: vector.transfer_read %[[MEMREF1]]{{.*}} : memref<?x?xf32>, vector<2xf32>
118
118
// CHECK: vector.transfer_read %[[MEMREF1]]{{.*}} : memref<?x?xf32>, vector<2xf32>
@@ -309,18 +309,18 @@ module attributes {transform.with_named_sequence} {
309
309
// -----
310
310
311
311
// CHECK-LABEL: func.func @no_hoisting_zero_trip_loop
312
- func.func @no_hoisting_zero_trip_loop (%arg0: memref <20 xi32 >, %arg1: memref < 20 x i32 >, % lb: index , %ub: index ) {
312
+ func.func @no_hoisting_zero_trip_loop (%arg0: memref <20 xi32 >, %lb: index , %ub: index ) {
313
313
%c0_i32 = arith.constant 0 : i32
314
314
%c0 = arith.constant 0 : index
315
315
%c1 = arith.constant 1 : index
316
316
// %lb and %ub are unbounded, so do not hoist.
317
317
318
318
// CHECK: scf.for {{.*}} {
319
319
// CHECK-NEXT: vector.transfer_read
320
- // CHECK-NEXT: vector.transfer_write
320
+ // CHECK-NEXT: "prevent.dce"
321
321
scf.for %arg2 = %lb to %ub step %c1 {
322
322
%read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
323
- vector.transfer_write %read , %arg1 [ %c0 ] { in_bounds = [ true ]} : vector <4 xi32 >, memref < 20 x i32 >
323
+ " prevent.dce " ( %read ) : ( vector <4 xi32 >) ->()
324
324
}
325
325
326
326
// %lb_0 is in range [%lb, 8], and %ub_0 is in range [4, %ub].
@@ -330,24 +330,23 @@ func.func @no_hoisting_zero_trip_loop(%arg0: memref<20xi32>, %arg1: memref<20xi3
330
330
331
331
// CHECK: scf.for {{.*}} {
332
332
// CHECK-NEXT: vector.transfer_read
333
- // CHECK-NEXT: vector.transfer_write
333
+ // CHECK-NEXT: "prevent.dce"
334
334
scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
335
335
%read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
336
- vector.transfer_write %read , %arg1 [ %c0 ] { in_bounds = [ true ]} : vector <4 xi32 >, memref < 20 x i32 >
336
+ " prevent.dce " ( %read ) : ( vector <4 xi32 >) ->()
337
337
}
338
338
339
339
// %lb_1 is in range [%lb, 4], and %ub_1 is in range [8, %ub].
340
340
// Since %lb_1 is guaranteed to be less than %ub_1, hoisting is possible.
341
341
%lb_1 = affine.min affine_map <(d0 ) -> (d0 , 4 )>(%lb )
342
342
%ub_1 = affine.max affine_map <(d0 ) -> (d0 , 8 )>(%ub )
343
343
344
- // CHECK: vector.transfer_read
344
+ // CHECK: vector.transfer_read
345
345
// CHECK: scf.for {{.*}} {
346
346
// CHECK-NEXT: "prevent.dce"
347
347
scf.for %arg2 = %lb_1 to %ub_1 step %c1 {
348
348
%read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
349
349
" prevent.dce" (%read ) : (vector <4 xi32 >) ->()
350
- vector.transfer_write %read , %arg1 [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <20 xi32 >
351
350
}
352
351
return
353
352
}
@@ -356,7 +355,7 @@ module attributes {transform.with_named_sequence} {
356
355
transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
357
356
%0 = transform.structured.match ops {[" func.func" ]} in %arg1
358
357
: (!transform.any_op ) -> !transform.any_op
359
- transform.structured.hoist_redundant_vector_transfers %0
358
+ transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
360
359
: (!transform.any_op ) -> !transform.any_op
361
360
transform.yield
362
361
}
@@ -492,7 +491,7 @@ module attributes {transform.with_named_sequence} {
492
491
// CHECK: #[[$MAP4:.+]] = affine_map<()[s0] -> (s0 + 4)>
493
492
494
493
// CHECK-LABEL: func.func @hoist_vector_transfer_pairs_disjoint_dynamic
495
- // CHECK-SAME: (%[[BUFFER:.+]]: memref<?x?xf32>, %{{.+}}: index, %[[I0:.+]]: index)
494
+ // CHECK-SAME: (%[[BUFFER:.+]]: memref<?x?xf32>, %{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[I0:.+]]: index)
496
495
497
496
// CHECK: %[[PLUS1:.+]] = affine.apply #[[$MAP1]]()[%[[I0]]]
498
497
// CHECK: %[[PLUS4:.+]] = affine.apply #[[$MAP4]]()[%[[I0]]]
@@ -507,9 +506,7 @@ module attributes {transform.with_named_sequence} {
507
506
// CHECK: vector.transfer_write %{{.+}}, %[[BUFFER]][%[[I0]], %[[I0]]]
508
507
509
508
func.func @hoist_vector_transfer_pairs_disjoint_dynamic (
510
- %buffer: memref <?x?xf32 >, %step: index , %i0 : index ) {
511
- %lb = arith.constant 0 : index
512
- %ub = arith.constant 16 : index
509
+ %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index ) {
513
510
%cst = arith.constant 0.0 : f32
514
511
%i1 = affine.apply affine_map <(d0 ) -> (d0 + 1 )>(%i0 )
515
512
%i2 = affine.apply affine_map <(d0 ) -> (d0 + 4 )>(%i0 )
@@ -552,9 +549,7 @@ module attributes {transform.with_named_sequence} {
552
549
// CHECK-COUNT-2: vector.transfer_write
553
550
554
551
func.func @hoist_vector_transfer_pairs_overlapping_dynamic (
555
- %buffer: memref <?x?xf32 >, %step: index , %i0 : index ) {
556
- %lb = arith.constant 0 : index
557
- %ub = arith.constant 16 : index
552
+ %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index ) {
558
553
%cst = arith.constant 0.0 : f32
559
554
%i1 = affine.apply affine_map <(d0 ) -> (d0 + 3 )>(%i0 )
560
555
@@ -594,9 +589,7 @@ module attributes {transform.with_named_sequence} {
594
589
// CHECK: return
595
590
596
591
func.func @hoist_vector_transfer_pairs_disjoint_dynamic (
597
- %buffer: memref <?x?xf32 >, %step: index , %i0 : index , %i1 : index ) {
598
- %lb = arith.constant 0 : index
599
- %ub = arith.constant 16 : index
592
+ %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index , %i1 : index ) {
600
593
%cst = arith.constant 0.0 : f32
601
594
%i2 = affine.apply affine_map <(d0 ) -> ((d0 floordiv 32 ) * 16 )>(%i1 )
602
595
%i3 = affine.apply affine_map <(d0 ) -> ((d0 floordiv 32 ) * 16 + 8 )>(%i1 )
@@ -633,7 +626,7 @@ module attributes {transform.with_named_sequence} {
633
626
// Test hoisting of vector.extract/vector.broadcast pairs
634
627
635
628
// CHECK-LABEL: func.func @hoist_vector_broadcasts
636
- // CHECK-SAME: (%{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>) -> vector<3x4xf32> {
629
+ // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>) -> vector<3x4xf32> {
637
630
// CHECK: %[[EXTRACT:.+]] = vector.extract %[[VEC]][0] : vector<4xf32> from vector<3x4xf32>
638
631
// CHECK-NEXT: %[[LOOP:.+]] = scf.for {{.*}} {
639
632
// CHECK-NEXT: %[[USE:.+]] = "some_use"({{.*}}) : (vector<4xf32>) -> vector<4xf32>
@@ -642,9 +635,7 @@ module attributes {transform.with_named_sequence} {
642
635
// CHECK-NEXT: %[[BCAST:.+]] = vector.broadcast %[[LOOP]] : vector<4xf32> to vector<3x4xf32>
643
636
// CHECK-NEXT: return %[[BCAST]] : vector<3x4xf32>
644
637
645
- func.func @hoist_vector_broadcasts (%step : index , %vec : vector <3 x4 xf32 >) -> vector <3 x4 xf32 > {
646
- %lb = arith.constant 0 : index
647
- %ub = arith.constant 16 : index
638
+ func.func @hoist_vector_broadcasts (%lb : index , %ub : index , %step : index , %vec : vector <3 x4 xf32 >) -> vector <3 x4 xf32 > {
648
639
%bcast_vec = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec ) -> vector <3 x4 xf32 > {
649
640
%extract = vector.extract %iarg [0 ] : vector <4 xf32 > from vector <3 x4 xf32 >
650
641
%use = " some_use" (%extract ) : (vector <4 xf32 >) -> vector <4 xf32 >
@@ -669,7 +660,7 @@ module attributes {transform.with_named_sequence} {
669
660
// Test hoisting of vector.extract/vector.broadcast pairs with dynamic position
670
661
671
662
// CHECK-LABEL: func.func @hoist_vector_broadcasts
672
- // CHECK-SAME: (%{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>, %[[POS:.+]]: index) -> vector<3x4xf32> {
663
+ // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>, %[[POS:.+]]: index) -> vector<3x4xf32> {
673
664
// CHECK: %[[EXTRACT:.+]] = vector.extract %[[VEC]][%[[POS]]] : vector<4xf32> from vector<3x4xf32>
674
665
// CHECK-NEXT: %[[LOOP:.+]] = scf.for {{.*}} {
675
666
// CHECK-NEXT: %[[USE:.+]] = "some_use"({{.*}}) : (vector<4xf32>) -> vector<4xf32>
@@ -678,9 +669,7 @@ module attributes {transform.with_named_sequence} {
678
669
// CHECK-NEXT: %[[BCAST:.+]] = vector.broadcast %[[LOOP]] : vector<4xf32> to vector<3x4xf32>
679
670
// CHECK-NEXT: return %[[BCAST]] : vector<3x4xf32>
680
671
681
- func.func @hoist_vector_broadcasts_dynamic (%step : index , %vec : vector <3 x4 xf32 >, %pos: index ) -> vector <3 x4 xf32 > {
682
- %lb = arith.constant 0 : index
683
- %ub = arith.constant 16 : index
672
+ func.func @hoist_vector_broadcasts_dynamic (%lb : index , %ub : index , %step : index , %vec : vector <3 x4 xf32 >, %pos: index ) -> vector <3 x4 xf32 > {
684
673
%bcast_vec = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec ) -> vector <3 x4 xf32 > {
685
674
%extract = vector.extract %iarg [%pos ] : vector <4 xf32 > from vector <3 x4 xf32 >
686
675
%use = " some_use" (%extract ) : (vector <4 xf32 >) -> vector <4 xf32 >
@@ -705,7 +694,7 @@ module attributes {transform.with_named_sequence} {
705
694
// Test hoisting of vector.extract/vector.broadcast pairs with multiple iter_args
706
695
707
696
// CHECK-LABEL: func.func @hoist_vector_broadcasts_multiple
708
- // CHECK-SAME: (%{{.+}}: index, %[[VEC1:.+]]: vector<3x4xf32>,
697
+ // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[VEC1:.+]]: vector<3x4xf32>,
709
698
// CHECK-SAME: %[[VEC2:.+]]: vector<3x5xf32>) -> (vector<3x4xf32>, vector<3x5xf32>) {
710
699
// CHECK-DAG: %[[EXTRACT1:.+]] = vector.extract %[[VEC1]][0] : vector<4xf32> from vector<3x4xf32>
711
700
// CHECK-DAG: %[[EXTRACT2:.+]] = vector.extract %[[VEC2]][1] : vector<5xf32> from vector<3x5xf32>
@@ -718,9 +707,7 @@ module attributes {transform.with_named_sequence} {
718
707
// CHECK-DAG: %[[BCAST2:.+]] = vector.broadcast %[[LOOP]]#1 : vector<5xf32> to vector<3x5xf32>
719
708
// CHECK-NEXT: return %[[BCAST1]], %[[BCAST2]] : vector<3x4xf32>, vector<3x5xf32>
720
709
721
- func.func @hoist_vector_broadcasts_multiple (%step : index , %vec1 : vector <3 x4 xf32 >, %vec2 : vector <3 x5 xf32 >) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
722
- %lb = arith.constant 0 : index
723
- %ub = arith.constant 16 : index
710
+ func.func @hoist_vector_broadcasts_multiple (%lb : index , %ub : index , %step : index , %vec1 : vector <3 x4 xf32 >, %vec2 : vector <3 x5 xf32 >) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
724
711
%bcast_vec:2 = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec1 , %iarg2 = %vec2 ) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
725
712
%extract1 = vector.extract %iarg [0 ] : vector <4 xf32 > from vector <3 x4 xf32 >
726
713
%extract2 = vector.extract %iarg2 [1 ] : vector <5 xf32 > from vector <3 x5 xf32 >
0 commit comments