@@ -765,10 +765,10 @@ transform.sequence failures(propagate) {
765
765
766
766
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
767
767
// CHECK: scf.for {{.*}} {
768
- // CHECK: vector.transfer_write
769
- // CHECK: vector.transfer_read
770
- // CHECK: vector.transfer_write
771
- // CHECK: }
768
+ // CHECK: vector.transfer_write {{.*}} : vector<4xi32>, memref<4xi32>
769
+ // CHECK-NEXT: vector.transfer_read {{.*}} : memref<1x4x1xi32>, vector<1x4x1xi32>
770
+ // CHECK-NEXT: vector.transfer_write {{.*}} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
771
+ // CHECK-NEXT: }
772
772
773
773
func.func @no_hoisting_collapse_shape (%in_0: memref <1 x20 x1 xi32 >, %1: memref <9 x1 xi32 >, %vec: vector <4 xi32 >) {
774
774
%c0_i32 = arith.constant 0 : i32
@@ -827,3 +827,48 @@ transform.sequence failures(propagate) {
827
827
transform.structured.hoist_redundant_vector_transfers %0
828
828
: (!transform.any_op ) -> !transform.any_op
829
829
}
830
+
831
+ // -----
832
+
833
+ // Regression test - hoisting the following `vector.transfer_{read|write}` pair
834
+ // would not be safe:
835
+ // %lhs = vector.transfer_read %collapsed_1[%c0]
836
+ // vector.transfer_write %op, %collapsed_1[%c0]
837
+ // That's because the following `vector.transfer_read` reads from the same
838
+ // memory (i.e. `%collapsed_1` and `%collapsed_2` alias):
839
+ // %acc = vector.transfer_read %collapsed_2[%c0]
840
+
841
+ // CHECK-LABEL: func.func @no_hoisting_write_to_memref
842
+ // CHECK: scf.for {{.*}} {
843
+ // CHECK: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
844
+ // CHECK-NEXT: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
845
+ // CHECK-NEXT: vector.outerproduct {{.*}} : vector<1xi32>, i32
846
+ // CHECK-NEXT: vector.transfer_write {{.*}} : vector<1xi32>, memref<2xi32>
847
+ // CHECK-NEXT: }
848
+
849
+ // Test input: a loop that reads and writes a single alloca through two
+ // separately created collapsed views. `%arg1` is not used in the body; it is
+ // kept so the function signature stays unchanged.
+ func.func @no_hoisting_write_to_memref(%rhs: i32, %arg1: vector<1xi32>) {
+   %c0_i32 = arith.constant 0 : i32
+   %c0 = arith.constant 0 : index
+   %c4 = arith.constant 4 : index
+   %c20 = arith.constant 20 : index
+   %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32>
+   // First collapsed view of %alloca, created outside the loop.
+   %collapsed_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
+   scf.for %_ = %c0 to %c20 step %c4 {
+     // Second collapsed view of the same %alloca, created inside the loop.
+     %collapsed_2 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
+     // Both reads use index %c0, one through each view of %alloca.
+     %lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
+     %acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
+     %op = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<1xi32>, i32
+     // The write goes through %collapsed_1 at the same index that %acc reads
+     // through %collapsed_2.
+     vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32>
+   }
+   return
+ }
867
+
868
+ // Transform script: match ops named "func.func" under the payload root handle
+ // and apply hoist_redundant_vector_transfers to the matched handle.
+ transform.sequence failures(propagate) {
+ ^bb1(%arg1: !transform.any_op):
+   // The op name must be spelled exactly ("func.func", no surrounding
+   // whitespace) for the match to select the function ops.
+   %0 = transform.structured.match ops{["func.func"]} in %arg1
+     : (!transform.any_op) -> !transform.any_op
+   transform.structured.hoist_redundant_vector_transfers %0
+     : (!transform.any_op) -> !transform.any_op
+ }
0 commit comments