Skip to content

Commit 9aae59a

Browse files
committed
[mlir][vector] Prevent incorrect vector.transfer_{read|write} hoisting
At the moment, `hoistRedundantVectorTransfers` would hoist the `vector.transfer_read`/`vector.transfer_write` pair in this function: ```mlir func.func @no_hoisting_write_to_memref(%rhs: i32, %arg1: vector<1xi32>) { %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c4 = arith.constant 4 : index %c20 = arith.constant 20 : index %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32> %cast = memref.cast %alloca : memref<1x1x2xi32> to memref<1x1x2xi32> %collapsed_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32> scf.for %_ = %c0 to %c20 step %c4 { %collapsed_2 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32> %lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32> %acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32> %op = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<1xi32>, i32 vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32> } return } ``` as follows: ```mlir func.func @no_hoisting_write_to_memref(%arg0: i32, %arg1: vector<1xi32>) { %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %c4 = arith.constant 4 : index %c20 = arith.constant 20 : index %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32> %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32> %collapse_shape_0 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32> %0 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32> %1 = vector.transfer_read %collapse_shape_0[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32> %2 = scf.for %arg2 = %c0 to %c20 step %c4 iter_args(%arg3 = %0) -> (vector<1xi32>) { %3 = vector.outerproduct %arg3, %arg0, %1 {kind 
= #vector.kind<add>} : vector<1xi32>, i32 scf.yield %3 : vector<1xi32> } vector.transfer_write %2, %collapse_shape[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32> return } ``` This is not safe. While one argument for `vector.outerproduct` (`%lhs` from the original loop) is correctly being forwarded via `iter_args`, the other one (`%acc` from the original loop) is not: it is read from `%collapsed_2`, which aliases `%collapsed_1`, so in every iteration it must observe the value stored by the `vector.transfer_write` of the previous iteration, yet after hoisting it is read only once, before the loop. This patch disables hoisting in cases where the source of the "candidate" `vector.transfer_read` aliases with some other `memref`. A more generic approach would be to make sure that all values are correctly forwarded via `iter_args`, but that would require involving alias analysis. [1] Based on iree-org/iree#14994.
1 parent 59fbba9 commit 9aae59a

File tree

2 files changed

+57
-4
lines changed

2 files changed

+57
-4
lines changed

mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,14 @@ void mlir::linalg::hoistRedundantVectorTransfers(func::FuncOp func) {
152152
transferRead.getPermutationMap() != transferWrite.getPermutationMap())
153153
return WalkResult::advance();
154154

155+
// When the source of transfer_read aliases, the following dominance
156+
// analysis might not be sufficient.
157+
// TODO: There might be other, similar cases missing here (i.e. other
158+
// Memref Ops).
159+
auto source = transferRead.getSource();
160+
if (source.getDefiningOp<memref::CollapseShapeOp>())
161+
return WalkResult::advance();
162+
155163
// TODO: may want to memoize this information for performance but it
156164
// likely gets invalidated often.
157165
DominanceInfo dom(loop);

mlir/test/Dialect/Linalg/hoisting.mlir

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -765,10 +765,10 @@ transform.sequence failures(propagate) {
765765

766766
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
767767
// CHECK: scf.for {{.*}} {
768-
// CHECK: vector.transfer_write
769-
// CHECK: vector.transfer_read
770-
// CHECK: vector.transfer_write
771-
// CHECK: }
768+
// CHECK: vector.transfer_write {{.*}} : vector<4xi32>, memref<4xi32>
769+
// CHECK-NEXT: vector.transfer_read {{.*}} : memref<1x4x1xi32>, vector<1x4x1xi32>
770+
// CHECK-NEXT: vector.transfer_write {{.*}} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
771+
// CHECK-NEXT: }
772772

773773
func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
774774
%c0_i32 = arith.constant 0 : i32
@@ -827,3 +827,48 @@ transform.sequence failures(propagate) {
827827
transform.structured.hoist_redundant_vector_transfers %0
828828
: (!transform.any_op) -> !transform.any_op
829829
}
830+
831+
// -----
832+
833+
// Regression test - hoisting the following `vector.transfer_{read|write}` pair
834+
// would not be safe:
835+
// %lhs = vector.transfer_read %collapsed_1[%c0]
836+
// vector.transfer_write %op, %collapsed_1[%c0]
837+
// That's because the following `vector.transfer_read` reads from the same
838+
// memory (i.e. `%collapsed_1` and `%collapsed_2` alias):
839+
// %acc = vector.transfer_read %collapsed_2[%c0]
840+
841+
// CHECK-LABEL: func.func @no_hoisting_write_to_memref
842+
// CHECK: scf.for {{.*}} {
843+
// CHECK: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
844+
// CHECK-NEXT: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
845+
// CHECK-NEXT: vector.outerproduct {{.*}} : vector<1xi32>, i32
846+
// CHECK-NEXT: vector.transfer_write {{.*}} : vector<1xi32>, memref<2xi32>
847+
// CHECK-NEXT: }
848+
849+
func.func @no_hoisting_write_to_memref(%rhs: i32, %arg1: vector<1xi32>) {
850+
%c0_i32 = arith.constant 0 : i32
851+
%c0 = arith.constant 0 : index
852+
%c1 = arith.constant 1 : index
853+
%c4 = arith.constant 4 : index
854+
%c20 = arith.constant 20 : index
855+
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32>
856+
%cast = memref.cast %alloca : memref<1x1x2xi32> to memref<1x1x2xi32>
857+
%collapsed_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
858+
scf.for %_ = %c0 to %c20 step %c4 {
859+
%collapsed_2 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
860+
%lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
861+
%acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
862+
%op = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<1xi32>, i32
863+
vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32>
864+
}
865+
return
866+
}
867+
868+
transform.sequence failures(propagate) {
869+
^bb1(%arg1: !transform.any_op):
870+
%0 = transform.structured.match ops{["func.func"]} in %arg1
871+
: (!transform.any_op) -> !transform.any_op
872+
transform.structured.hoist_redundant_vector_transfers %0
873+
: (!transform.any_op) -> !transform.any_op
874+
}

0 commit comments

Comments
 (0)