Skip to content

Commit 235fcd9

Browse files
committed
[mlir][vector] Refine vector.transfer_read hoisting/forwarding
Make sure that when analysing a `vector.transfer_read` that is a candidate for either hoisting or store-to-load forwarding, `memref.collapse_shape` ops are correctly included in the alias analysis. This is done by either: * making sure that the relevant users are taken into account, or * correctly identifying the source op (i.e. `memref.collapse_shape` is not really the true source). Note that this won't fix aliasing for other cases (e.g. with `memref.expand_shape`). Ideally, we should query an alias analysis in e.g. `noAliasingUseInLoop` instead of hard-coding specific ops.
1 parent e791535 commit 235fcd9

File tree

4 files changed

+119
-1
lines changed

4 files changed

+119
-1
lines changed

mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
5757
Value source = transferRead.getSource();
5858
while (auto subView = source.getDefiningOp<memref::SubViewOp>())
5959
source = subView.getSource();
60+
while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
61+
source = collapsed->getOperand(0);
6062
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
6163
source.getUsers().end());
6264
llvm::SmallDenseSet<Operation *, 32> processed;
@@ -69,6 +71,10 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
6971
users.append(subView->getUsers().begin(), subView->getUsers().end());
7072
continue;
7173
}
74+
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
75+
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
76+
continue;
77+
}
7278
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
7379
continue;
7480
if (!loop->isAncestor(user))

mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
206206
users.append(subView->getUsers().begin(), subView->getUsers().end());
207207
continue;
208208
}
209+
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
210+
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
211+
continue;
212+
}
209213
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
210214
continue;
211215
if (auto write = dyn_cast<vector::TransferWriteOp>(user)) {

mlir/test/Dialect/Linalg/hoisting.mlir

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,3 +756,74 @@ transform.sequence failures(propagate) {
756756
transform.structured.hoist_redundant_vector_transfers %0
757757
: (!transform.any_op) -> !transform.any_op
758758
}
759+
760+
// -----
761+
762+
// Regression test - `vector.transfer_read` below should not be hoisted.
763+
// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
764+
// (read by `vector.transfer_read`) alias.
765+
766+
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
767+
// CHECK: scf.for {{.*}} {
768+
// CHECK: vector.transfer_write
769+
// CHECK: vector.transfer_read
770+
// CHECK: vector.transfer_write
771+
// CHECK: }
772+
773+
func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
774+
%c0_i32 = arith.constant 0 : i32
775+
%c0 = arith.constant 0 : index
776+
%c4 = arith.constant 4 : index
777+
%c20 = arith.constant 20 : index
778+
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
779+
scf.for %arg0 = %c0 to %c20 step %c4 {
780+
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
781+
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
782+
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
783+
%read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
784+
vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
785+
}
786+
return
787+
}
788+
789+
transform.sequence failures(propagate) {
790+
^bb1(%arg1: !transform.any_op):
791+
%0 = transform.structured.match ops{["func.func"]} in %arg1
792+
: (!transform.any_op) -> !transform.any_op
793+
transform.structured.hoist_redundant_vector_transfers %0
794+
: (!transform.any_op) -> !transform.any_op
795+
}
796+
797+
// -----
798+
799+
// Regression test - `vector.transfer_read` below should not be hoisted.
800+
// Indeed, %collapse_shape (read by `vector.transfer_read`) and %alloca
801+
// (written to by `vector.transfer_write`) alias.
802+
803+
// CHECK-LABEL: func.func @no_hoisting_collapse_shape_2
804+
// CHECK: scf.for {{.*}} {
805+
// CHECK: vector.transfer_write
806+
// CHECK: vector.transfer_read
807+
808+
func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
809+
%c0_i32 = arith.constant 0 : i32
810+
%c0 = arith.constant 0 : index
811+
%c4 = arith.constant 4 : index
812+
%c20 = arith.constant 20 : index
813+
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32>
814+
scf.for %arg0 = %c0 to %c20 step %c4 {
815+
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
816+
vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
817+
%read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
818+
"prevent.dce"(%read) : (vector<12xi32>) ->()
819+
}
820+
return
821+
}
822+
823+
transform.sequence failures(propagate) {
824+
^bb1(%arg1: !transform.any_op):
825+
%0 = transform.structured.match ops{["func.func"]} in %arg1
826+
: (!transform.any_op) -> !transform.any_op
827+
transform.structured.hoist_redundant_vector_transfers %0
828+
: (!transform.any_op) -> !transform.any_op
829+
}

mlir/test/Dialect/Vector/vector-transferop-opt.mlir

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,4 +215,41 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
215215
vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
216216
vector<1x4xf32>, memref<4x4xf32>
217217
return %0 : vector<1x4xf32>
218-
}
218+
}
219+
220+
221+
// Regression test - the following _potential forwarding_ of %1 to the final
222+
// `vector.transfer_write` would not be safe:
223+
// %1 = vector.transfer_read %subview
224+
// vector.transfer_write %1, %alloca
225+
// vector.transfer_write %vec, %collapse_shape
226+
// vector.transfer_write %1, %subview
227+
// Indeed, %alloca and %collapse_shape alias and hence %2 != %1.
228+
229+
// CHECK-LABEL: func.func @collapse_shape
230+
// CHECK: scf.for {{.*}} {
231+
// CHECK: vector.transfer_read
232+
// CHECK: vector.transfer_write
233+
// CHECK: vector.transfer_write
234+
// CHECK: vector.transfer_read
235+
// CHECK: vector.transfer_write
236+
237+
func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
238+
%c0_i32 = arith.constant 0 : i32
239+
%c0 = arith.constant 0 : index
240+
%c4 = arith.constant 4 : index
241+
%c20 = arith.constant 20 : index
242+
243+
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
244+
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
245+
scf.for %arg0 = %c0 to %c20 step %c4 {
246+
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
247+
%1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
248+
// %alloca and %collapse_shape alias
249+
vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
250+
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
251+
%2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
252+
vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
253+
}
254+
return
255+
}

0 commit comments

Comments
 (0)