[mlir][Bufferization] Do not have read semantics for destination of tensor.parallel_insert_slice.
#134169
Conversation
…tensor.parallel_insert_slice`. Signed-off-by: MaheshRavishankar <[email protected]>
@llvm/pr-subscribers-mlir-scf @llvm/pr-subscribers-mlir

Author: None (MaheshRavishankar)

Changes

This patch drops the read semantics for the destination of `tensor.parallel_insert_slice`.

Full diff: https://github.com/llvm/llvm-project/pull/134169.diff

3 Files Affected:
diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
index f48d2a2df9c3c..f83435521e708 100644
--- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -1207,17 +1207,11 @@ struct ForallOpInterface
ForallOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
- auto forallOp = cast<ForallOp>(op);
-
- // If the loop has zero iterations, the results of the op are their
- // corresponding shared_outs, meaning that the shared_outs bufferize to a
- // read.
- if (mayHaveZeroIterations(forallOp))
- return true;
-
- // scf::ForallOp alone doesn't bufferize to a memory read, one of the
- // uses of its matching bbArg may.
- return state.isValueRead(forallOp.getTiedBlockArgument(&opOperand));
+ // All tensor operands to `scf.forall` are `shared_outs` and all
+ // shared outs are assumed to be read by the loop. This does not
+ // account for the case where the entire value is over-written,
+ // but being conservative here.
+ return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index 4ac6eca586961..31014172a9555 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -930,8 +930,7 @@ struct ParallelInsertSliceOpInterface
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
- return insertSliceOpRequiresRead(cast<tensor::ParallelInsertSliceOp>(op),
- opOperand);
+ return opOperand == cast<ParallelInsertSliceOp>(op).getSourceMutable();
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index bb9f7dfdba83f..a1067ec3ba05f 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -946,3 +946,38 @@ func.func @index_switch(%pred: index, %b: tensor<5xf32>, %c: tensor<5xf32>) -> t
// CHECK: return %[[r]]
return %0 : tensor<5xf32>
}
+
+// -----
+
+// See Issue https://github.com/llvm/llvm-project/issues/133964 . Checks that
+// tensor.parallel_insert_slice dest operand does not have read semantics.
+func.func @check_scfforall_inplace_bufferizer(%arg0 : tensor<?x?xf32>,
+ %arg1 : tensor<?x?xf32>,
+ %arg2 : tensor<?xf32> {bufferization.writable = true}) -> tensor<?xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %d0 = tensor.dim %arg2, %c0 : tensor<?xf32>
+ %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
+ %0 = scf.forall (%arg3) in (%c1) shared_outs(%arg4 = %arg2) -> (tensor<?xf32>) {
+ %1 = tensor.extract_slice %arg0[0, 0][%d0, %d1][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+ %2 = tensor.extract_slice %arg1[0, 0][%d0, %d1][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+ %3 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0)>],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%1, %2 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg4 : tensor<?xf32>) {
+ ^bb0(%b0 : f32, %b1: f32, %b2 : f32):
+ %4 = arith.mulf %b0, %b1 : f32
+ %5 = arith.addf %4, %b2 : f32
+ linalg.yield %5 : f32
+ } -> tensor<?xf32>
+ scf.forall.in_parallel {
+ tensor.parallel_insert_slice %3 into %arg4[0] [%d0] [1] : tensor<?xf32> into tensor<?xf32>
+ }
+ }
+ return %0 : tensor<?xf32>
+}
+// CHECK-LABEL: func @check_scfforall_inplace_bufferizer
+// CHECK-NOT: memref.alloc
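For context on what the new test guards, here is a reduced, hand-written sketch of the same pattern (the @sketch function and its values are hypothetical, not part of the patch). The shared_out %out is overwritten in place inside the loop, and the terminator re-inserts the result over that same destination. If the `tensor.parallel_insert_slice` destination were still treated as a read, the analysis would see %out being read after its buffer had been clobbered by the in-place write, and would have to bufferize that write out of place, producing exactly the memref.alloc that the CHECK-NOT line rules out.

// Hand-written sketch, not part of the patch: same shape as the new test above.
func.func @sketch(%dest : tensor<?xf32> {bufferization.writable = true},
                  %d0 : index) -> tensor<?xf32> {
  %c1 = arith.constant 1 : index
  %cst = arith.constant 0.0 : f32
  %0 = scf.forall (%i) in (%c1) shared_outs(%out = %dest) -> (tensor<?xf32>) {
    // In-place write to the shared_out.
    %filled = linalg.fill ins(%cst : f32) outs(%out : tensor<?xf32>) -> tensor<?xf32>
    scf.forall.in_parallel {
      // With this patch, the destination %out is only written here, not read.
      tensor.parallel_insert_slice %filled into %out[0] [%d0] [1]
          : tensor<?xf32> into tensor<?xf32>
    }
  }
  return %0 : tensor<?xf32>
}

After this change, the only read in that chain comes from `scf.forall` itself: the ForallOpInterface hunk above now conservatively reports every `shared_outs` operand as read.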
The major changes come from the following PRs:
* llvm/llvm-project#131226
* llvm/llvm-project#134264
* llvm/llvm-project#134169
* llvm/llvm-project#134517
---------
Signed-off-by: MaheshRavishankar <[email protected]>
Signed-off-by: Jakub Kuderski <[email protected]>
Co-authored-by: MaheshRavishankar <[email protected]>
`tensor.insert_slice` needs to have read semantics on its destination operand: since it has a return value, its result is the destination tensor with the source inserted into the specified slice, so the untouched part of the destination is read and carried through. `tensor.parallel_insert_slice`, though, has no result, so it does not need to have read semantics. The op description also says that it is expected to lower to a `memref.subview`, which does not have read semantics on the destination (it is just a view).

This patch drops the read semantics for the destination of `tensor.parallel_insert_slice`, but also makes the `shared_outs` operands of `scf.forall` have read semantics. Earlier, the bufferization relied indirectly on the read semantics of the destination operand of `tensor.parallel_insert_slice` to propagate read semantics for `shared_outs`; now that is specified directly.

Fixes #133964
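As a rough illustration of the distinction described above (hypothetical function and values, not taken from the patch): `tensor.insert_slice` produces a new tensor that carries the untouched remainder of its destination, so the destination has to be read, while `tensor.parallel_insert_slice` has no result and only names the slice of the enclosing `scf.forall` shared_out that its source overwrites.

func.func @contrast(%src : tensor<4xf32>, %dest : tensor<16xf32>)
    -> (tensor<16xf32>, tensor<16xf32>) {
  // insert_slice: %r is %dest with %src written into elements [0, 4). The
  // untouched rest of %dest flows into %r, so %dest is read.
  %r = tensor.insert_slice %src into %dest[0] [4] [1]
      : tensor<4xf32> into tensor<16xf32>

  // parallel_insert_slice: no result of its own. It only marks the slice of
  // the shared_out %out that %src overwrites; nothing of %out is read.
  %c1 = arith.constant 1 : index
  %p = scf.forall (%i) in (%c1) shared_outs(%out = %dest) -> (tensor<16xf32>) {
    scf.forall.in_parallel {
      tensor.parallel_insert_slice %src into %out[0] [4] [1]
          : tensor<4xf32> into tensor<16xf32>
    }
  }
  return %r, %p : tensor<16xf32>, tensor<16xf32>
}

Consistent with the "just a view" point above, the parallel variant is expected to bufferize to a copy of the source buffer into a memref.subview of the shared_out's buffer, never to a read of the destination contents.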