[mlir][tensor] fix out-of-bound index in tensor.dim #85901
Conversation
Convert tensor.dim with out-of-bound index to ub.poison.

Fixes: llvm#70183
@llvm/pr-subscribers-mlir-memref @llvm/pr-subscribers-mlir-tensor

Author: Jianbang Yang (dynamicheart)

Changes: fold tensor.dim with out-of-bound index to ub.poison. Fixes: #70183

Full diff: https://github.com/llvm/llvm-project/pull/85901.diff

4 Files Affected:
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 2b226c7a1207cf..a656c812a59feb 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -333,6 +333,9 @@ struct FoldDimOfAllocTensorOp : public OpRewritePattern<tensor::DimOp> {
auto allocTensorOp = dimOp.getSource().getDefiningOp<AllocTensorOp>();
if (!allocTensorOp || !maybeConstantIndex)
return failure();
+ if (*maybeConstantIndex < 0 ||
+ *maybeConstantIndex >= allocTensorOp.getType().getRank())
+ return failure();
if (!allocTensorOp.getType().isDynamicDim(*maybeConstantIndex))
return failure();
rewriter.replaceOp(
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index dc8843aa4e1e13..2183839d8f4290 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -11,6 +11,7 @@
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/UB/IR/UBOps.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
@@ -45,6 +46,8 @@ Operation *TensorDialect::materializeConstant(OpBuilder &builder,
if (complex::ConstantOp::isBuildableWith(value, type))
return builder.create<complex::ConstantOp>(loc, type,
llvm::cast<ArrayAttr>(value));
+ if (auto poison = dyn_cast<ub::PoisonAttr>(value))
+ return builder.create<ub::PoisonOp>(loc, type, poison);
return nullptr;
}
@@ -738,11 +741,11 @@ OpFoldResult DimOp::fold(FoldAdaptor adaptor) {
if (!tensorType)
return {};
- // Out of bound indices produce undefined behavior but are still valid IR.
- // Don't choke on them.
+  // Fold dim to poison if the index is out of bound. Poison represents
+ // undefined behavior.
int64_t indexVal = index.getInt();
if (indexVal < 0 || indexVal >= tensorType.getRank())
- return {};
+ return ub::PoisonAttr::get(getContext());
// Fold if the shape extent along the given index is known.
if (!tensorType.isDynamicDim(index.getInt())) {
diff --git a/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir b/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir
index 18e9a9d02e1081..72ff1ca08c1faa 100644
--- a/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir
+++ b/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir
@@ -13,10 +13,9 @@ func.func @dim_out_of_bounds(%m : memref<7x8xf32>) -> index {
// -----
// CHECK-LABEL: func @dim_out_of_bounds_2(
-// CHECK-NEXT: arith.constant
+// CHECK-NEXT: ub.poison
// CHECK-NEXT: arith.constant
// CHECK-NEXT: bufferization.alloc_tensor
-// CHECK-NEXT: tensor.dim
// CHECK-NEXT: return
func.func @dim_out_of_bounds_2(%idx1 : index, %idx2 : index) -> index {
%idx = arith.constant 7 : index
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index e5374f031be553..2914c6873f5e47 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -2367,3 +2367,25 @@ func.func @dim_of_reshape_undominated(%arg0: tensor<*xf32>, %arg1: tensor<?xinde
%dim = tensor.dim %reshape, %0 : tensor<*xf32>
return %dim : index
}
+
+// -----
+
+// Test case: tensor.dim(%tensor, %out_of_bound_index) is folded into ub.poison
+// CHECK-LABEL: func @dim_out_of_bounds(
+// CHECK: ub.poison
+// CHECK-NEXT: bufferization.alloc_tensor
+// CHECK-NEXT: memref.alloc
+// CHECK-NEXT: memref.cast
+// CHECK-NEXT: affine.vector_load
+// CHECK-NEXT: return
+func.func @dim_out_of_bounds() -> vector<7xi32> {
+ %c1 = arith.constant 1 : index
+ %idx28 = index.constant 28
+ %c29 = arith.constant 29 : index
+ %3 = bufferization.alloc_tensor(%c29) : tensor<?xi16>
+ %dim = tensor.dim %3, %idx28 : tensor<?xi16>
+ %alloc_21 = memref.alloc(%c29) : memref<?x26x2xi32>
+ %16 = affine.vector_load %alloc_21[%c1, %c1, %dim] : memref<?x26x2xi32>, vector<7xi32>
+ return %16 : vector<7xi32>
+}
+
@llvm/pr-subscribers-mlir-bufferization

Author: Jianbang Yang (dynamicheart)

Changes: fold tensor.dim with out-of-bound index to ub.poison. Fixes: #70183

Full diff: https://github.com/llvm/llvm-project/pull/85901.diff

4 Files Affected:
- mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
- mlir/test/Dialect/MemRef/resolve-dim-ops.mlir
- mlir/test/Dialect/Tensor/canonicalize.mlir
@lipracer please also review this PR.
  int64_t indexVal = index.getInt();
  if (indexVal < 0 || indexVal >= tensorType.getRank())
-    return {};
+    return ub::PoisonAttr::get(getContext());
Is the Tensor dialect already dependent on UB dialect and generating poison values?
This is really a design question: this new addition can be a problem for some users, and it's not clear to me whether we can actually use poison everywhere in MLIR right now.
I read the code comment again and I think you are right. It seems tensor.dim ops with out-of-bound indices are treated as valid IR, and they could be handled in the following ways:
- Compiled to a runnable binary that raises an exception at runtime.
- Handled in other passes that raise warnings or errors at compile time, depending on the compiler design.

So should I simply fix the out-of-bound access bug in the FoldDimOfAllocTensorOp rewrite pattern and keep the folding behavior as before? What's your opinion?
Another question: is ub.poison designed to be valid IR that can be compiled to a binary?
After reviewing the design of poison semantics, I found no clear definition for the out-of-bounds case. Can we simply skip folding and do nothing, as in the original logic, so as not to disrupt the handling in existing code?
> It seems tensor.dim ops with out-of-bound indices are treated as valid IR
It is documented as undefined behavior, so turning it into poison is clearly allowed by the spec.
However, at the moment we haven't deployed the UB dialect as a really "central" component, and "fold" is, even more than canonicalization, anchored deep in the core of MLIR (it's not optional, can't be disabled, etc.). So folding to poison makes me a bit nervous here; if the tensor dialect already had other folds to poison, that wouldn't be a problem.
> Can we simply skip folding and do nothing
This is the most conservative solution IMO.
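For reference, a minimal sketch of the fold under discussion; the function name and tensor shape are hypothetical (not taken from this PR's tests) and the "after" form assumes the canonicalizer applies the new fold:

```mlir
// Before: the constant index 2 is out of range for the rank-1 tensor, which
// tensor.dim documents as undefined behavior but still valid IR.
func.func @oob_dim(%t: tensor<?xf32>) -> index {
  %c2 = arith.constant 2 : index
  %d = tensor.dim %t, %c2 : tensor<?xf32>
  return %d : index
}

// After canonicalization (sketch): DimOp::fold returns a ub::PoisonAttr, the
// new materializeConstant hook turns it into an ub.poison op, and the dead
// arith.constant is removed.
func.func @oob_dim(%t: tensor<?xf32>) -> index {
  %d = ub.poison : index
  return %d : index
}
```

This is the same behavior the new ub.poison CHECK lines in canonicalize.mlir exercise.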
You need to add a dependency on the UB dialect in the CMake file, like this.
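For illustration, a sketch of the kind of CMake edit that comment refers to; the file path and target names (MLIRTensorDialect, MLIRUBDialect) are assumptions based on the upstream layout, and the real file lists more sources and libraries:

```cmake
# Abbreviated sketch of mlir/lib/Dialect/Tensor/IR/CMakeLists.txt; only the
# lines relevant to the new dependency are shown.
add_mlir_dialect_library(MLIRTensorDialect
  TensorOps.cpp

  LINK_LIBS PUBLIC
  MLIRArithDialect
  MLIRComplexDialect
  # New: TensorOps.cpp now uses ub::PoisonAttr and ub::PoisonOp.
  MLIRUBDialect
  )
```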
LGTM, once the suggestions have been addressed.
// Test case: tensor.dim(%tensor, %out_of_bound_index) is folded into ub.poison
// CHECK-LABEL: func @dim_out_of_bounds(
// CHECK: ub.poison
// CHECK-NEXT: bufferization.alloc_tensor
Check the bufferization.alloc_tensor's operands, like this:
// CHECK: %[[poison:.*]] = ub.poison
// CHECK-NEXT: bufferization.alloc_tensor(%[[poison]])
✅ With the latest revision this PR passed the Python code formatter.
✅ With the latest revision this PR passed the C/C++ code formatter.
@dynamicheart Thanks for the fixes. Please wait for CI to go green; I will merge this then.
@dynamicheart Congratulations on having your first Pull Request (PR) merged into the LLVM Project! Your changes will be combined with recent changes from other authors and then tested by the build bots. Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. How to do this, and the rest of the post-merge process, is covered in detail here. If your change does cause a problem, it may be reverted, or you can revert it yourself. If you don't get any reports, no action is required from you. Your changes are working as expected; well done!
fold tensor.dim with out-of-bound index to ub.poison
Fixes: #70183