[mlir][vector] Restrict DropInnerMostUnitDimsTransferRead #94904
Conversation
@llvm/pr-subscribers-mlir-vector @llvm/pr-subscribers-mlir

Author: Andrzej Warzyński (banach-space)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/94904.diff (2 files affected)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index f29eba90c3ceb..caf1506e0db93 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1293,6 +1293,21 @@ class DropInnerMostUnitDimsTransferRead
if (dimsToDrop == 0)
return failure();
+ // Make sure that the indices to be dropped are all equal to 0.
+ // TODO: Deal with cases when the indices are not 0.
+ auto isZeroIdx = [](Value idx) {
+ Attribute attr;
+ APInt value;
+ if (!matchPattern(idx, m_Constant(&attr)))
+ return false;
+ if (matchPattern(attr, m_ConstantInt(&value)))
+ if (!value.isZero())
+ return false;
+ return true;
+ };
+ if (!llvm::all_of(readOp.getIndices().take_back(dimsToDrop), isZeroIdx))
+ return failure();
+
auto resultTargetVecType =
VectorType::get(targetType.getShape().drop_back(dimsToDrop),
targetType.getElementType(),
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index b4cb640108bae..96151076330cd 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -1,12 +1,17 @@
// RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s
-func.func @contiguous_inner_most_view(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+//-----------------------------------------------------------------------------
+// 1. vector.transfer_read
+//-----------------------------------------------------------------------------
+
+func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
%0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
return %0 : vector<1x8x1xf32>
}
-// CHECK: func @contiguous_inner_most_view(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
+
+// CHECK: func @contiguous_inner_most(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
// CHECK-SAME: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>> to memref<1x1x8xf32, strided<[3072, 8, 1], offset: ?>>
// CHECK: %[[VEC:.+]] = vector.transfer_read %[[SRC_0]]
@@ -14,15 +19,61 @@ func.func @contiguous_inner_most_view(%in: memref<1x1x8x1xf32, strided<[3072, 8,
// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
// CHECK: return %[[RESULT]]
+// Same as the top example within this split, but with the inner vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
+ return %0 : vector<1x[8]x1xf32>
+}
+
+// CHECK: func @contiguous_inner_most_scalable_inner_dim(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
+// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
+// CHECK-SAME: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>> to memref<1x1x8xf32, strided<[3072, 8, 1], offset: ?>>
+// CHECK: %[[VEC:.+]] = vector.transfer_read %[[SRC_0]]
+// CHECK-SAME: memref<1x1x8xf32, strided<[3072, 8, 1], offset: ?>>, vector<1x[8]xf32>
+// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
+// CHECK: return %[[RESULT]]
+
+// Same as the top example within this split, but the trailing unit dim was
+// replaced with a dyn dim - not supported
+
+func.func @non_unit_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+ return %0 : vector<1x8x1xf32>
+}
+
+// CHECK-LABEL: func @non_unit_trailing_dim
+// CHECK-NOT: memref.subview
+// CHECK-NOT: vector.shape_cast
+
+// Same as the top example within this split, but with a scalable unit dim in
+// the output vector - not supported
+
+func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
+ return %0 : vector<1x8x[1]xf32>
+}
+// CHECK-LABEL: func @negative_scalable_unit_dim
+// CHECK-NOT: memref.subview
+// CHECK-NOT: vector.shape_cast
+
// -----
-func.func @contiguous_outer_dyn_inner_most_view(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
+func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
%c0 = arith.constant 0 : index
%pad = arith.constant 0.0 : f32
%v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
return %v : vector<8x1xf32>
}
-// CHECK: func.func @contiguous_outer_dyn_inner_most_view(
+// CHECK: func.func @contiguous_inner_most_dynamic_outer
// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
@@ -38,71 +89,171 @@ func.func @contiguous_outer_dyn_inner_most_view(%a: index, %b: index, %memref: m
// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
// CHECK: return %[[RESULT]]
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
+ %c0 = arith.constant 0 : index
+ %pad = arith.constant 0.0 : f32
+ %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
+ return %v : vector<[8]x1xf32>
+}
+// CHECK-LABEL: func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
+// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
+// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
+// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
+// CHECK: %[[VIEW:.+]] = memref.subview %[[SRC]]{{.*}} memref<?x?x8x1xf32> to memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>
+// CHECK: %[[VEC_READ:.+]] = vector.transfer_read %[[VIEW]]
+// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>, vector<[8]xf32>
+// CHECK: vector.shape_cast %[[VEC_READ]]
+
// -----
-func.func @contiguous_inner_most_dim(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_dim_non_zero_idx(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
%c0 = arith.constant 0 : index
%f0 = arith.constant 0.0 : f32
- %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<8x1xf32>
+ %1 = vector.transfer_read %A[%i, %c0], %f0 : memref<16x1xf32>, vector<8x1xf32>
return %1 : vector<8x1xf32>
}
-// CHECK: func @contiguous_inner_most_dim(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
+// CHECK: func @contiguous_inner_most_dim_non_zero_idx(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index) -> vector<8x1xf32>
// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
// CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
-// CHECK: %[[RESULT]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
+// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
// CHECK: return %[[RESULT]]
+// The index to be dropped is != 0 - this is currently not supported.
+func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+ %f0 = arith.constant 0.0 : f32
+ %1 = vector.transfer_read %A[%i, %i], %f0 : memref<16x1xf32>, vector<8x1xf32>
+ return %1 : vector<8x1xf32>
+}
+// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idxs
+// CHECK-NOT: memref.subview
+// CHECK: vector.transfer_read
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_non_zero_idx_scalable_inner_dim(%A: memref<16x1xf32>, %i:index) -> (vector<[8]x1xf32>) {
+ %c0 = arith.constant 0 : index
+ %f0 = arith.constant 0.0 : f32
+ %1 = vector.transfer_read %A[%i, %c0], %f0 : memref<16x1xf32>, vector<[8]x1xf32>
+ return %1 : vector<[8]x1xf32>
+}
+// CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idx_scalable_inner_dim(
+// CHECK-SAME: %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index) -> vector<[8]x1xf32>
+// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
+// CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
+// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<[8]xf32> to vector<[8]x1xf32>
+// CHECK: return %[[RESULT]]
+
// -----
-func.func @contiguous_inner_most_dim_bounds(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
%0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
%1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
return %1 : vector<4x1xf32>
}
-// CHECK: func @contiguous_inner_most_dim_bounds(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
+// CHECK: func @contiguous_inner_most_dim_with_subview(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
// CHECK-SAME: {in_bounds = [true]}
// CHECK-SAME: vector<4xf32>
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+ %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
+ return %1 : vector<[4]x1xf32>
+}
+// CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
+// CHECK-SAME: %[[SRC:.+]]: memref<1000x1xf32>
+// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
+// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
+// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: vector<[4]xf32>
+
// -----
-func.func @contiguous_inner_most_dim_bounds_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
%0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
%1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
return %1 : vector<4x1x1xf32>
}
-// CHECK: func @contiguous_inner_most_dim_bounds_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
+// CHECK: func @contiguous_inner_most_dim_with_subview_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
// CHECK-SAME: {in_bounds = [true]}
// CHECK-SAME: vector<4xf32>
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+ %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
+ return %1 : vector<[4]x1x1xf32>
+}
+// CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
+// CHECK-SAME: %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
+// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
+// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
+// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
+// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: vector<[4]xf32>
+// CHECK: vector.shape_cast %[[V]]
+
// -----
-func.func @contiguous_inner_most_dim_out_of_bounds_2d(%arg0: memref<1x1xf32>) -> vector<4x8xf32> {
+// NOTE: This is an out-of-bounds access.
+
+func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8xf32> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<1x1xf32>, vector<4x8xf32>
+ %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x1xf32>, vector<4x8xf32>
return %0 : vector<4x8xf32>
}
-// The inner most unit dim can not be dropped. In this context, we do not
-// generate rank-reduced memref.subview ops.
-// CHECK: func.func @contiguous_inner_most_dim_out_of_bounds_2d
-// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
+// CHECK: func.func @negative_non_unit_inner_vec_dim
+// CHECK-NOT: memref.subview
+// CHECK: vector.transfer_read
+
+// -----
+
+func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<4x1xf32> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x8xf32>, vector<4x1xf32>
+ return %0 : vector<4x1xf32>
+}
+// CHECK: func.func @negative_non_unit_inner_memref_dim
// CHECK-NOT: memref.subview
-// CHECK: %[[READ:.+]] = vector.transfer_read %[[SRC]]
-// CHECK: return %[[READ]] : vector<4x8xf32>
+// CHECK: vector.transfer_read
// -----
+//-----------------------------------------------------------------------------
+// 2. vector.transfer_write
+//-----------------------------------------------------------------------------
+
func.func @drop_two_inner_most_dim_for_transfer_write(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
%c0 = arith.constant 0 : index
vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
@@ -177,21 +328,6 @@ func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], o
// -----
-func.func @leading_scalable_dimension_transfer_read(%dest : memref<24x1xf32>) -> vector<[4]x1xf32> {
- %c0 = arith.constant 0 : index
- %pad = arith.constant 0.0 : f32
- %0 = vector.transfer_read %dest[%c0, %c0], %pad {in_bounds = [true, true]} : memref<24x1xf32>, vector<[4]x1xf32>
- return %0 : vector<[4]x1xf32>
-}
-// CHECK: func.func @leading_scalable_dimension_transfer_read
-// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
-// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0] [24, 1] [1, 1] : memref<24x1xf32> to memref<24xf32, strided<[1]>>
-// CHECK: %[[READ:.+]] = vector.transfer_read %[[SUBVIEW]]{{.*}} {in_bounds = [true]} : memref<24xf32, strided<[1]>>, vector<[4]xf32>
-// CHECK: %[[CAST:.+]] = vector.shape_cast %[[READ]] : vector<[4]xf32> to vector<[4]x1xf32>
-// CHECK: return %[[CAST]]
-
-// -----
-
// Negative test: [1] (scalable 1) is _not_ a unit dimension.
func.func @trailing_scalable_one_dim_transfer_read(%dest : memref<24x1xf32>) -> vector<4x[1]xf32> {
%c0 = arith.constant 0 : index
auto isZeroIdx = [](Value idx) {
  Attribute attr;
  APInt value;
  if (!matchPattern(idx, m_Constant(&attr)))
    return false;
  if (matchPattern(attr, m_ConstantInt(&value)))
    if (!value.isZero())
      return false;
  return true;
};
Can we use `isZeroIndex`, which is from `StaticValueUtils.h`?
llvm-project/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h
Lines 27 to 29 in 1d96e4b
/// Return true if `v` is an IntegerAttr with value `0` of a ConstantIndexOp
/// with attribute with value `0`.
bool isZeroIndex(OpFoldResult v);
Oh, nice, thanks for the suggestion!
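For reference, a minimal sketch of what the simplified check might look like, assuming the surrounding pattern state (`readOp`, `dimsToDrop`) from `VectorTransforms.cpp` is in scope; the exact shape of the final commit may differ:

```cpp
// Assumes: #include "mlir/Dialect/Utils/StaticValueUtils.h"
// isZeroIndex takes an OpFoldResult, to which a Value converts implicitly,
// so the hand-rolled isZeroIdx lambda above reduces to:
if (!llvm::all_of(readOp.getIndices().take_back(dimsToDrop),
                  [](Value idx) { return isZeroIndex(idx); }))
  return failure();
```

This also keeps the zero-index semantics in one shared utility instead of duplicating the constant-matching logic inside the pattern.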
Looking at indices is convenient and correct. I wonder if looking at the `in_bounds` attribute would be better here?
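For illustration only, a sketch of what an `in_bounds`-based check might look like; this assumes the dropped trailing vector dims line up with the trailing `in_bounds` entries and uses `isDimInBounds` from `VectorTransferOpInterface` (it is not the approach this PR takes):

```cpp
// Hypothetical alternative: instead of requiring the dropped indices to be 0,
// require the corresponding trailing vector dims to be marked in-bounds.
int64_t vecRank = readOp.getVectorType().getRank();
for (int64_t dim = vecRank - dimsToDrop; dim < vecRank; ++dim)
  if (!readOp.isDimInBounds(dim))
    return failure();
```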
Is it intended to not have `// -----` between some tests? E.g., line 92, etc.
I think that there are two different (both important) concerns here: the indices that are being dropped not being 0, and the access being out of bounds (i.e. what the `in_bounds` attribute communicates).

I'm only looking at the former concern. In particular, for this example:

```mlir
func.func @negative_example(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) {
  %f0 = arith.constant 0.0 : f32
  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<8x1xf32>
  return %1 : vector<8x1xf32>
}
```

what should be the output after collapsing? This is what you will get today:

```mlir
func.func @negative_example(%arg0: memref<16x1xf32>, %arg1: index, %arg2: index) -> vector<8x1xf32> {
  %cst = arith.constant 0.000000e+00 : f32
  %subview = memref.subview %arg0[0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
  %0 = vector.transfer_read %subview[%arg1], %cst : memref<16xf32, strided<[1]>>, vector<8xf32>
  %1 = vector.shape_cast %0 : vector<8xf32> to vector<8x1xf32>
  return %1 : vector<8x1xf32>
}
```

This is not correct - `%arg2` is simply ignored.

Please let me know if this makes sense - I hope that I'm not missing something important myself 😅
That allows me to insert comments like:

```mlir
// Same as the top example within this split, but with the outer vector
// dim scalable.
```

And in general, it helps to group "similar" tests together.
The concerns are very valid to me! I was thinking that the unit dim is a special case, and wondered if we could do some code improvement based on it (e.g., by making some assumptions). After thinking about it for a while, I can't figure out a way. The patch looks good to me.
Restrict `DropInnerMostUnitDimsTransferRead` so that it fails when one of the indices to be dropped could be != 0, e.g.

```mlir
func.func @negative_example(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) {
  %f0 = arith.constant 0.0 : f32
  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<8x1xf32>
  return %1 : vector<8x1xf32>
}
```

This is an edge case that could represent an out-of-bounds access, though that will depend on the actual value of `%j`.

NOTE: This PR is limited to tests for `vector.transfer_read`.

Depends on: llvm#94490, llvm#94604
Switch to using isZeroIndex from StaticValueUtils.h
Restrict `DropInnerMostUnitDimsTransferWrite` so that it fails when one of the indices to be dropped could be != 0, e.g.

```mlir
func.func @negative_example(
    %arg0: memref<16x1xf32>,
    %arg1: vector<8x1xf32>,
    %idx_1: index,
    %idx_2: index) {

  %c0 = arith.constant 0 : index
  vector.transfer_write %arg1, %arg0[%idx_1, %idx_2] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
  return
}
```

This is an edge case that could represent an out-of-bounds access, though that will depend on the actual value of `%idx_2`. Importantly, _without this change_ it would be transformed as follows:

```mlir
func.func @negative_example(
    %arg0: memref<16x1xf32>,
    %arg1: vector<8x1xf32>,
    %idx_1: index,
    %idx_2: index) {

  %subview = memref.subview %arg0[0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
  %0 = vector.shape_cast %arg1 : vector<8x1xf32> to vector<8xf32>
  vector.transfer_write %0, %subview[%idx_1] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
  return
}
```

This is incorrect - `%idx_2` is ignored. Hence the extra restriction to avoid such cases.

NOTE: This PR is limited to `vector.transfer_write`. Similar patch for `vector.transfer_read`: llvm#94904
…6218)

Restrict `DropInnerMostUnitDimsTransfer{Read|Write}` so that it fails when one of the indices to be dropped could be != 0 and "out of bounds":

```mlir
func.func @negative_example(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %idx_1: index, %idx_2: index) {
  vector.transfer_write %arg1, %arg0[%idx_1, %idx_2] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
  return
}
```

This is an edge case that could represent an out-of-bounds access, though that will depend on the actual value of `%idx_2`. Importantly, without this change it would be transformed as follows:

```mlir
func.func @negative_example(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %arg2: index, %arg3: index) {
  %subview = memref.subview %arg0[0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
  %0 = vector.shape_cast %arg1 : vector<8x1xf32> to vector<8xf32>
  vector.transfer_write %0, %subview[%arg2] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
  return
}
```

This is incorrect - `%idx_2` is ignored and the "out of bounds" flag is not propagated. Hence the extra restriction to avoid such cases.

NOTE: This is a follow-up for: #94904
Restrict `DropInnerMostUnitDimsTransferRead` so that it fails when one of the indices to be dropped could be != 0, e.g. the `@negative_example` quoted in the discussion above. This is an edge case that could represent an out-of-bounds access, though that will depend on the actual value of `%j`. Importantly, without this change it would be transformed as shown in that discussion. That output is incorrect - `%arg2` is ignored. Hence the extra restriction to avoid such cases.

NOTE: This PR is limited to tests for `vector.transfer_read`.