[mlir][xegpu] Allow out-of-bounds writes #110811
Conversation
Relaxes vector.transfer_write lowering to allow out-of-bounds writes. This aligns the lowering with the current hardware specification, which does not update bytes in out-of-bounds locations.
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-gpu

Author: Adam Siemieniuk (adam-smnk)

Changes: Relaxes vector.transfer_write lowering to allow out-of-bounds writes. This aligns the lowering with the current hardware specification, which does not update bytes in out-of-bounds locations.

Full diff: https://github.com/llvm/llvm-project/pull/110811.diff

2 Files Affected:
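For illustration, below is a minimal sketch of the new behavior, based on the test added in this PR: a vector.transfer_write with an out-of-bounds dimension now lowers to an XeGPU descriptor created with boundary_check = true instead of failing to match. The lowered form is paraphrased from the test's CHECK lines and elides attribute details.

func.func @store_out_of_bounds(%vec: vector<8x16xf32>,
                               %source: memref<7x64xf32>, %offset: index) {
  // The leading dimension may run past the end of the 7x64 buffer.
  vector.transfer_write %vec, %source[%offset, %offset]
    {in_bounds = [false, true]}
    : vector<8x16xf32>, memref<7x64xf32>
  return
}

// After the conversion, roughly (details elided):
//   %desc = xegpu.create_nd_tdesc %source[%offset, %offset]
//       : memref<7x64xf32> -> !xegpu.tensor_desc<8x16xf32, ... boundary_check = true ...>
//   xegpu.store_nd %vec, %desc : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, ...>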
diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index 0e21e96cc3fbb9..9bfa9f5558081c 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -203,18 +203,15 @@ struct TransferWriteLowering
if (failed(transferPreconditions(rewriter, writeOp)))
return failure();
- if (writeOp.hasOutOfBoundsDim())
- return rewriter.notifyMatchFailure(writeOp,
- "Unsupported out-of-bounds write");
AffineMap map = writeOp.getPermutationMap();
if (!map.isMinorIdentity())
return rewriter.notifyMatchFailure(writeOp, "Expects identity map");
VectorType vecTy = writeOp.getVectorType();
- auto descType =
- xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
- /*array_length=*/1, /*boundary_check=*/false,
- xegpu::MemorySpace::Global);
+ auto descType = xegpu::TensorDescType::get(
+ vecTy.getShape(), vecTy.getElementType(),
+ /*array_length=*/1, /*boundary_check=*/writeOp.hasOutOfBoundsDim(),
+ xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType,
dyn_cast<TypedValue<MemRefType>>(writeOp.getSource()),
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 361919c47b097d..076760fe21dc86 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -66,30 +66,37 @@ func.func @store_dynamic_source(%vec: vector<8x16xf32>,
// -----
-func.func @no_store_transposed(%vec: vector<8x16xf32>,
- %source: memref<32x64xf32>, %offset: index) {
+func.func @store_out_of_bounds(%vec: vector<8x16xf32>,
+ %source: memref<7x64xf32>, %offset: index) {
vector.transfer_write %vec, %source[%offset, %offset]
- {permutation_map = affine_map<(d0, d1) -> (d1, d0)>,
- in_bounds = [true, true]}
- : vector<8x16xf32>, memref<32x64xf32>
+ {in_bounds = [false, true]}
+ : vector<8x16xf32>, memref<7x64xf32>
return
}
-// CHECK-LABEL: @no_store_transposed(
-// CHECK: vector.transfer_write
+// CHECK-LABEL: @store_out_of_bounds(
+// CHECK-SAME: %[[VEC:.+]]: vector<8x16xf32>,
+// CHECK-SAME: %[[SRC:.+]]: memref<7x64xf32>,
+// CHECK-SAME: %[[OFFSET:.+]]: index
+// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
+// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]]]
+// CHECK-SAME: memref<7x64xf32> -> !xegpu.tensor_desc<8x16xf32,
+// CHECK-SAME: boundary_check = true
+// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8x16xf32>
// -----
-func.func @no_store_out_of_bounds(%vec: vector<8x16xf32>,
+func.func @no_store_transposed(%vec: vector<8x16xf32>,
%source: memref<32x64xf32>, %offset: index) {
vector.transfer_write %vec, %source[%offset, %offset]
- {in_bounds = [false, true]}
+ {permutation_map = affine_map<(d0, d1) -> (d1, d0)>,
+ in_bounds = [true, true]}
: vector<8x16xf32>, memref<32x64xf32>
return
}
-// CHECK-LABEL: @no_store_out_of_bounds(
-// CHECK: vector.transfer_write
+// CHECK-LABEL: @no_store_transposed(
+// CHECK: vector.transfer_write
// -----
Inline review comment on:

        xegpu::MemorySpace::Global);
    auto descType = xegpu::TensorDescType::get(
        vecTy.getShape(), vecTy.getElementType(),
        /*array_length=*/1, /*boundary_check=*/writeOp.hasOutOfBoundsDim(),
Reviewer: Be aware that there is no boundary check for 1D loads/stores.
Author reply: Good point; I'll open a separate PR that adds extra validation to both the read and write lowerings for 1D shapes.
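To make the concern concrete, here is a hypothetical 1D case (illustrative only; the exact checks are left to that follow-up PR). Because 1D block loads/stores provide no hardware boundary check, an out-of-bounds 1D transfer like this cannot rely on boundary_check and would need to be rejected or otherwise guarded by the planned validation.

func.func @store_1d_out_of_bounds(%vec: vector<16xf32>,
                                  %source: memref<10xf32>, %offset: index) {
  // Out-of-bounds 1D write: no hardware boundary check is available here,
  // so this case needs explicit validation in the lowering.
  vector.transfer_write %vec, %source[%offset]
    {in_bounds = [false]}
    : vector<16xf32>, memref<10xf32>
  return
}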