Skip to content

Commit ec450b1

Browse files
authored
[mlir][xegpu] Allow out-of-bounds writes (#110811)
Relaxes the vector.transfer_write lowering to allow out-of-bounds writes. This aligns the lowering with the current hardware specification, under which block stores do not update bytes at out-of-bounds locations.
1 parent 17bc959 commit ec450b1

File tree

2 files changed

+22
-18
lines changed

2 files changed

+22
-18
lines changed

mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -218,18 +218,15 @@ struct TransferWriteLowering
218218
if (failed(transferPreconditions(rewriter, writeOp)))
219219
return failure();
220220

221-
if (writeOp.hasOutOfBoundsDim())
222-
return rewriter.notifyMatchFailure(writeOp,
223-
"Unsupported out-of-bounds write");
224221
AffineMap map = writeOp.getPermutationMap();
225222
if (!map.isMinorIdentity())
226223
return rewriter.notifyMatchFailure(writeOp, "Expects identity map");
227224

228225
VectorType vecTy = writeOp.getVectorType();
229-
auto descType =
230-
xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
231-
/*array_length=*/1, /*boundary_check=*/false,
232-
xegpu::MemorySpace::Global);
226+
auto descType = xegpu::TensorDescType::get(
227+
vecTy.getShape(), vecTy.getElementType(),
228+
/*array_length=*/1, /*boundary_check=*/writeOp.hasOutOfBoundsDim(),
229+
xegpu::MemorySpace::Global);
233230
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
234231
rewriter, loc, descType,
235232
dyn_cast<TypedValue<MemRefType>>(writeOp.getSource()),

mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,30 +66,37 @@ func.func @store_dynamic_source(%vec: vector<8x16xf32>,
6666

6767
// -----
6868

69-
func.func @no_store_transposed(%vec: vector<8x16xf32>,
70-
%source: memref<32x64xf32>, %offset: index) {
69+
func.func @store_out_of_bounds(%vec: vector<8x16xf32>,
70+
%source: memref<7x64xf32>, %offset: index) {
7171
vector.transfer_write %vec, %source[%offset, %offset]
72-
{permutation_map = affine_map<(d0, d1) -> (d1, d0)>,
73-
in_bounds = [true, true]}
74-
: vector<8x16xf32>, memref<32x64xf32>
72+
{in_bounds = [false, true]}
73+
: vector<8x16xf32>, memref<7x64xf32>
7574
return
7675
}
7776

78-
// CHECK-LABEL: @no_store_transposed(
79-
// CHECK: vector.transfer_write
77+
// CHECK-LABEL: @store_out_of_bounds(
78+
// CHECK-SAME: %[[VEC:.+]]: vector<8x16xf32>,
79+
// CHECK-SAME: %[[SRC:.+]]: memref<7x64xf32>,
80+
// CHECK-SAME: %[[OFFSET:.+]]: index
81+
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
82+
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]]]
83+
// CHECK-SAME: memref<7x64xf32> -> !xegpu.tensor_desc<8x16xf32,
84+
// CHECK-SAME: boundary_check = true
85+
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8x16xf32>
8086

8187
// -----
8288

83-
func.func @no_store_out_of_bounds(%vec: vector<8x16xf32>,
89+
func.func @no_store_transposed(%vec: vector<8x16xf32>,
8490
%source: memref<32x64xf32>, %offset: index) {
8591
vector.transfer_write %vec, %source[%offset, %offset]
86-
{in_bounds = [false, true]}
92+
{permutation_map = affine_map<(d0, d1) -> (d1, d0)>,
93+
in_bounds = [true, true]}
8794
: vector<8x16xf32>, memref<32x64xf32>
8895
return
8996
}
9097

91-
// CHECK-LABEL: @no_store_out_of_bounds(
92-
// CHECK: vector.transfer_write
98+
// CHECK-LABEL: @no_store_transposed(
99+
// CHECK: vector.transfer_write
93100

94101
// -----
95102

0 commit comments

Comments
 (0)