[mlir][xegpu] Support boundary checks only for block instructions #119380
Conversation
Constrains Vector lowering to apply boundary checks only to data transfers operating on block shapes. This further aligns lowering with the current Xe instructions' restrictions.
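For illustration, here is a minimal sketch of the intended effect, based on the updated lit tests in the diff below; the test function name is hypothetical and the printed tensor_desc attributes are abridged rather than exact output.

// 1D (non-block) load: the lowering now creates the descriptor with
// boundary_check = false, since only Xe block instructions support
// boundary checks; rank-2 and higher (block) transfers keep it enabled.
func.func @load_1d(%source: memref<8x16x32xf32>, %offset: index) -> vector<8xf32> {
  %0 = vector.load %source[%offset, %offset, %offset]
      : memref<8x16x32xf32>, vector<8xf32>
  return %0 : vector<8xf32>
}

// Expected lowering (abridged):
//   %desc = xegpu.create_nd_tdesc %source[%offset, %offset, %offset]
//             : memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32, ... boundary_check = false ...>
//   %vec = xegpu.load_nd %desc ... -> vector<8xf32>

Out-of-bounds 1D vector.transfer_read/transfer_write ops are instead rejected by the lowering preconditions and remain as Vector dialect ops.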
@llvm/pr-subscribers-mlir-gpu

Author: Adam Siemieniuk (adam-smnk)

Changes

Constrains Vector lowering to apply boundary checks only to data transfers operating on block shapes. This further aligns lowering with the current Xe instructions' restrictions.

Full diff: https://github.com/llvm/llvm-project/pull/119380.diff

5 Files Affected:
diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index 215e1b1b874520..1232d8795d4dcf 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
xferOp, "Buffer must be contiguous in the innermost dimension");
unsigned vecRank = vecTy.getRank();
+ if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
+ return rewriter.notifyMatchFailure(
+ xferOp, "Boundary check is available only for block instructions.");
+
AffineMap map = xferOp.getPermutationMap();
if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
return failure();
+ // Boundary check is available only for block instructions.
+ bool boundaryCheck = vecTy.getRank() > 1;
+
auto descType = xegpu::TensorDescType::get(
vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
- /*boundary_check=*/true, xegpu::MemorySpace::Global);
+ boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
return failure();
- auto descType =
- xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
- /*array_length=*/1, /*boundary_check=*/true,
- xegpu::MemorySpace::Global);
+ // Boundary check is available only for block instructions.
+ bool boundaryCheck = vecTy.getRank() > 1;
+
+ auto descType = xegpu::TensorDescType::get(
+ vecTy.getShape(), vecTy.getElementType(),
+ /*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index e2a506f8ad5abd..7cef17df79dd2a 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
// CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
// CHECK: return %[[VEC]]
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index 3d45407c2486d6..4f069ebc39db37 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
// -----
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 4841ecbb62e807..497eb86cea8353 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
// -----
+func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
+ %offset: index) -> vector<8xf32> {
+ %c0 = arith.constant 0.0 : f32
+ %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
+ {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
+ return %0 : vector<8xf32>
+}
+
+// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_read
+
+// -----
+
func.func @no_load_masked(%source : memref<4xf32>,
%offset : index) -> vector<4xf32> {
%c0 = arith.constant 0.0 : f32
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 076760fe21dc86..91e3fb3841f6eb 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
// CHECK-LABEL: @no_store_unsupported_map(
// CHECK: vector.transfer_write
+
+// -----
+
+func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
+ %source: memref<8x16x32xf32>, %offset: index) {
+ vector.transfer_write %vec, %source[%offset, %offset, %offset]
+ {in_bounds = [false]}
+ : vector<8xf32>, memref<8x16x32xf32>
+ return
+}
+
+// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_write
Follow up on #110811
LGTM. Thanks
Constrains Vector lowering to apply boundary checks only to data transfers operating on block shapes.
This further aligns lowering with the current Xe instructions' restrictions.