Commit d9d6233

[mlir][linalg] Add a new helper hook: hasVectorizationImpl (#110708)
The newly added hook simply returns `false` for Ops for which there's no "vectorization logic" in the Linalg Vectorizer (i.e. the `vectorize()` method). It's added so that the following two TD ops expose an identical level of functionality (which is currently not the case):

* `transform.structured.vectorize_children_and_apply_patterns`
* `transform.structured.vectorize`

Specifically, at the moment the former works only for Linalg Ops, while the latter works for all Ops that the vectorizer supports (*). With this change, both TD ops behave consistently. Note that this shouldn't affect any of the current uses of the vectorizer.

(*) This is implemented via the `vectorize()` method in Vectorization.cpp.
1 parent: 992e754
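
To illustrate the idiom the commit converges on, here is a minimal, hypothetical caller sketch (the helper `tryVectorize` is invented for illustration; the real call sites are shown in the diffs below): candidates are first filtered with the new hook and only then handed to the vectorizer, which still runs its own finer-grained pre-condition checks.

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Hypothetical helper (not part of this commit): gate the vectorizer with
// the new hook instead of hard-coding the supported op types at call sites.
static LogicalResult tryVectorize(RewriterBase &rewriter, Operation *op) {
  // hasVectorizationImpl() returns false for any Op the Linalg Vectorizer
  // has no dedicated logic for.
  if (!linalg::hasVectorizationImpl(op))
    return failure();
  // vectorize() may still fail: a supported op kind can be rejected by the
  // vectorizer's own pre-condition checks.
  return linalg::vectorize(rewriter, op, /*inputVectorSizes=*/{},
                           /*inputScalableVecDims=*/{},
                           /*vectorizeNDExtract=*/false,
                           /*flatten1DDepthwiseConv=*/false);
}

After this change, both `transform.structured.vectorize_children_and_apply_patterns` (via `VectorizationPattern`) and `transform.structured.vectorize` follow this shape and therefore accept the same set of Ops.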

5 files changed: +96, -7 lines

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Lines changed: 8 additions & 0 deletions
@@ -762,6 +762,14 @@ LogicalResult copyToGPUPrivateMemory(OpBuilder &b, Value src, Value dst);
 /// memory is freed when going outside of the scope.
 LogicalResult deallocateGPUPrivateMemory(OpBuilder &, Value /*buffer*/);
 
+/// Return true if there's dedicated logic in the Linalg Vectorizer to
+/// vectorize this Op, false otherwise.
+///
+/// Note that this helper merely implements a very high level check and that the
+/// vectorizer also requires various additional pre-conditions to be met for it
+/// to work (these are checked by the vectorizer itself).
+bool hasVectorizationImpl(Operation *);
+
 /// Emit a suitable vector form for an operation. If provided,
 /// `inputVectorSizes` are used to vectorize this operation. `inputVectorSizes`
 /// must match the rank of the iteration space of the operation and the sizes

mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp

Lines changed: 6 additions & 7 deletions
@@ -3416,11 +3416,11 @@ struct VectorizationPattern : public RewritePattern {
         flatten1DDepthwiseConv(flattenConv) {}
   LogicalResult matchAndRewrite(Operation *op,
                                 PatternRewriter &rewriter) const override {
-    LinalgOp linalgOp = dyn_cast<LinalgOp>(op);
-    if (!linalgOp)
-      return rewriter.notifyMatchFailure(op, "expected Linalg Op");
-    return vectorize(rewriter, linalgOp, /*inputVectorSizes=*/{},
-                     /*scalableVecDims=*/{}, vectorizeNDExtract,
+    if (!linalg::hasVectorizationImpl(op))
+      return rewriter.notifyMatchFailure(op,
+                                         "Unsupported Op, cannot vectorize");
+    return vectorize(rewriter, op, /*inputVectorSizes=*/{},
+                     /*inputScalableVecDims=*/{}, vectorizeNDExtract,
                      flatten1DDepthwiseConv);
   }

@@ -3501,8 +3501,7 @@ DiagnosedSilenceableFailure transform::VectorizeOp::apply(
 
   // TODO: Check that the correct number of vectorSizes was provided.
   for (Operation *target : targets) {
-    if (!isa<linalg::LinalgOp, tensor::PadOp, tensor::PackOp, tensor::UnPackOp>(
-            target)) {
+    if (!linalg::hasVectorizationImpl(target)) {
       return mlir::emitSilenceableFailure(target->getLoc())
              << "Unsupported Op, cannot vectorize";
     }

mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp

Lines changed: 9 additions & 0 deletions
@@ -2083,6 +2083,10 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition(
     Operation *op, ArrayRef<int64_t> inputVectorSizes,
     ArrayRef<bool> inputScalableVecDims, bool vectorizeNDExtract,
     bool flatten1DDepthwiseConv) {
+
+  if (!hasVectorizationImpl(op))
+    return failure();
+
   if (failed(vectorizeScalableVectorPrecondition(op, inputVectorSizes,
                                                  inputScalableVecDims)))
     return failure();

@@ -2120,6 +2124,11 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) {
   }
 }
 
+bool mlir::linalg::hasVectorizationImpl(Operation *op) {
+  return isa<linalg::LinalgOp, tensor::PadOp, tensor::PackOp, tensor::UnPackOp>(
+      op);
+}
+
 /// Emit a suitable vector form for an operation. If provided,
 /// `inputVectorSizes` are used to vectorize this operation.
 /// `inputVectorSizes` must match the rank of the iteration space of the
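
To make the layering concrete, here is a small hypothetical snippet (the function `classifyForVectorization` is invented for illustration, not part of the commit): `hasVectorizationImpl` is a pure `isa<>` filter, while `vectorizeOpPrecondition` performs the finer, op-specific validation on top of it.

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

using namespace mlir;

// Hypothetical illustration of the two check levels wired up above.
void classifyForVectorization(Operation *op) {
  if (!linalg::hasVectorizationImpl(op)) {
    // Coarse filter: neither a LinalgOp nor tensor.pad/tensor.pack/
    // tensor.unpack - there is no vectorization logic for this Op at all.
    return;
  }
  // Supported op kind, but this particular instance may still be rejected
  // by the finer pre-conditions (e.g. vector-size or scalable-dim checks).
  if (failed(linalg::vectorizeOpPrecondition(
          op, /*inputVectorSizes=*/{}, /*inputScalableVecDims=*/{},
          /*vectorizeNDExtract=*/false, /*flatten1DDepthwiseConv=*/false)))
    return;
  // A vectorize() call with the same parameters is now expected to succeed.
}

Note that, as the first hunk above shows, `vectorizeOpPrecondition` now calls `hasVectorizationImpl` itself, so the explicit coarse check in this sketch is redundant and spelled out only to expose the layering.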

mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir

Lines changed: 65 additions & 0 deletions
@@ -2010,3 +2010,68 @@ module attributes {transform.with_named_sequence} {
 // CHECK: %[[VAL_8:.*]] = vector.transpose %[[VAL_7]], [1, 2, 3, 0] : vector<1x1x12x197xf32> to vector<1x12x197x1xf32>
 // CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_3]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]] {in_bounds = [true, true, true, true]} : vector<1x12x197x1xf32>, tensor<1x12x197x1xf32>
 // CHECK: return %[[VAL_9]] : tensor<1x12x197x1xf32>
+
+// -----
+
+// Input identical to the test in vectorization.mlir. Output is different -
+// vector sizes are inferred (rather than user-specified) and hence _no_
+// masking was used.
+
+func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
+  %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
+  return %pack : tensor<4x1x32x16x2xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// CHECK-LABEL: func.func @test_vectorize_pack(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x8x16xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]]], %[[VAL_2]] {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32>
+// CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
+// CHECK: %[[VAL_6:.*]] = vector.transpose %[[VAL_5]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32>
+// CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<4x1x32x16x2xf32>
+// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_6]], %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]]] {in_bounds = [true, true, true, true, true]} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32>
+// CHECK: return %[[VAL_8]] : tensor<4x1x32x16x2xf32>
+
+// -----
+
+// Input identical to the test in vectorization.mlir. Output is different -
+// vector sizes are inferred (rather than user-specified) and hence _no_
+// masking was used.
+
+func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
+  %pad = arith.constant 0.000000e+00 : f32
+  %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
+  return %pack : tensor<32x4x1x16x2xf32>
+}
+
+// CHECK-LABEL: func.func @test_vectorize_padded_pack(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x7x15xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]]], %[[VAL_2]] {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
+// CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
+// CHECK: %[[VAL_6:.*]] = vector.transpose %[[VAL_5]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
+// CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
+// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_6]], %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]]] {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+// CHECK: return %[[VAL_8]] : tensor<32x4x1x16x2xf32>
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}

mlir/test/Dialect/Linalg/vectorization.mlir

Lines changed: 8 additions & 0 deletions
@@ -666,6 +666,10 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// Input identical to the test in vectorization-with-patterns.mlir. Output is
+// different - vector sizes are user-specified (rather than inferred) and hence
+// masking was used.
+
 func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
   %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
   return %pack : tensor<4x1x32x16x2xf32>

@@ -692,6 +696,10 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// Input identical to the test in vectorization-with-patterns.mlir. Output is
+// different - vector sizes are user-specified (rather than inferred) and hence
+// masking was used.
+
 func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
   %pad = arith.constant 0.000000e+00 : f32
   %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
