[mlir][linalg] Add vectorization to the e2e test for tensor.unpack #123032
Conversation
@llvm/pr-subscribers-mlir-linalg @llvm/pr-subscribers-mlir

Author: Andrzej Warzyński (banach-space)

Changes

Following on from #122927 + #123031 that added support for masked vectorization of `tensor.insert_slice`, this PR extends the e2e test for `tensor.unpack` to leverage the new functionality.
Patch is 38.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123032.diff

7 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 1dc700f22c2027..726ce22ac70dc3 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1723,11 +1723,6 @@ void populateDecomposePadPatterns(RewritePatternSet &patterns);
/// \see rewriteInIm2Col for more details.
void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns);
-/// Populates `patterns` with vectorisation patterns for tensor.insert_slice.
-/// TODO: Avoid having a dedicated `populate{}` for one pattern. Instead, either
-/// expand or merge with other `populate{}`.
-void populateInsertSliceVectorizationPatterns(RewritePatternSet &patterns);
-
/// Populates `patterns` with patterns that vectorize tensor.pad.
/// These patterns are meant to apply in a complementary fashion. Benefits
/// are used to encode a certain ordering of pattern application. To avoid
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 67dd21aafe4fe0..73a52ebc46f15b 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -265,7 +265,6 @@ void transform::ApplyFoldAddIntoDestPatternsOp::populatePatterns(
void transform::ApplyPadVectorizationPatternsOp::populatePatterns(
RewritePatternSet &patterns) {
linalg::populatePadOpVectorizationPatterns(patterns);
- linalg::populateInsertSliceVectorizationPatterns(patterns);
}
//===----------------------------------------------------------------------===//
@@ -3504,9 +3503,6 @@ transform::VectorizeChildrenAndApplyPatternsOp::applyToOne(
patterns.add<CopyVectorizationPattern>(ctx);
- // Add misc. vectorization patterns (e.g. for tensor.insert_slice)
- linalg::populateInsertSliceVectorizationPatterns(patterns);
-
if (getVectorizePadding()) {
linalg::populatePadOpVectorizationPatterns(patterns);
// This creates an alternative path for lowering tensor.pad - by
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 863f2280e46ce6..3c59bcb8d6ecb3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -59,6 +59,30 @@ vectorizeConvolution(RewriterBase &rewriter, LinalgOp convOp,
ArrayRef<bool> inputVecScalableFlags = {},
bool flatten1DDepthwiseConv = false);
+/// Vectorize tensor::InsertSliceOp with:
+/// * vector::TransferReadOp + vector::TransferWriteOp
+/// The vector sizes are either:
+/// * user-provided in `inputVectorSizes`, or
+/// * inferred from the static dims in the input and output tensors.
+/// Bails out if:
+/// * vector sizes are not user-provided, and
+/// * at least one dim is dynamic (in both the input and output tensors).
+///
+/// Before:
+/// !t_in_type = tensor<1x2x3xf32>
+/// !t_out_type = tensor<9x8x7x1x2x3xf32>
+/// !v_type = vector<1x2x3xf32>
+/// %inserted_slice = tensor.insert_slice %src into %dest ... : !t_in_type
+/// into !t_out_type
+/// After:
+/// %read = vector.transfer_read %src[...], %pad ... : !t_in_type, !v_type
+/// %write = vector.transfer_write %read, %dest ... : !v_type, !t_out_type
+static LogicalResult
+vectorizeAsInsertSliceOp(RewriterBase &rewriter, tensor::InsertSliceOp sliceOp,
+ ArrayRef<int64_t> inputVectorSizes,
+ SmallVectorImpl<Value> &newResults);
+
/// Return the unique instance of OpType in `block` if it is indeed unique.
/// Return null if none or more than 1 instances exist.
template <typename OpType>
@@ -1557,6 +1581,7 @@ static LogicalResult
vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
+ // TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(packOp);
@@ -1633,6 +1658,7 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
+ // TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unpackOp);
@@ -1763,7 +1789,7 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
auto padValue = padOp.getConstantPaddingValue();
Location loc = padOp.getLoc();
- // transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value))
+ // TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(padOp);
@@ -1874,6 +1900,15 @@ vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp,
return success();
}
+/// Precondition checks for vectorizing tensor.insert_slice (currently a stub).
+static LogicalResult
+vectorizeInsertSliceOpPrecondition(tensor::InsertSliceOp sliceOp,
+ ArrayRef<int64_t> inputVectorSizes) {
+
+ // TODO: Move pre-conditions from the vectorization logic
+ return success();
+}
+
static LogicalResult vectorizeLinalgOpPrecondition(
LinalgOp linalgOp, ArrayRef<int64_t> inputVectorSizes,
bool vectorizeNDExtract, bool flatten1DDepthwiseConv) {
@@ -2144,6 +2179,9 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition(
.Case<tensor::UnPackOp>([&](auto unpackOp) {
return vectorizeUnPackOpPrecondition(unpackOp, inputVectorSizes);
})
+ .Case<tensor::InsertSliceOp>([&](auto sliceOp) {
+ return vectorizeInsertSliceOpPrecondition(sliceOp, inputVectorSizes);
+ })
.Default([](auto) { return failure(); });
}
@@ -2163,8 +2201,8 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) {
}
bool mlir::linalg::hasVectorizationImpl(Operation *op) {
- return isa<linalg::LinalgOp, tensor::PadOp, tensor::PackOp, tensor::UnPackOp>(
- op);
+ return isa<linalg::LinalgOp, tensor::PadOp, tensor::PackOp, tensor::UnPackOp,
+ tensor::InsertSliceOp>(op);
}
/// Emit a suitable vector form for an operation. If provided,
@@ -2178,6 +2216,7 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
ArrayRef<bool> inputScalableVecDims,
bool vectorizeNDExtract,
bool flatten1DDepthwiseConv) {
LDBG("Attempting to vectorize:\n" << *op << "\n");
LDBG("Input vector sizes: ");
LLVM_DEBUG(llvm::interleaveComma(inputVectorSizes, llvm::dbgs()));
@@ -2244,6 +2283,10 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
return vectorizeAsTensorPackOp(rewriter, packOp, inputVectorSizes,
results);
})
+ .Case<tensor::InsertSliceOp>([&](auto sliceOp) {
+ return vectorizeAsInsertSliceOp(rewriter, sliceOp, inputVectorSizes,
+ results);
+ })
.Case<tensor::UnPackOp>([&](auto unpackOp) {
return vectorizeAsTensorUnpackOp(rewriter, unpackOp,
inputVectorSizes, results);
@@ -2583,113 +2626,143 @@ static Value getStaticPadVal(Operation *op) {
return {};
}
-/// Rewrite tensor.insert.slice as a vector.transfer_read +
-/// vector.transfer_write pair. The vector size is inferred from the static
-/// dims in the input and output tensors. If a dim is dynamic in both the input
-/// and output tensors, bails out.
-///
-/// Before:
-/// !t_in_type = tensor<1x2x3xf32>
-/// !t_out_type = tensor<9x8x7x1x2x3xf32>
-/// !v_type = vector<1x2x3xf32>
-/// %inserted_slice = tensor.insert_slice %src into %dest ... : !t_in_type
-/// into !t_out_type
-/// After:
-/// %read = vector.transfer_read %src[...], %pad ... : !t_in_type, !v_type
-/// %write = vector.transfer_write %read, %dest ... : !v_type, !t_out_type
-///
-/// TODO: Support masking
-struct InsertSliceVectorizePattern
- : public OpRewritePattern<tensor::InsertSliceOp> {
- using OpRewritePattern<tensor::InsertSliceOp>::OpRewritePattern;
+static LogicalResult
+vectorizeAsInsertSliceOp(RewriterBase &rewriter, tensor::InsertSliceOp sliceOp,
+ ArrayRef<int64_t> inputVectorSizes,
+ SmallVectorImpl<Value> &newResults) {
+ // TODO: Introduce a parent class that will handle the insertion point update.
+ OpBuilder::InsertionGuard g(rewriter);
+ rewriter.setInsertionPoint(sliceOp);
- LogicalResult matchAndRewrite(tensor::InsertSliceOp sliceOp,
- PatternRewriter &rewriter) const final {
- auto sourceType = sliceOp.getSource().getType();
- if (!VectorType::isValidElementType(sourceType.getElementType()))
- return failure();
+ TypedValue<RankedTensorType> source = sliceOp.getSource();
+ auto sourceType = source.getType();
+ if (!VectorType::isValidElementType(sourceType.getElementType()))
+ return failure();
- auto resultType = sliceOp.getResultType();
-
- // 1. Get the pad value.
- // TransferReadOp requires a scalar padding value. Note that:
- // * for in-bounds access, the value is actually irrelevant.
- // There are 2 cases in which xfer.read accesses are known to be in-bounds:
- // 1. The source shape is static (output vector sizes would be based on
- // the source shape and hence all memory accesses would be in-bounds),
- // 2. Masking is used (output vector sizes would be user-provided, in which
- // case it is assumed that all memory accesses are in-bounds). This
- // remains a TODO.
- //
- // When the value is not known and not needed, use 0. Otherwise, bail out.
- Value padValue = getStaticPadVal(sliceOp);
- bool isOutOfBoundsRead = !sourceType.hasStaticShape();
-
- if (!padValue && isOutOfBoundsRead) {
- LDBG("Failed to get a pad value for out-of-bounds read access\n");
+ auto resultType = sliceOp.getResultType();
+
+ // 1. Get the pad value.
+ // TransferReadOp requires a scalar padding value. Note that:
+ // * for in-bounds access, the value is actually irrelevant.
+ // There are 2 cases in which xfer.read accesses are known to be in-bounds:
+ // 1. The source shape is static (output vector sizes would be based on
+ // the source shape and hence all memory accesses would be in-bounds),
+ // 2. Masking is used (output vector sizes would be user-provided, in which
+ // case it is assumed that all memory accesses are in-bounds). This
+ // remains a TODO.
+ //
+ // When the value is not known and not needed, use 0. Otherwise, bail out.
+ Value padValue = getStaticPadVal(sliceOp);
+ bool isOutOfBoundsRead =
+ !sourceType.hasStaticShape() && inputVectorSizes.empty();
+
+ if (!padValue && isOutOfBoundsRead) {
+ LDBG("Failed to get a pad value for out-of-bounds read access\n");
+ return failure();
+ }
+
+ if (!padValue) {
+ auto elemType = sourceType.getElementType();
+ padValue = rewriter.create<arith::ConstantOp>(
+ sliceOp.getLoc(), elemType, rewriter.getZeroAttr(elemType));
+ }
+
+ // 2. Get the vector shape and in-bounds attributes
+ SmallVector<int64_t> vecShape;
+ SmallVector<bool> readInBounds;
+ SmallVector<bool> writeInBounds;
+ size_t rankDiff = resultType.getRank() - sourceType.getRank();
+ for (unsigned i = 0; i < sourceType.getRank(); ++i) {
+ if (!inputVectorSizes.empty()) {
+ vecShape.push_back(inputVectorSizes[i]);
+ readInBounds.push_back(false);
+ writeInBounds.push_back(false);
+ } else if (!sourceType.isDynamicDim(i)) {
+ vecShape.push_back(sourceType.getDimSize(i));
+ // Source shape is statically known: Neither read nor write are
+ // out-of-bounds.
+ readInBounds.push_back(true);
+ writeInBounds.push_back(true);
+ } else if (!resultType.isDynamicDim(i)) {
+ // Source shape is not statically known, but result shape is.
+ // Vectorize with size of result shape. This may be larger than the
+ // source size.
+ // FIXME: Using rankDiff implies that the source tensor is inserted at
+ // the end of the destination tensor. However, that's not required.
+ vecShape.push_back(resultType.getDimSize(rankDiff + i));
+ // Read may be out-of-bounds because the result size could be larger
+ // than the source size.
+ readInBounds.push_back(false);
+ // Write will be in-bounds provided that the corresponding write idx is 0.
+ // To keep this logic simple, conservatively mark as out-of-bounds.
+ writeInBounds.push_back(false);
+ } else {
+ // Neither source nor result dim of sliceOp is static. Cannot vectorize
+ // the copy.
+ // TODO: Add support for masking
return failure();
}
+ }
+ auto vecType = VectorType::get(vecShape, sourceType.getElementType());
+
+ // 3. Generate TransferReadOp + TransferWriteOp
+ ReifiedRankedShapedTypeDims reifiedSrcSizes;
+ Value maskOp;
- if (!padValue) {
- auto elemType = sourceType.getElementType();
- padValue = rewriter.create<arith::ConstantOp>(
- sliceOp.getLoc(), elemType, rewriter.getZeroAttr(elemType));
+ // If vector sizes are user-provided, make sure to mask. First, generate the
+ // mask.
+ if (!inputVectorSizes.empty()) {
+ auto *srcDefOp = source.getDefiningOp();
+ if (!srcDefOp) {
+ LDBG("Unable to get the defining Op of " << sliceOp);
+ return failure();
}
- // 2. Get the vector shape and in-bounds attributes
- SmallVector<int64_t> vecShape;
- SmallVector<bool> readInBounds;
- SmallVector<bool> writeInBounds;
- size_t rankDiff = resultType.getRank() - sourceType.getRank();
- for (unsigned i = 0; i < sourceType.getRank(); ++i) {
- if (!sourceType.isDynamicDim(i)) {
- vecShape.push_back(sourceType.getDimSize(i));
- // Source shape is statically known: Neither read nor write are
- // out-of-bounds.
- readInBounds.push_back(true);
- writeInBounds.push_back(true);
- } else if (!resultType.isDynamicDim(i)) {
- // Source shape is not statically known, but result shape is.
- // Vectorize with size of result shape. This may be larger than the
- // source size.
- // FIXME: Using rankDiff implies that the source tensor is inserted at
- // the end of the destination tensor. However, that's not required.
- vecShape.push_back(resultType.getDimSize(rankDiff + i));
- // Read may be out-of-bounds because the result size could be larger
- // than the source size.
- readInBounds.push_back(false);
- // Write will in-bounds provided that the corresponding write idx is 0.
- // To keep this logic simple, conservatively mark as out-of-bounds.
- writeInBounds.push_back(false);
- } else {
- // Neither source nor result dim of padOp is static. Cannot vectorize
- // the copy.
- // TODO: Add support for masking
- return failure();
- }
+ LogicalResult status =
+ cast<ReifyRankedShapedTypeOpInterface>(srcDefOp).reifyResultShapes(
+ rewriter, reifiedSrcSizes);
+ if (status.failed()) {
+ LDBG("Unable to reify result shapes of " << srcDefOp);
+ return failure();
}
- auto vecType = VectorType::get(vecShape, sourceType.getElementType());
- // 3. Generate TransferReadOp.
- SmallVector<Value> readIndices(
- vecType.getRank(),
- rewriter.create<arith::ConstantIndexOp>(sliceOp.getLoc(), 0));
- auto read = rewriter.create<vector::TransferReadOp>(
- sliceOp.getLoc(), vecType, sliceOp.getSource(), readIndices, padValue,
- ArrayRef<bool>{readInBounds});
+ // Create the mask
+ auto readMaskType = VectorType::get(inputVectorSizes, rewriter.getI1Type());
+ maskOp = rewriter.create<vector::CreateMaskOp>(
+ sliceOp.getLoc(), readMaskType, reifiedSrcSizes[0]);
+ }
- // 4. Generate TransferWriteOp.
- auto writeIndices = getValueOrCreateConstantIndexOp(
- rewriter, sliceOp.getLoc(), sliceOp.getMixedOffsets());
+ // 3.a. TransferReadOp
+ SmallVector<Value> readIndices(
+ vecType.getRank(),
+ rewriter.create<arith::ConstantIndexOp>(sliceOp.getLoc(), 0));
+ Operation *read = rewriter.create<vector::TransferReadOp>(
+ sliceOp.getLoc(), vecType, source, readIndices, padValue,
+ ArrayRef<bool>{readInBounds});
- // 5. Finalize
- rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
- sliceOp, read, sliceOp.getDest(), writeIndices,
- ArrayRef<bool>{writeInBounds});
+ // Mask the xfer_read Op
+ if (!inputVectorSizes.empty()) {
+ read = mlir::vector::maskOperation(rewriter, read, maskOp);
+ }
- return success();
+ // 3.b. TransferWriteOp
+ auto writeIndices = getValueOrCreateConstantIndexOp(
+ rewriter, sliceOp.getLoc(), sliceOp.getMixedOffsets());
+
+ Operation *write = rewriter.create<vector::TransferWriteOp>(
+ sliceOp.getLoc(), read->getResult(0), sliceOp.getDest(), writeIndices,
+ ArrayRef<bool>{writeInBounds});
+
+ // Mask the xfer_write Op
+ if (!inputVectorSizes.empty()) {
+ write = mlir::vector::maskOperation(rewriter, write, maskOp);
}
-};
+
+ // 4. Finalize
+ newResults.push_back(write->getResult(0));
+
+ return success();
+}
/// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.:
/// ```
@@ -2778,11 +2851,6 @@ struct PadOpVectorizationWithInsertSlicePattern
}
};
-void mlir::linalg::populateInsertSliceVectorizationPatterns(
- RewritePatternSet &patterns) {
- patterns.add<InsertSliceVectorizePattern>(patterns.getContext());
-}
-
void mlir::linalg::populatePadOpVectorizationPatterns(
RewritePatternSet &patterns, PatternBenefit baseBenefit) {
patterns.add<PadOpVectorizationWithTransferReadPattern,
diff --git a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
index 08a3bbbb301c87..747b6f6d90cc7f 100644
--- a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
@@ -224,34 +224,16 @@ module attributes {transform.with_named_sequence} {
}
}
-
// -----
-///----------------------------------------------------------------------------------------
-/// tensor::PadOp -> tensor::EmptyOp + linalg::FillOp/tensor::GenerateOp + tensor::InsertSliceOp
-/// [Pattern: GenericPadOpVectorizationPattern + InsertSliceVectorizePattern]
-/// TODO: Split the test into two, one for each pattern.
-///----------------------------------------------------------------------------------------
-
func.func private @make_vector() -> tensor<12x13xf32>
-// Same as @pad_and_insert_slice_dest in vectorization-with-patterns.mlir, but
-// over here linalg::fill is not vectorized (patterns for linalg.fill are not
-// included here)
-// CHECK-LABEL: func.func @pad_and_insert_slice_dest(
-// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
-// CHECK-NOT: tensor.pad
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[PAD:.*]] = arith.constant 5.000000e+00 : f32
-// CHECK-DAG: %[[PAD_READ:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[PAD]] : f32) outs(%[[EMPTY]] : tensor<1x12x13xf32>) -> tensor<1x12x13xf32>
-// CHECK: %[[READ_1:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32>
-// CHECK: %[[WRITE_1:.*]] = vector.transfer_write %[[READ_1]], %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32>
-// CHECK: %[[VEC:.*]] = call @make_vector() : () -> ten...
[truncated]
Force-pushed from 0a0fbd5 to dce5fb6
@@ -121,11 +121,11 @@ module @transforms attributes { transform.with_named_sequence } {
     transform.apply_patterns.canonicalization
   } : !transform.op<"func.func">

-  // 3. Bufferize before lowering to LLVM
+  // 4. Bufferize before lowering to LLVM
This is why I prefer not using numbers. They always end up wrong when code is moved around or when code is inserted/removed. If it were me, I'd remove all the numbers.
> They always end up wrong when code is moved around or when code is inserted/removed.
Blame the PR author 😅
This is a very good point and, in principle, I agree. To me, the numbering itself is secondary. However, these numbers provide clear separation of high-level steps - that's far more valuable to me and that's what I want to document here. Is there a better way than numbers?
// Use HS to mark High-Level STEP
// HS: Tile so that we can decompose tensor.pack
// HS: Decompose the tiled unpack Op into tensor.extract_slice + tensor.insert_slice
// HS: Vectorize tensor.insert_slice
// HS: Bufferize before lowering to LLVM
// HS: Canonicalize
// Use # to mark High-Level STEP
// # Tile so that we can decompose tensor.pack
// # Decompose the tiled unpack Op into tensor.extract_slice + tensor.insert_slice
// # Vectorize tensor.insert_slice
// # Bufferize before lowering to LLVM
// # Canonicalize
// Use caps to mark High-Level STEP
// TILE (so that we can decompose tensor.pack)
// DECOMPOSE THE TILED UNPACK OP (into tensor.extract_slice + tensor.insert_slice)
// VECTORIZE
// BUFFERIZE
// CANONICALIZE
It won't matter that much in the context of this single test, but perhaps it's something to consider in general. Having said that, "numbers" is what we tend to use in MLIR for marking high-level steps 🤷🏻
// Vector sizes match the inner tiles in the payload IR.
%slice = transform.structured.match ops{["tensor.insert_slice"]} in %func_op : (!transform.op<"func.func">) -> !transform.any_op
transform.structured.vectorize %slice vector_sizes [8, 1] : !transform.any_op

// 3. Bufferize before lowering to LLVM
The number needs to be updated if you want to keep the numbers.
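For readers following along, here is a minimal sketch of how the quoted vectorization step sits inside the test's transform sequence after this change. Only the two transform.structured lines are quoted from the diff above; the enclosing named sequence, the step numbering, and the elided steps are assumptions for illustration, not the exact contents of the test.

module @transforms attributes { transform.with_named_sequence } {
  transform.named_sequence @__transform_main(%module: !transform.any_op {transform.readonly}) {
    %func_op = transform.structured.match ops{["func.func"]} in %module
      : (!transform.any_op) -> !transform.op<"func.func">

    // ... tiling and decomposition steps elided ...

    // 3. Vectorize tensor.insert_slice. Vector sizes match the inner tiles
    //    in the payload IR.
    %slice = transform.structured.match ops{["tensor.insert_slice"]} in %func_op
      : (!transform.op<"func.func">) -> !transform.any_op
    transform.structured.vectorize %slice vector_sizes [8, 1] : !transform.any_op

    // 4. Bufferize before lowering to LLVM (elided).
    transform.yield
  }
}

Providing explicit vector sizes here is what triggers the masked path added in #122927/#123031: with user-provided sizes, the vectorizer no longer needs every dim of the slice to be static.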
Following on from llvm#122927 + llvm#123031 that added support for masked vectorization of `tensor.insert_slice`, this PR extends the e2e test for `tensor.unpack` to leverage the new functionality.
…npack` Fix numbering
Force-pushed from d3e47f6 to eab99bb
Hey @hanhanW, are you OK for me to go ahead with this one?
Sorry that I missed this one, LGTM.
Following on from #122927 + #123031 that added support for masked vectorization of `tensor.insert_slice`, this PR extends the e2e test for `tensor.unpack` to leverage the new functionality.
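To illustrate the functionality this test now exercises, below is a hand-written sketch (not taken from the patch; the shapes, offsets, and value names are assumptions) of what masked vectorization with vector sizes [8, 1] does to a tensor.insert_slice whose source has a dynamic dim:

// Before: insert a dynamically-sized slice into a static destination.
%res = tensor.insert_slice %src into %dest[0, %off] [%sz, 1] [1, 1]
    : tensor<?x1xf32> into tensor<64x64xf32>

// After (roughly): both the transfer_read and the transfer_write are
// guarded by a mask built from the reified source sizes, so the vector
// shape <8x1> may safely exceed the runtime size %sz.
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%pad = arith.constant 0.0 : f32
%mask = vector.create_mask %sz, %c1 : vector<8x1xi1>
%read = vector.mask %mask {
  vector.transfer_read %src[%c0, %c0], %pad : tensor<?x1xf32>, vector<8x1xf32>
} : vector<8x1xi1> -> vector<8x1xf32>
%res = vector.mask %mask {
  vector.transfer_write %read, %dest[%c0, %off] : vector<8x1xf32>, tensor<64x64xf32>
} : vector<8x1xi1> -> tensor<64x64xf32>

This mirrors the pad-value logic in the patch: with masking, all accesses are treated as in-bounds, so the scalar padding value of the read is irrelevant and a zero constant suffices.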