Skip to content

[mlir][linalg] Add unit dim folding pattern for tensor.pad #84684

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,10 @@ struct ControlDropUnitDims {
if (auto genericOp = dyn_cast_or_null<GenericOp>(op)) {
return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));
}
if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {
return llvm::to_vector(
llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
}
return SmallVector<unsigned>{};
};
};
Expand Down
122 changes: 122 additions & 0 deletions mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,126 @@ struct DropUnitDims : public OpRewritePattern<GenericOp> {
};
} // namespace

//===---------------------------------------------------------------------===//
// Drop dimensions that are unit-extents within tensor operations.
//===---------------------------------------------------------------------===//

namespace {
/// Rewrites a `tensor.pad` so that unit-extent dimensions which receive no
/// padding are folded away before padding and restored afterwards.
///
/// A dimension is dropped only when all of the following hold:
///   * it is listed by `options.controlFn` for this op,
///   * its source extent is statically 1,
///   * both its low and high padding are statically zero.
///
/// The rewrite produces: collapse(source) -> rank-reduced pad -> expand. The
/// collapse/expand flavor (reshape vs. extract/insert slice) is selected by
/// `options.rankReductionStrategy`.
///
/// The pattern bails out when the source tensor type carries an encoding or
/// when the padding value is not a constant.
struct DropPadUnitDims : public OpRewritePattern<tensor::PadOp> {
  DropPadUnitDims(MLIRContext *context, ControlDropUnitDims options = {},
                  PatternBenefit benefit = 1)
      : OpRewritePattern(context, benefit), options(std::move(options)) {}

  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                PatternRewriter &rewriter) const override {
    // 1a. Get the allowed list of dimensions to drop from the `options`.
    SmallVector<unsigned> allowedUnitDims = options.controlFn(padOp);
    if (allowedUnitDims.empty()) {
      return rewriter.notifyMatchFailure(
          padOp, "control function returns no allowed unit dims to prune");
    }

    if (padOp.getSourceType().getEncoding()) {
      return rewriter.notifyMatchFailure(
          padOp, "cannot collapse dims of tensor with encoding");
    }

    // Fail for non-constant padding values. The body of the pad could
    // depend on the padding indices and/or properties of the padded
    // tensor so for now we fail.
    // TODO: Support non-constant padding values.
    Value paddingVal = padOp.getConstantPaddingValue();
    if (!paddingVal) {
      return rewriter.notifyMatchFailure(
          padOp, "unimplemented: non-constant padding value");
    }

    ArrayRef<int64_t> sourceShape = padOp.getSourceType().getShape();
    ArrayRef<int64_t> resultShape = padOp.getResultType().getShape();
    int64_t padRank = sourceShape.size();

    auto isStaticZero = [](OpFoldResult f) {
      std::optional<int64_t> maybeInt = getConstantIntValue(f);
      return maybeInt && *maybeInt == 0;
    };

    // Partition the dimensions into the unit dims being dropped and the
    // dims that survive. For survivors, record the source extent, the
    // original result extent and the low/high padding.
    llvm::SmallDenseSet<unsigned> unitDimsFilter(allowedUnitDims.begin(),
                                                 allowedUnitDims.end());
    llvm::SmallDenseSet<unsigned> unitDims;
    SmallVector<int64_t> newShape;
    SmallVector<int64_t> newResultShape;
    SmallVector<OpFoldResult> newLowPad;
    SmallVector<OpFoldResult> newHighPad;
    for (const auto [dim, size, outSize, low, high] : zip_equal(
             llvm::seq(static_cast<int64_t>(0), padRank), sourceShape,
             resultShape, padOp.getMixedLowPad(), padOp.getMixedHighPad())) {
      if (unitDimsFilter.contains(dim) && size == 1 && isStaticZero(low) &&
          isStaticZero(high)) {
        unitDims.insert(dim);
      } else {
        newShape.push_back(size);
        newResultShape.push_back(outSize);
        newLowPad.push_back(low);
        newHighPad.push_back(high);
      }
    }

    if (unitDims.empty()) {
      return rewriter.notifyMatchFailure(padOp, "no unit dims to collapse");
    }

    // Build the reassociation: leading unit dims are attached to the first
    // surviving dim, every other unit dim is attached to the surviving dim
    // that precedes it. If every dim is a unit dim the map stays empty.
    ReassociationIndices reassociationGroup;
    SmallVector<ReassociationIndices> reassociationMap;
    int64_t dim = 0;
    while (dim < padRank && unitDims.contains(dim))
      reassociationGroup.push_back(dim++);
    while (dim < padRank) {
      assert(!unitDims.contains(dim) && "expected non unit-extent");
      reassociationGroup.push_back(dim);
      dim++;
      // Fold all following dimensions that are unit-extent.
      while (dim < padRank && unitDims.contains(dim))
        reassociationGroup.push_back(dim++);
      reassociationMap.push_back(reassociationGroup);
      reassociationGroup.clear();
    }

    Value collapsedSource =
        collapseValue(rewriter, padOp.getLoc(), padOp.getSource(), newShape,
                      reassociationMap, options.rankReductionStrategy);

    // Use the original result extents for the rank-reduced pad instead of
    // letting the builder infer them. The original result type may be more
    // static than what can be inferred from the source type and the padding
    // operands; re-inferring would lose that information and the expanded
    // value would no longer have the type of the op being replaced.
    auto newResultType = RankedTensorType::get(
        newResultShape, padOp.getResultType().getElementType());
    auto newPadOp = rewriter.create<tensor::PadOp>(
        padOp.getLoc(), /*result=*/newResultType, collapsedSource, newLowPad,
        newHighPad, paddingVal, padOp.getNofold());

    // For the reshape strategy the destination only conveys the expanded
    // type. For the slice strategy a fresh `tensor.empty` of the expanded
    // shape is created to insert the padded value into.
    Value dest = padOp.getResult();
    if (options.rankReductionStrategy ==
        ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice) {
      SmallVector<OpFoldResult> expandedSizes;
      int64_t numUnitDims = 0;
      for (auto dim : llvm::seq(static_cast<int64_t>(0), padRank)) {
        if (unitDims.contains(dim)) {
          expandedSizes.push_back(rewriter.getIndexAttr(1));
          numUnitDims++;
          continue;
        }
        // `dim - numUnitDims` is the position of this dim in the
        // rank-reduced pad result.
        expandedSizes.push_back(tensor::getMixedSize(
            rewriter, padOp.getLoc(), newPadOp, dim - numUnitDims));
      }
      dest = rewriter.create<tensor::EmptyOp>(
          padOp.getLoc(), expandedSizes,
          padOp.getResultType().getElementType());
    }

    Value expandedValue =
        expandValue(rewriter, padOp.getLoc(), newPadOp.getResult(), dest,
                    reassociationMap, options.rankReductionStrategy);
    rewriter.replaceOp(padOp, expandedValue);
    return success();
  }

private:
  /// Controls which dims may be dropped and how rank reduction is expressed.
  ControlDropUnitDims options;
};
} // namespace

namespace {
/// Convert `extract_slice` operations to rank-reduced versions.
struct RankReducedExtractSliceOp
Expand Down Expand Up @@ -640,6 +760,7 @@ populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns,
ControlDropUnitDims &options) {
auto *context = patterns.getContext();
patterns.add<DropUnitDims>(context, options);
patterns.add<DropPadUnitDims>(context, options);
// TODO: Patterns unrelated to unit dim folding should be factored out.
patterns.add<RankReducedExtractSliceOp,
RankReducedInsertSliceOp<tensor::InsertSliceOp>,
Expand All @@ -661,6 +782,7 @@ populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns,
options.rankReductionStrategy =
ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice;
patterns.add<DropUnitDims>(context, options);
patterns.add<DropPadUnitDims>(context, options);
// TODO: Patterns unrelated to unit dim folding should be factored out.
linalg::FillOp::getCanonicalizationPatterns(patterns, context);
tensor::EmptyOp::getCanonicalizationPatterns(patterns, context);
Expand Down
87 changes: 87 additions & 0 deletions mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -946,3 +946,90 @@ func.func @drop_all_loops(%arg0 : memref<1x1xf32, 3>) -> memref<1x1xf32, 3>
// CHECK-SLICES-LABEL: func @drop_all_loops
// CHECK-SLICES: memref.subview %{{.*}}[0, 0] [1, 1] [1, 1] : memref<1x1xf32, 3> to memref<f32, strided<[]>, 3>
// CHECK-SLICES: linalg.generic{{.*}}memref<f32, strided<[]>, 3>

// -----

// Constant-value pad of a 1x1x3x1x1 tensor. Dims 0 and 3 have unit extent
// and zero low/high padding (dim 3 via the constant index %c0), so the
// expected output below drops exactly those two. Dims 1 and 4 also have
// unit extent but are padded (low 1 / high 2) and are kept.
func.func @drop_unit_pad_dims(%arg0: tensor<1x1x3x1x1xf32>) -> tensor<1x2x3x1x3xf32>
{
%c0 = arith.constant 0 : index
%cst0 = arith.constant 0.0 : f32
%0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
tensor.yield %cst0 : f32
} : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
return %0 : tensor<1x2x3x1x3xf32>
}

// CHECK-LABEL: func @drop_unit_pad_dims
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<1x1x3x1x1xf32> into tensor<1x3x1xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[1, 0, 0] high[0, 0, 2]
// CHECK: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
// CHECK: tensor.expand_shape %[[PADDED]]
// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>

// CHECK-SLICES-LABEL: func @drop_unit_pad_dims
// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 1, 3, 1, 1] [1, 1, 1, 1, 1] : tensor<1x1x3x1x1xf32> to tensor<1x3x1xf32>
// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[1, 0, 0] high[0, 0, 2]
// CHECK-SLICES: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
// CHECK-SLICES: tensor.insert_slice %[[PADDED]]
// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 2, 3, 1, 3] [1, 1, 1, 1, 1] : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>

// -----

// Constant-value pad of a 1x? tensor. Dim 0 has unit extent with zero
// padding and is expected to be dropped; the dynamic dim 1 is padded by
// 5 (low) and 6 (high) and is kept.
func.func @drop_unit_pad_dynamic_dims(%arg0: tensor<1x?xf32>) -> tensor<1x?xf32>
{
%c0 = arith.constant 0 : index
%cst0 = arith.constant 0.0 : f32
%0 = tensor.pad %arg0 low[0, 5] high[0, 6] {
^bb0(%arg1: index, %arg2: index):
tensor.yield %cst0 : f32
} : tensor<1x?xf32> to tensor<1x?xf32>
return %0 : tensor<1x?xf32>
}

// CHECK-LABEL: func @drop_unit_pad_dynamic_dims
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<1x?xf32> into tensor<?xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[5] high[6]
// CHECK: } : tensor<?xf32> to tensor<?xf32>
// CHECK: tensor.expand_shape %[[PADDED]]
// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<?xf32> into tensor<1x?xf32>

// CHECK-SLICES: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 + 11)>

// CHECK-SLICES-LABEL: func @drop_unit_pad_dynamic_dims
// CHECK-SLICES-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<1x?xf32>
// CHECK-SLICES: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c1
// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
// CHECK-SLICES-SAME: [0, 0] [1, %[[DIM]]] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[5] high[6]
// CHECK-SLICES: } : tensor<?xf32> to tensor<?xf32>
// CHECK-SLICES: %[[PADDED_DIM:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]]
// CHECK-SLICES: %[[EMPTY:.+]] = tensor.empty(%[[PADDED_DIM]]) : tensor<1x?xf32>
// CHECK-SLICES: tensor.insert_slice %[[PADDED]] into %[[EMPTY]]
// CHECK-SLICES-SAME: [0, 0] [1, %[[PADDED_DIM]]] [1, 1] : tensor<?xf32> into tensor<1x?xf32>

// -----

// Negative test: the yielded padding value is computed from the block
// index %arg3 and the function argument %pad, so it is not a constant
// padding value. The expected output below keeps the pad op unchanged.
func.func @do_not_drop_non_constant_padding(%arg0: tensor<1x1x3x1x1xf32>, %pad: f32) -> tensor<1x2x3x1x3xf32>
{
%c0 = arith.constant 0 : index
%0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
// Convert the dim-2 index to f32 and add it to %pad.
%0 = arith.index_cast %arg3 : index to i64
%1 = arith.sitofp %0 : i64 to f32
%add = arith.addf %pad, %1 : f32
tensor.yield %add : f32
} : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
return %0 : tensor<1x2x3x1x3xf32>
}

// CHECK-LABEL: func @do_not_drop_non_constant_padding
// CHECK: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
// CHECK: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>

// CHECK-SLICES-LABEL: func @do_not_drop_non_constant_padding
// CHECK-SLICES: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
// CHECK-SLICES: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>