
Commit 60e562d

[mlir][linalg] Add unit dim folding pattern for tensor.pad (#84684)
Unit-extent dims that are not padded by a tensor.pad can be folded away. When the unit-extent dims of the surrounding linalg ops are folded as well, this increases the chance that the iteration space of a linalg op will align with nearby pad ops, improving fusion opportunities.
1 parent c93c76b

3 files changed: 213 additions, 0 deletions


mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Lines changed: 4 additions & 0 deletions
@@ -481,6 +481,10 @@ struct ControlDropUnitDims {
     if (auto genericOp = dyn_cast_or_null<GenericOp>(op)) {
       return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));
     }
+    if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {
+      return llvm::to_vector(
+          llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
+    }
     return SmallVector<unsigned>{};
   };
 };
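With this change, the default control function offers every source dimension of a tensor.pad as a candidate for dropping, mirroring the existing linalg.generic behavior. Callers that need finer control can still supply their own callback. Below is a minimal sketch of such an override; the policy of only offering the outermost dimension of pad ops is purely illustrative and not part of this commit.

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

using namespace mlir;

// Illustrative override of ControlDropUnitDims::controlFn: offer only the
// outermost source dimension of tensor.pad ops for unit-dim folding and skip
// every other op (an empty result makes the patterns bail out).
static linalg::ControlDropUnitDims makePadOnlyControl() {
  linalg::ControlDropUnitDims options;
  options.controlFn = [](Operation *op) -> SmallVector<unsigned> {
    if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op))
      if (padOp.getSourceType().getRank() > 0)
        return {0};
    return {};
  };
  return options;
}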

mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp

Lines changed: 122 additions & 0 deletions
@@ -561,6 +561,126 @@ struct DropUnitDims : public OpRewritePattern<GenericOp> {
 };
 } // namespace

+//===---------------------------------------------------------------------===//
+// Drop dimensions that are unit-extents within tensor operations.
+//===---------------------------------------------------------------------===//
+
+namespace {
+struct DropPadUnitDims : public OpRewritePattern<tensor::PadOp> {
+  DropPadUnitDims(MLIRContext *context, ControlDropUnitDims options = {},
+                  PatternBenefit benefit = 1)
+      : OpRewritePattern(context, benefit), options(std::move(options)) {}
+
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
+                                PatternRewriter &rewriter) const override {
+    // 1a. Get the allowed list of dimensions to drop from the `options`.
+    SmallVector<unsigned> allowedUnitDims = options.controlFn(padOp);
+    if (allowedUnitDims.empty()) {
+      return rewriter.notifyMatchFailure(
+          padOp, "control function returns no allowed unit dims to prune");
+    }
+
+    if (padOp.getSourceType().getEncoding()) {
+      return rewriter.notifyMatchFailure(
+          padOp, "cannot collapse dims of tensor with encoding");
+    }
+
+    // Fail for non-constant padding values. The body of the pad could
+    // depend on the padding indices and/or properties of the padded
+    // tensor so for now we fail.
+    // TODO: Support non-constant padding values.
+    Value paddingVal = padOp.getConstantPaddingValue();
+    if (!paddingVal) {
+      return rewriter.notifyMatchFailure(
+          padOp, "unimplemented: non-constant padding value");
+    }
+
+    ArrayRef<int64_t> sourceShape = padOp.getSourceType().getShape();
+    int64_t padRank = sourceShape.size();
+
+    auto isStaticZero = [](OpFoldResult f) {
+      std::optional<int64_t> maybeInt = getConstantIntValue(f);
+      return maybeInt && *maybeInt == 0;
+    };
+
+    llvm::SmallDenseSet<unsigned> unitDimsFilter(allowedUnitDims.begin(),
+                                                 allowedUnitDims.end());
+    llvm::SmallDenseSet<unsigned> unitDims;
+    SmallVector<int64_t> newShape;
+    SmallVector<OpFoldResult> newLowPad;
+    SmallVector<OpFoldResult> newHighPad;
+    for (const auto [dim, size, low, high] :
+         zip_equal(llvm::seq(static_cast<int64_t>(0), padRank), sourceShape,
+                   padOp.getMixedLowPad(), padOp.getMixedHighPad())) {
+      if (unitDimsFilter.contains(dim) && size == 1 && isStaticZero(low) &&
+          isStaticZero(high)) {
+        unitDims.insert(dim);
+      } else {
+        newShape.push_back(size);
+        newLowPad.push_back(low);
+        newHighPad.push_back(high);
+      }
+    }
+
+    if (unitDims.empty()) {
+      return rewriter.notifyMatchFailure(padOp, "no unit dims to collapse");
+    }
+
+    ReassociationIndices reassociationGroup;
+    SmallVector<ReassociationIndices> reassociationMap;
+    int64_t dim = 0;
+    while (dim < padRank && unitDims.contains(dim))
+      reassociationGroup.push_back(dim++);
+    while (dim < padRank) {
+      assert(!unitDims.contains(dim) && "expected non unit-extent");
+      reassociationGroup.push_back(dim);
+      dim++;
+      // Fold all following dimensions that are unit-extent.
+      while (dim < padRank && unitDims.contains(dim))
+        reassociationGroup.push_back(dim++);
+      reassociationMap.push_back(reassociationGroup);
+      reassociationGroup.clear();
+    }
+
+    Value collapsedSource =
+        collapseValue(rewriter, padOp.getLoc(), padOp.getSource(), newShape,
+                      reassociationMap, options.rankReductionStrategy);
+
+    auto newPadOp = rewriter.create<tensor::PadOp>(
+        padOp.getLoc(), /*result=*/Type(), collapsedSource, newLowPad,
+        newHighPad, paddingVal, padOp.getNofold());
+
+    Value dest = padOp.getResult();
+    if (options.rankReductionStrategy ==
+        ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice) {
+      SmallVector<OpFoldResult> expandedSizes;
+      int64_t numUnitDims = 0;
+      for (auto dim : llvm::seq(static_cast<int64_t>(0), padRank)) {
+        if (unitDims.contains(dim)) {
+          expandedSizes.push_back(rewriter.getIndexAttr(1));
+          numUnitDims++;
+          continue;
+        }
+        expandedSizes.push_back(tensor::getMixedSize(
+            rewriter, padOp.getLoc(), newPadOp, dim - numUnitDims));
+      }
+      dest = rewriter.create<tensor::EmptyOp>(
+          padOp.getLoc(), expandedSizes,
+          padOp.getResultType().getElementType());
+    }
+
+    Value expandedValue =
+        expandValue(rewriter, padOp.getLoc(), newPadOp.getResult(), dest,
+                    reassociationMap, options.rankReductionStrategy);
+    rewriter.replaceOp(padOp, expandedValue);
+    return success();
+  }
+
+private:
+  ControlDropUnitDims options;
+};
+} // namespace
+
 namespace {
 /// Convert `extract_slice` operations to rank-reduced versions.
 struct RankReducedExtractSliceOp
@@ -640,6 +760,7 @@ populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns,
                                               ControlDropUnitDims &options) {
   auto *context = patterns.getContext();
   patterns.add<DropUnitDims>(context, options);
+  patterns.add<DropPadUnitDims>(context, options);
   // TODO: Patterns unrelated to unit dim folding should be factored out.
   patterns.add<RankReducedExtractSliceOp,
                RankReducedInsertSliceOp<tensor::InsertSliceOp>,
@@ -661,6 +782,7 @@ populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns,
   options.rankReductionStrategy =
       ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice;
   patterns.add<DropUnitDims>(context, options);
+  patterns.add<DropPadUnitDims>(context, options);
   // TODO: Patterns unrelated to unit dim folding should be factored out.
   linalg::FillOp::getCanonicalizationPatterns(patterns, context);
   tensor::EmptyOp::getCanonicalizationPatterns(patterns, context);
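For completeness, here is a minimal sketch of driving these patterns from a pass. It assumes the public entry point linalg::populateFoldUnitExtentDimsPatterns dispatches to the ViaReshapes/ViaSlices populate functions above based on options.rankReductionStrategy; the wrapper function itself is hypothetical.

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

// Hypothetical helper: apply unit-dim folding (now including DropPadUnitDims)
// to everything nested under `root` with the greedy pattern rewriter.
static LogicalResult foldUnitExtentDims(Operation *root,
                                        linalg::ControlDropUnitDims options) {
  RewritePatternSet patterns(root->getContext());
  // Assumed to populate either the reshape-based or the slice-based pattern
  // set shown above, depending on options.rankReductionStrategy.
  linalg::populateFoldUnitExtentDimsPatterns(patterns, options);
  return applyPatternsAndFoldGreedily(root, std::move(patterns));
}

The test file below exercises both strategies: the CHECK prefix covers the reshape-based variant and CHECK-SLICES covers the extract/insert-slice variant.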

mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir

Lines changed: 87 additions & 0 deletions
@@ -946,3 +946,90 @@ func.func @drop_all_loops(%arg0 : memref<1x1xf32, 3>) -> memref<1x1xf32, 3>
 // CHECK-SLICES-LABEL: func @drop_all_loops
 // CHECK-SLICES: memref.subview %{{.*}}[0, 0] [1, 1] [1, 1] : memref<1x1xf32, 3> to memref<f32, strided<[]>, 3>
 // CHECK-SLICES: linalg.generic{{.*}}memref<f32, strided<[]>, 3>
+
+// -----
+
+func.func @drop_unit_pad_dims(%arg0: tensor<1x1x3x1x1xf32>) -> tensor<1x2x3x1x3xf32>
+{
+  %c0 = arith.constant 0 : index
+  %cst0 = arith.constant 0.0 : f32
+  %0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
+      tensor.yield %cst0 : f32
+  } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+  return %0 : tensor<1x2x3x1x3xf32>
+}
+
+// CHECK-LABEL: func @drop_unit_pad_dims
+// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
+// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<1x1x3x1x1xf32> into tensor<1x3x1xf32>
+// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[1, 0, 0] high[0, 0, 2]
+// CHECK: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
+// CHECK: tensor.expand_shape %[[PADDED]]
+// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>
+
+// CHECK-SLICES-LABEL: func @drop_unit_pad_dims
+// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
+// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 1, 3, 1, 1] [1, 1, 1, 1, 1] : tensor<1x1x3x1x1xf32> to tensor<1x3x1xf32>
+// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[1, 0, 0] high[0, 0, 2]
+// CHECK-SLICES: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
+// CHECK-SLICES: tensor.insert_slice %[[PADDED]]
+// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 2, 3, 1, 3] [1, 1, 1, 1, 1] : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>
+
+// -----
+
+func.func @drop_unit_pad_dynamic_dims(%arg0: tensor<1x?xf32>) -> tensor<1x?xf32>
+{
+  %c0 = arith.constant 0 : index
+  %cst0 = arith.constant 0.0 : f32
+  %0 = tensor.pad %arg0 low[0, 5] high[0, 6] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %cst0 : f32
+  } : tensor<1x?xf32> to tensor<1x?xf32>
+  return %0 : tensor<1x?xf32>
+}
+
+// CHECK-LABEL: func @drop_unit_pad_dynamic_dims
+// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
+// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<1x?xf32> into tensor<?xf32>
+// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[5] high[6]
+// CHECK: } : tensor<?xf32> to tensor<?xf32>
+// CHECK: tensor.expand_shape %[[PADDED]]
+// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<?xf32> into tensor<1x?xf32>
+
+// CHECK-SLICES: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 + 11)>
+
+// CHECK-SLICES-LABEL: func @drop_unit_pad_dynamic_dims
+// CHECK-SLICES-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<1x?xf32>
+// CHECK-SLICES: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c1
+// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
+// CHECK-SLICES-SAME: [0, 0] [1, %[[DIM]]] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
+// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[5] high[6]
+// CHECK-SLICES: } : tensor<?xf32> to tensor<?xf32>
+// CHECK-SLICES: %[[PADDED_DIM:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]]
+// CHECK-SLICES: %[[EMPTY:.+]] = tensor.empty(%[[PADDED_DIM]]) : tensor<1x?xf32>
+// CHECK-SLICES: tensor.insert_slice %[[PADDED]] into %[[EMPTY]]
+// CHECK-SLICES-SAME: [0, 0] [1, %[[PADDED_DIM]]] [1, 1] : tensor<?xf32> into tensor<1x?xf32>
+
+// -----
+
+func.func @do_not_drop_non_constant_padding(%arg0: tensor<1x1x3x1x1xf32>, %pad: f32) -> tensor<1x2x3x1x3xf32>
+{
+  %c0 = arith.constant 0 : index
+  %0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
+      %0 = arith.index_cast %arg3 : index to i64
+      %1 = arith.sitofp %0 : i64 to f32
+      %add = arith.addf %pad, %1 : f32
+      tensor.yield %add : f32
+  } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+  return %0 : tensor<1x2x3x1x3xf32>
+}
+
+// CHECK-LABEL: func @do_not_drop_non_constant_padding
+// CHECK: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
+// CHECK: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+
+// CHECK-SLICES-LABEL: func @do_not_drop_non_constant_padding
+// CHECK-SLICES: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
+// CHECK-SLICES: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
