Skip to content

Commit e494278

Browse files
author
gysit
committed
[mlir][linalg] Add transpose support to hoist padding.
Add a transpose option to hoist padding to transpose the padded tensor before storing it into the packed tensor. The early transpose improves the memory access patterns of the actual compute kernel. The patch introduces a transpose right after the hoisted pad tensor and a second transpose inside the compute loop. The second transpose can either be fused into the compute operation or will canonicalize away when lowering to vector instructions. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D117893
1 parent a43ed49 commit e494278

File tree

8 files changed

+316
-118
lines changed

8 files changed

+316
-118
lines changed

mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,17 @@ class PadOp;
1919
} // namespace tensor
2020

2121
namespace linalg {
22+
class GenericOp;
2223

2324
/// Mechanically hoist padding operations on tensors by `numLoops` into a new,
2425
/// generally larger tensor. This achieves packing of multiple padding ops into
25-
/// a larger tensor. On success, `padTensorOp` is replaced by the cloned version
26+
/// a larger tensor. On success, `opToHoist` is replaced by the cloned version
2627
/// in the packing loop so the caller can continue reasoning about the padding
27-
/// operation.
28+
/// operation. If `transposeVector` is non-empty, hoist padding introduces a
29+
/// GenericOp to transpose the padded tensor before inserting it into the packed
30+
/// tensor. A `transposeVector` can change the storage order of the padded
31+
/// tensor but does not change the order of the pack or compute loops.
32+
///
2833
///
2934
/// Example in pseudo-mlir:
3035
/// =======================
@@ -33,7 +38,7 @@ namespace linalg {
3338
/// ```
3439
/// scf.for (%i, %j, %k)
3540
/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
36-
/// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
41+
/// %0 = tensor.pad %st0 low[0, 0] high[...] {
3742
/// ^bb0( ... ):
3843
/// linalg.yield %pad
3944
/// } : tensor<?x?xf32> to tensor<4x8xf32>
@@ -47,7 +52,7 @@ namespace linalg {
4752
/// %packed_init = linalg.init_tensor range(%j) : tensor<?x4x8xf32>
4853
/// %packed = scf.for (%k) iter_args(%p : %packed_init) {
4954
/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
50-
/// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
55+
/// %0 = tensor.pad %st0 low[0, 0] high[...] {
5156
/// ^bb0( ... ):
5257
/// linalg.yield %pad
5358
/// } : tensor<?x?xf32> to tensor<4x8xf32>
@@ -62,8 +67,9 @@ namespace linalg {
6267
/// }
6368
/// }
6469
/// ```
65-
FailureOr<Value> hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops,
66-
tensor::PadOp &hoistedOp);
70+
FailureOr<Value> hoistPaddingOnTensors(
71+
tensor::PadOp opToHoist, int numLoops, ArrayRef<int64_t> transposeVector,
72+
tensor::PadOp &hoistedOp, SmallVectorImpl<GenericOp> &transposeOps);
6773

6874
} // namespace linalg
6975
} // namespace mlir

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -484,14 +484,19 @@ using TileSizeComputationFunction =
484484
using PaddingValueComputationFunction =
485485
std::function<FailureOr<Value>(OpBuilder &, OpOperand &)>;
486486

487-
/// Callback returning true if the pad tensor operation defining the given
488-
/// OpOperand shall be marked as nofold to enable packing.
487+
/// Callback returning true if the PadOp defining the given OpOperand shall be
488+
/// marked as nofold to enable packing.
489489
using PaddingNoFoldComputationFunction = std::function<bool(OpOperand &)>;
490490

491-
/// Callback returning the number of loops to hoist the pad tensor operation
492-
/// defining the given OpOperand.
491+
/// Callback returning the number of loops to hoist the PadOp defining the given
492+
/// OpOperand.
493493
using PaddingHoistComputationFunction = std::function<int64_t(OpOperand &)>;
494494

495+
/// Callback returning the transpose vector used to permute the result tensor
496+
/// dimensions of the PadOp defining the given OpOperand.
497+
using PaddingTransposeComputationFunction =
498+
std::function<SmallVector<int64_t>(OpOperand &)>;
499+
495500
struct LinalgPaddingOptions {
496501
/// Callback returning the padding value to use for a given OpOperand or
497502
/// failure for no padding. Padding operations are introduced if
@@ -506,10 +511,10 @@ struct LinalgPaddingOptions {
506511
return *this;
507512
}
508513

509-
/// Callback returning true if the pad tensor operation defining the given
510-
/// OpOperand shall be marked as nofold to enable packing. A padding operation
511-
/// is only marked nofold if `paddingNoFoldComputationFunction` is set and
512-
/// returns true. Otherwise, the nofold attribute is set to false.
514+
/// Callback returning true if the PadOp defining the given OpOperand shall be
515+
/// marked as nofold to enable packing. A padding operation is only marked
516+
/// nofold if `paddingNoFoldComputationFunction` is set and returns true.
517+
/// Otherwise, the nofold attribute is set to false.
513518
PaddingNoFoldComputationFunction paddingNoFoldComputationFunction = nullptr;
514519

515520
LinalgPaddingOptions &
@@ -518,15 +523,26 @@ struct LinalgPaddingOptions {
518523
return *this;
519524
}
520525

521-
/// Callback returning the number of loops to hoist the pad tensor operation
522-
/// defining the given OpOperand.
526+
/// Callback returning the number of loops to hoist the PadOp defining the
527+
/// given OpOperand.
523528
PaddingHoistComputationFunction paddingHoistComputationFunction = nullptr;
524529

525530
LinalgPaddingOptions &
526531
setPaddingHoistComputationFunction(PaddingHoistComputationFunction fun) {
527532
paddingHoistComputationFunction = std::move(fun);
528533
return *this;
529534
}
535+
536+
/// Callback returning the transpose vector used to permute the result tensor
537+
/// dimensions of the PadOp defining the given OpOperand.
538+
PaddingTransposeComputationFunction paddingTransposeComputationFunction =
539+
nullptr;
540+
541+
LinalgPaddingOptions &setPaddingTransposeComputationFunction(
542+
PaddingTransposeComputationFunction fun) {
543+
paddingTransposeComputationFunction = std::move(fun);
544+
return *this;
545+
}
530546
};
531547

532548
struct LinalgTilingAndFusionOptions {

mlir/include/mlir/Dialect/Linalg/Utils/Utils.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,18 +117,24 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp(
117117
/// Example:
118118
/// ```
119119
/// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1]
120-
/// %1 = linalg.pad_tensor %0 low[0, 0] high[...] { linalg.yield %cst }
120+
/// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst }
121121
/// %2 = linalg.matmul ins(...) outs(%1)
122122
/// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1]
123123
/// ```
124124
/// makeComposedPadHighOp(source=%3, pad=%cst) returns %2
125125
/// makeComposedPadHighOp(source=%3, pad=%other_cst) returns %4
126126
/// ```
127-
/// %4 = linalg.pad_tensor %3 low[0, 0] high[...] { linalg.yield %other_cst }
127+
/// %4 = tensor.pad %3 low[0, 0] high[...] { tensor.yield %other_cst }
128128
/// ```
129129
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
130130
Value source, Value pad, bool nofold);
131131

132+
/// Returns a GenericOp that transposes `inputTensor` into `outputTensor` using
133+
/// `transposeVector` to permute the `inputTensor` dimensions.
134+
GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
135+
Value outputTensor,
136+
ArrayRef<int64_t> transposeVector);
137+
132138
//===----------------------------------------------------------------------===//
133139
// Fusion / Tiling utilities
134140
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)