Skip to content

Commit e494278

Browse files
author
gysit
committed
[mlir][linalg] Add transpose support to hoist padding.
Add a transpose option to hoist padding to transpose the padded tensor before storing it into the packed tensor. The early transpose improves the memory access patterns of the actual compute kernel. The patch introduces a transpose right after the hoisted pad tensor and a second transpose inside the compute loop. The second transpose can either be fused into the compute operation or will canonicalize away when lowering to vector instructions. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D117893
1 parent a43ed49 commit e494278

File tree

8 files changed

+316
-118
lines changed

8 files changed

+316
-118
lines changed

mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,17 @@ class PadOp;
1919
} // namespace tensor
2020

2121
namespace linalg {
22+
class GenericOp;
2223

2324
/// Mechanically hoist padding operations on tensors by `numLoops` into a new,
2425
/// generally larger tensor. This achieves packing of multiple padding ops into
25-
/// a larger tensor. On success, `padTensorOp` is replaced by the cloned version
26+
/// a larger tensor. On success, `opToHoist` is replaced by the cloned version
2627
/// in the packing loop so the caller can continue reasoning about the padding
27-
/// operation.
28+
/// operation. If `transposeVector` is non-empty, hoist padding introduces a
29+
/// GenericOp to transpose the padded tensor before inserting it into the packed
30+
/// tensor. A `transposeVector` can change the storage order of the padded
31+
/// tensor but does not change the order of the pack or compute loops.
32+
///
2833
///
2934
/// Example in pseudo-mlir:
3035
/// =======================
@@ -33,7 +38,7 @@ namespace linalg {
3338
/// ```
3439
/// scf.for (%i, %j, %k)
3540
/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
36-
/// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
41+
/// %0 = tensor.pad %st0 low[0, 0] high[...] {
3742
/// ^bb0( ... ):
3843
/// linalg.yield %pad
3944
/// } : tensor<?x?xf32> to tensor<4x8xf32>
@@ -47,7 +52,7 @@ namespace linalg {
4752
/// %packed_init = linalg.init_tensor range(%j) : tensor<?x4x8xf32>
4853
/// %packed = scf.for (%k) iter_args(%p : %packed_init) {
4954
/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
50-
/// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
55+
/// %0 = tensor.pad %st0 low[0, 0] high[...] {
5156
/// ^bb0( ... ):
5257
/// linalg.yield %pad
5358
/// } : tensor<?x?xf32> to tensor<4x8xf32>
@@ -62,8 +67,9 @@ namespace linalg {
6267
/// }
6368
/// }
6469
/// ```
65-
FailureOr<Value> hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops,
66-
tensor::PadOp &hoistedOp);
70+
FailureOr<Value> hoistPaddingOnTensors(
71+
tensor::PadOp opToHoist, int numLoops, ArrayRef<int64_t> transposeVector,
72+
tensor::PadOp &hoistedOp, SmallVectorImpl<GenericOp> &transposeOps);
6773

6874
} // namespace linalg
6975
} // namespace mlir

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -484,14 +484,19 @@ using TileSizeComputationFunction =
484484
using PaddingValueComputationFunction =
485485
std::function<FailureOr<Value>(OpBuilder &, OpOperand &)>;
486486

487-
/// Callback returning true if the pad tensor operation defining the given
488-
/// OpOperand shall be marked as nofold to enable packing.
487+
/// Callback returning true if the PadOp defining the given OpOperand shall be
488+
/// marked as nofold to enable packing.
489489
using PaddingNoFoldComputationFunction = std::function<bool(OpOperand &)>;
490490

491-
/// Callback returning the number of loops to hoist the pad tensor operation
492-
/// defining the given OpOperand.
491+
/// Callback returning the number of loops to hoist the PadOp defining the given
492+
/// OpOperand.
493493
using PaddingHoistComputationFunction = std::function<int64_t(OpOperand &)>;
494494

495+
/// Callback returning the transpose vector used to permute the result tensor
496+
/// dimensions of the PadOp defining the given OpOperand.
497+
using PaddingTransposeComputationFunction =
498+
std::function<SmallVector<int64_t>(OpOperand &)>;
499+
495500
struct LinalgPaddingOptions {
496501
/// Callback returning the padding value to use for a given OpOperand or
497502
/// failure for no padding. Padding operations are introduced if
@@ -506,10 +511,10 @@ struct LinalgPaddingOptions {
506511
return *this;
507512
}
508513

509-
/// Callback returning true if the pad tensor operation defining the given
510-
/// OpOperand shall be marked as nofold to enable packing. A padding operation
511-
/// is only marked nofold if `paddingNoFoldComputationFunction` is set and
512-
/// returns true. Otherwise, the nofold attribute is set to false.
514+
/// Callback returning true if the PadOp defining the given OpOperand shall be
515+
/// marked as nofold to enable packing. A padding operation is only marked
516+
/// nofold if `paddingNoFoldComputationFunction` is set and returns true.
517+
/// Otherwise, the nofold attribute is set to false.
513518
PaddingNoFoldComputationFunction paddingNoFoldComputationFunction = nullptr;
514519

515520
LinalgPaddingOptions &
@@ -518,15 +523,26 @@ struct LinalgPaddingOptions {
518523
return *this;
519524
}
520525

521-
/// Callback returning the number of loops to hoist the pad tensor operation
522-
/// defining the given OpOperand.
526+
/// Callback returning the number of loops to hoist the PadOp defining the
527+
/// given OpOperand.
523528
PaddingHoistComputationFunction paddingHoistComputationFunction = nullptr;
524529

525530
LinalgPaddingOptions &
526531
setPaddingHoistComputationFunction(PaddingHoistComputationFunction fun) {
527532
paddingHoistComputationFunction = std::move(fun);
528533
return *this;
529534
}
535+
536+
/// Callback returning the transpose vector used to permute the result tensor
537+
/// dimensions of the PadOp defining the given OpOperand.
538+
PaddingTransposeComputationFunction paddingTransposeComputationFunction =
539+
nullptr;
540+
541+
LinalgPaddingOptions &setPaddingTransposeComputationFunction(
542+
PaddingTransposeComputationFunction fun) {
543+
paddingTransposeComputationFunction = std::move(fun);
544+
return *this;
545+
}
530546
};
531547

532548
struct LinalgTilingAndFusionOptions {

mlir/include/mlir/Dialect/Linalg/Utils/Utils.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,18 +117,24 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp(
117117
/// Example:
118118
/// ```
119119
/// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1]
120-
/// %1 = linalg.pad_tensor %0 low[0, 0] high[...] { linalg.yield %cst }
120+
/// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst }
121121
/// %2 = linalg.matmul ins(...) outs(%1)
122122
/// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1]
123123
/// ```
124124
/// makeComposedPadHighOp(source=%3, pad=%cst) returns %2
125125
/// makeComposedPadHighOp(source=%3, pad=%other_cst) returns %4
126126
/// ```
127-
/// %4 = linalg.pad_tensor %3 low[0, 0] high[...] { linalg.yield %other_cst }
127+
/// %4 = tensor.pad %3 low[0, 0] high[...] { tensor.yield %other_cst }
128128
/// ```
129129
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
130130
Value source, Value pad, bool nofold);
131131

132+
/// Returns a GenericOp that transposes `inputTensor` into `outputTensor` using
133+
/// `transposeVector` to permute the `inputTensor` dimensions.
134+
GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
135+
Value outputTensor,
136+
ArrayRef<int64_t> transposeVector);
137+
132138
//===----------------------------------------------------------------------===//
133139
// Fusion / Tiling utilities
134140
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)