Skip to content

Commit e9ed0a1

Browse files
committed
[mlir][tensor][linalg] Move Pack/Unpack Ops to Linalg (1/4)
This is merely moving code around, no new functionality is added. PATCH 1: Copies `tensor.pack` and `tensor.unpack` as `linalg.pack` and `linalg.unpack`, respectively. New Ops are defined in LinalgRelayoutOps.td. Note, `tensor.pack` and `tensor.unpack` are still present at this point. CONTEXT: This change was discussed in the following RFC: * https://discourse.llvm.org/t/rfc-move-tensor-pack-and-tensor-unpack-into-linalg
1 parent d839c06 commit e9ed0a1

File tree

9 files changed

+1286
-15
lines changed

9 files changed

+1286
-15
lines changed

mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ add_public_tablegen_target(MLIRLinalgStructuredOpsIncGen)
6565
add_dependencies(MLIRLinalgStructuredOpsIncGen LinalgOdsGen)
6666
add_dependencies(mlir-headers MLIRLinalgStructuredOpsIncGen)
6767

68+
set(LLVM_TARGET_DEFINITIONS LinalgRelayoutOps.td)
69+
mlir_tablegen(LinalgRelayoutOps.h.inc -gen-op-decls)
70+
mlir_tablegen(LinalgRelayoutOps.cpp.inc -gen-op-defs)
71+
add_public_tablegen_target(MLIRLinalgRelayoutOpsIncGen)
72+
add_dependencies(MLIRLinalgRelayoutOpsIncGen LinalgOdsGen)
73+
add_dependencies(mlir-headers MLIRLinalgRelayoutOpsIncGen)
74+
6875
set(LLVM_TARGET_DEFINITIONS LinalgInterfaces.td)
6976
mlir_tablegen(LinalgInterfaces.h.inc -gen-op-interface-decls)
7077
mlir_tablegen(LinalgInterfaces.cpp.inc -gen-op-interface-defs)

mlir/include/mlir/Dialect/Linalg/IR/Linalg.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,7 @@ OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val,
123123
#define GET_OP_CLASSES
124124
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.h.inc"
125125

126+
#define GET_OP_CLASSES
127+
#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.h.inc"
128+
126129
#endif // MLIR_DIALECT_LINALG_IR_LINALG_H
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
//===- LinalgRelayoutOps.td - Linalg relayout ops ---------*- tablegen -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This is the operation definition file for the Linalg relayout operations
10+
// (`linalg.pack` and `linalg.unpack`).
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LINALG_RELEAYOUT_OPS
15+
#define LINALG_RELEAYOUT_OPS
16+
17+
include "mlir/Dialect/Linalg/IR/LinalgBase.td"
18+
include "mlir/Interfaces/DestinationStyleOpInterface.td"
19+
include "mlir/Interfaces/SideEffectInterfaces.td"
20+
include "mlir/Interfaces/InferTypeOpInterface.td"
21+
include "mlir/IR/OpAsmInterface.td"
22+
23+
//===----------------------------------------------------------------------===//
24+
// RelayoutOp
25+
//===----------------------------------------------------------------------===//
26+
27+
class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
28+
Op<Linalg_Dialect, mnemonic, !listconcat(traits, [
29+
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
30+
DestinationStyleOpInterface,
31+
ConditionallySpeculatable, NoMemoryEffect,
32+
DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
33+
TypesMatchWith<"result type matches type of dest",
34+
"dest", "result",
35+
"$_self">])> {
36+
37+
code commonExtraClassDeclaration = [{
38+
size_t getSourceRank() { return getSourceType().getRank(); };
39+
size_t getDestRank() { return getDestType().getRank(); };
40+
RankedTensorType getSourceType() {
41+
return ::llvm::cast<RankedTensorType>(getSource().getType()); };
42+
RankedTensorType getDestType() {
43+
return ::llvm::cast<RankedTensorType>(getDest().getType()); };
44+
45+
MutableOperandRange getDpsInitsMutable() { return getDestMutable(); }
46+
47+
/// Interface method for ConditionallySpeculatable.
48+
Speculation::Speculatability getSpeculatability();
49+
50+
/// Return a mapping from positions `inner_dims_pos` to their
51+
/// tile factors.
52+
DenseMap<int64_t, OpFoldResult> getDimAndTileMapping();
53+
54+
/// Return the tile sizes as OpFoldResult.
55+
SmallVector<OpFoldResult> getMixedTiles();
56+
57+
/// Return the tile sizes as `int64_t`. If a tile size is dynamic
58+
/// a sentinel `kDynamic` is introduced at that position in
59+
/// the returned vector.
60+
SmallVector<int64_t> getStaticTiles();
61+
62+
/// Retrieve all outer dims for this Pack/UnPack Op, i.e. all the leading
63+
/// dims excluding the trailing dims corresponding to `innerTiles`. Note
64+
/// that this will include both tiled and non-tiled dimensions. The order
65+
/// of the output dimensions is consistent with the shape of the packed
66+
/// tensor.
67+
ArrayRef<int64_t> getAllOuterDims();
68+
69+
/// Similar to `getAllOuterDims`, but only retrieve the outer dims that
70+
/// have been tiled. Also, the order of the output dimensions is consistent
71+
/// with `inner_dims_pos` rather than the packed tensor.
72+
SmallVector<int64_t> getTiledOuterDims();
73+
}];
74+
75+
let hasVerifier = 1;
76+
}
77+
78+
//===----------------------------------------------------------------------===//
79+
// PackOp
80+
//===----------------------------------------------------------------------===//
81+
82+
def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
83+
AttrSizedOperandSegments]> {
84+
let summary = "linalg.pack operation";
85+
let description = [{
86+
The "pack" operation converts a source tensor of rank `n` into a result
87+
tensor of rank `n + k` with a tiled and packed layout (maybe with padding)
88+
and optionally transposes the tiled source tensor dimensions.
89+
90+
`inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
91+
being tiled, where `0 < k <= n`. The order of the dimensions matters:
92+
- The tiled dimensions (of size `inner_tiles`) are added to the end of the result
93+
tensor in the order in which they appear in `inner_dims_pos`.
94+
- `inner_dims_pos[i]` specifies the source tensor dimension tiled by
95+
`inner_tiles[i]`.
96+
97+
`inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
98+
correspond to the least significant ("inner") result tensor dimension sizes,
99+
in the same order. Tile sizes can be static or dynamic.
100+
101+
Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
102+
`...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
103+
by 16 and the 1st source dimension is tiled by 32. Other source dimensions
104+
(if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is
105+
tiled by 16 and the 0th dimension is tiled by 32.
106+
107+
Example:
108+
```mlir
109+
// NC to NCnc
110+
%0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
111+
into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
112+
// \ / \ /
113+
// outer dims inner dims
114+
```
115+
116+
`outer_dims_perm` (optional) specifies a permutation for the outer
117+
dimensions. If specified, it must have `n` elements.
118+
119+
Example:
120+
```mlir
121+
// CK to KCck
122+
%0 = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
123+
inner_tiles = [8, 32] into %dest
124+
: tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32>
125+
// \ /
126+
// compare with "NC to NCnc": outer dims are transposed
127+
```
128+
129+
`padding_value` specifies a padding value at the boundary on non-perfectly
130+
divisible dimensions. Padding is optional:
131+
- If absent, it is UB if the tile does not perfectly divide the dimension.
132+
- If present, it will pad along high dimensions (high-padding) to make the
133+
tile complete.
134+
135+
Example:
136+
```mlir
137+
%0 = linalg.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0]
138+
inner_dims_pos = [1] inner_tiles = [2] into %arg1
139+
: tensor<200x127x256xf32> -> tensor<256x64x200x2xf32>
140+
// \
141+
// padded and tiled dim
142+
//
143+
// Source dimension 1 is tiled. The tile size 2 does not divide 127 evenly,
144+
// so 1 padded element is added at the end (64 tiles of size 2).
145+
//
146+
// Note: Only tiled dimensions can be padded.
147+
```
148+
}];
149+
let arguments = (ins AnyRankedTensor:$source,
150+
AnyRankedTensor:$dest,
151+
Optional<AnyType>:$padding_value,
152+
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
153+
DenseI64ArrayAttr:$inner_dims_pos,
154+
Variadic<Index>:$inner_tiles,
155+
DenseI64ArrayAttr:$static_inner_tiles);
156+
let results = (outs AnyRankedTensor:$result);
157+
let assemblyFormat = [{
158+
$source
159+
(`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?
160+
(`outer_dims_perm` `=` $outer_dims_perm^)?
161+
`inner_dims_pos` `=` $inner_dims_pos
162+
`inner_tiles` `=`
163+
custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
164+
`into` $dest attr-dict `:` type($source) `->` type($dest)
165+
}];
166+
167+
let builders = [
168+
OpBuilder<(ins "Value":$source, "Value":$dest,
169+
"ArrayRef<int64_t>":$innerDimsPos,
170+
"ArrayRef<OpFoldResult>":$innerTiles,
171+
CArg<"std::optional<Value>", "std::nullopt">:$paddingValue,
172+
CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
173+
];
174+
175+
let extraClassDeclaration = commonExtraClassDeclaration # [{
176+
// Method to get the shape of the result as `SmallVector<OpFoldResult>`.
177+
// This is a static method to allow getting the shape of the destination
178+
// expected while creating a `pack` op.
179+
static SmallVector<OpFoldResult> getResultShape(OpBuilder &builder,
180+
Location loc, ArrayRef<OpFoldResult> sourceDims,
181+
ArrayRef<OpFoldResult> innerTileDims, ArrayRef<int64_t> innerDimsPos,
182+
ArrayRef<int64_t> outerDimsPerm = {});
183+
184+
// Method to get the `RankedTensorType` of the result based on the inner
185+
// tiles, position of the inner tiles (innerDimsPos) and interchange vector
186+
// of outer loops (outerDimsPerm).
187+
static RankedTensorType inferPackedType(RankedTensorType sourceType,
188+
ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
189+
ArrayRef<int64_t> outerDimsPerm = {});
190+
191+
// Returns true if we have enough static information to catch undefined
192+
// behavior when the tile size does not perfectly divide the dimension of
193+
// the input tensor. Detecting UB requires that the input size and either
194+
// corresponding tile or output size are static.
195+
static bool requirePaddingValue(ArrayRef<int64_t> inputShape,
196+
ArrayRef<int64_t> innerDimsPos,
197+
ArrayRef<int64_t> outputShape,
198+
ArrayRef<int64_t> outerDimsPerm,
199+
ArrayRef<OpFoldResult> innerTiles);
200+
201+
static Value createDestinationTensor(OpBuilder &b, Location loc,
202+
Value source, ArrayRef<OpFoldResult> innerTileSizes,
203+
ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
204+
205+
/// Build and return a new PackOp that is a clone of the current PackOp with
206+
/// (innerDimsPos, innerTiles) (resp. outerDimsPerm) permuted by
207+
/// innerPermutation (resp. outerPermutation).
208+
/// A new `tensor.empty` of the proper shape is built in the process.
209+
/// Asserts that:
210+
/// - At least one of innerPermutation or outerPermutation is non-empty.
211+
/// - If not empty, innerPermutation is a valid permutation of size
212+
/// matching innerDimsPos.
213+
/// - If not empty, outerPermutation is a valid permutation of size
214+
/// matching outerDimsPerm.
215+
PackOp createTransposedClone(OpBuilder &b,
216+
Location loc,
217+
ArrayRef<int64_t> innerPermutation,
218+
ArrayRef<int64_t> outerPermutation);
219+
220+
/// Check if this PackOp is like a simple pad operation.
221+
/// In other words, this operation:
222+
/// 1. adds useless dimensions (dimension of size 1),
223+
/// 2. pads the other ones, and
224+
/// 3. doesn't shuffle the dimensions
225+
bool isLikePad();
226+
}];
227+
228+
let hasCanonicalizeMethod = 1;
229+
230+
let hasFolder = 1;
231+
}
232+
233+
//===----------------------------------------------------------------------===//
234+
// UnPackOp
235+
//===----------------------------------------------------------------------===//
236+
237+
def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
238+
let summary = "linalg.unpack operation";
239+
let description = [{
240+
The "unpack" operation converts a source tensor of rank `n` with a tiled and
241+
packed layout to a result tensor of rank `n - k`.
242+
243+
`inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
244+
which the last `k` source tensor dimensions are combined, where
245+
`0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
246+
The order of the dimensions in `inner_dims_pos` matters: dimension
247+
`inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
248+
`outer_dims_perm` is not specified).
249+
250+
`inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
251+
correspond to the least significant ("inner") source tensor dimension sizes.
252+
The behavior of this op is undefined if:
253+
- `inner_tiles` do not exactly match with the corresponding source tensor
254+
dimension sizes.
255+
- Or, `inner_tiles[i]` does not divide the size of dimension
256+
`inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
257+
evenly.
258+
259+
`outer_dims_perm` (optional) specifies a permutation for the outer
260+
dimensions. If specified, it must have `n - k` elements. If specified, this
261+
permutation is applied before combining any dimensions.
262+
263+
Example:
264+
265+
```mlir
266+
// NCnc to NC:
267+
%0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
268+
into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
269+
270+
// KCck to CK:
271+
%0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
272+
inner_tiles = [8, 32] into %dest
273+
: tensor<8x16x8x32xf32> -> tensor<128x256xf32>
274+
```
275+
}];
276+
let arguments = (ins AnyRankedTensor:$source,
277+
AnyRankedTensor:$dest,
278+
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
279+
DenseI64ArrayAttr:$inner_dims_pos,
280+
Variadic<Index>:$inner_tiles,
281+
DenseI64ArrayAttr:$static_inner_tiles);
282+
let results = (outs AnyRankedTensor:$result);
283+
let assemblyFormat = [{
284+
$source
285+
(`outer_dims_perm` `=` $outer_dims_perm^)?
286+
`inner_dims_pos` `=` $inner_dims_pos
287+
`inner_tiles` `=`
288+
custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
289+
`into` $dest attr-dict `:` type($source) `->` type($dest)
290+
}];
291+
292+
let builders = [
293+
OpBuilder<(ins "Value":$source, "Value":$dest,
294+
"ArrayRef<int64_t>":$innerDimsPos,
295+
"ArrayRef<OpFoldResult>":$innerTiles,
296+
CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
297+
];
298+
299+
let extraClassDeclaration = commonExtraClassDeclaration # [{
300+
static Value createDestinationTensor(OpBuilder &b, Location loc,
301+
Value source, ArrayRef<OpFoldResult> innerTileSizes,
302+
ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
303+
304+
/// Build and return a new UnPackOp that is a clone of the current UnPackOp
305+
/// with (innerDimsPos, innerTiles) (resp. outerDimsPerm) permuted by
306+
/// innerPermutation (resp. outerPermutation).
307+
/// Asserts that:
308+
/// - At least one of innerPermutation or outerPermutation is non-empty.
309+
/// - If not empty, innerPermutation is a valid permutation of size
310+
/// matching innerDimsPos.
311+
/// - If not empty, outerPermutation is a valid permutation of size
312+
/// matching outerDimsPerm.
313+
UnPackOp createTransposedClone(OpBuilder &b,
314+
Location loc,
315+
Value transposedSource,
316+
ArrayRef<int64_t> innerPermutation,
317+
ArrayRef<int64_t> outerPermutation);
318+
319+
/// Check if this UnPackOp is like a simple unpad operation.
320+
/// In other words, this operation:
321+
/// 1. drops useless dimensions (dimension of size 1), and
322+
/// 2. reduces dimensions in place (i.e., no transpose.)
323+
bool isLikeUnPad();
324+
}];
325+
326+
let hasCanonicalizeMethod = 1;
327+
328+
let hasFolder = 1;
329+
}
330+
331+
#endif // LINALG_RELEAYOUT_OPS

mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,13 @@ struct PackingMetadata {
568568
// repeated N^2 counts).
569569
PackingMetadata computePackingMetadata(int64_t packedRank,
570570
ArrayRef<int64_t> innerDimPos);
571+
572+
/// Try to remove a tensor operation if it would only reshape a constant.
573+
/// Removes the op and replaces the constant with a new constant of the result
574+
/// shape. When an optional cst attribute is passed, it is reshaped only if the
575+
/// splat value matches the value in the attribute.
576+
OpFoldResult reshapeConstantSource(DenseElementsAttr source, TensorType result,
577+
std::optional<Attribute> cst = std::nullopt);
571578
} // namespace mlir
572579

573580
#endif // MLIR_DIALECT_UTILS_RESHAPEOPSUTILS_H

mlir/lib/Dialect/Linalg/IR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRLinalgDialect
1313
MLIRLinalgOpsEnumsIncGen
1414
MLIRLinalgOpsIncGen
1515
MLIRLinalgStructuredOpsIncGen
16+
MLIRLinalgRelayoutOpsIncGen
1617
MLIRShardingInterfaceIncGen
1718

1819
LINK_LIBS PUBLIC

0 commit comments

Comments
 (0)