Skip to content

Commit 517800e

Browse files
authored
[mlir][tensor][linalg] Move Pack/UnPack Ops to Linalg (#123902)
Moves `PackOp` and `UnPackOp` from the Tensor dialect to Linalg. This change was discussed in the following RFC: * https://discourse.llvm.org/t/rfc-move-tensor-pack-and-tensor-unpack-into-linalg This change involves significant churn but only relocates existing code - no new functionality is added. **Note for Downstream Users** Downstream users must update references to `PackOp` and `UnPackOp` as follows: * Code: `s/tensor::(Un)PackOp/linalg::(Un)PackOp/g` * Tests: `s/tensor.(un)pack/linalg.(un)pack/g` No other modifications should be required.
1 parent 9d24f94 commit 517800e

File tree

76 files changed

+4496
-4394
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+4496
-4394
lines changed

mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ add_public_tablegen_target(MLIRLinalgStructuredOpsIncGen)
6565
add_dependencies(MLIRLinalgStructuredOpsIncGen LinalgOdsGen)
6666
add_dependencies(mlir-headers MLIRLinalgStructuredOpsIncGen)
6767

68+
set(LLVM_TARGET_DEFINITIONS LinalgRelayoutOps.td)
69+
mlir_tablegen(LinalgRelayoutOps.h.inc -gen-op-decls)
70+
mlir_tablegen(LinalgRelayoutOps.cpp.inc -gen-op-defs)
71+
add_public_tablegen_target(MLIRLinalgRelayoutOpsIncGen)
72+
add_dependencies(MLIRLinalgRelayoutOpsIncGen LinalgOdsGen)
73+
add_dependencies(mlir-headers MLIRLinalgRelayoutOpsIncGen)
74+
6875
set(LLVM_TARGET_DEFINITIONS LinalgInterfaces.td)
6976
mlir_tablegen(LinalgInterfaces.h.inc -gen-op-interface-decls)
7077
mlir_tablegen(LinalgInterfaces.cpp.inc -gen-op-interface-defs)

mlir/include/mlir/Dialect/Linalg/IR/Linalg.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,7 @@ OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val,
123123
#define GET_OP_CLASSES
124124
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.h.inc"
125125

126+
#define GET_OP_CLASSES
127+
#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.h.inc"
128+
126129
#endif // MLIR_DIALECT_LINALG_IR_LINALG_H

mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,16 @@ def LinalgConvolutionOpInterface : OpInterface<"ConvolutionOpInterface"> {
178178
];
179179
}
180180

181+
def LinalgRelayoutOpInterface : OpInterface<"RelayoutOpInterface"> {
182+
let description = [{
183+
A Linalg relayout-op is either linalg.pack or linalg.unpack.
184+
185+
While we could extend this interface with methods from Linalg_RelayoutOp,
186+
this is currently not needed and left as a TODO.
187+
}];
188+
let cppNamespace = "::mlir::linalg";
189+
}
190+
181191
def LinalgFillOpInterface : OpInterface<"FillOpInterface"> {
182192
let description = [{
183193
A fill operation is defined in general terms:
Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
//===- LinalgRelayoutOps.td - Linalg relayout ops ----------*- tablegen -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines Pack + Unpack Ops that have been moved from the Tensor
10+
// dialect. As such, these are defined as memory-effect-free and only accept
11+
// "tensors" as inputs.
12+
//
13+
// TODO: Once a good motivating example is identified, relax these
14+
// restrictions.
15+
//
16+
//===----------------------------------------------------------------------===//
17+
18+
#ifndef LINALG_RELEAYOUT_OPS
19+
#define LINALG_RELEAYOUT_OPS
20+
21+
include "mlir/Dialect/Linalg/IR/LinalgBase.td"
22+
include "mlir/Interfaces/DestinationStyleOpInterface.td"
23+
include "mlir/Interfaces/SideEffectInterfaces.td"
24+
include "mlir/Interfaces/InferTypeOpInterface.td"
25+
include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td"
26+
include "mlir/IR/OpAsmInterface.td"
27+
28+
//===----------------------------------------------------------------------===//
29+
// RelayoutOp
30+
//===----------------------------------------------------------------------===//
31+
32+
class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
33+
Op<Linalg_Dialect, mnemonic, !listconcat(traits, [
34+
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
35+
DestinationStyleOpInterface, LinalgRelayoutOpInterface,
36+
ConditionallySpeculatable, NoMemoryEffect,
37+
DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
38+
TypesMatchWith<"result type matches type of dest",
39+
"dest", "result",
40+
"$_self">])> {
41+
42+
code commonExtraClassDeclaration = [{
43+
size_t getSourceRank() { return getSourceType().getRank(); };
44+
size_t getDestRank() { return getDestType().getRank(); };
45+
RankedTensorType getSourceType() {
46+
return ::llvm::cast<RankedTensorType>(getSource().getType()); };
47+
RankedTensorType getDestType() {
48+
return ::llvm::cast<RankedTensorType>(getDest().getType()); };
49+
50+
MutableOperandRange getDpsInitsMutable() { return getDestMutable(); }
51+
52+
/// Interface method for ConditionallySpeculatable.
53+
Speculation::Speculatability getSpeculatability();
54+
55+
/// Return a mapping from positions `inner_dims_pos` to their
56+
/// tile factors.
57+
DenseMap<int64_t, OpFoldResult> getDimAndTileMapping();
58+
59+
/// Return the tile sizes as OpFoldResult.
60+
SmallVector<OpFoldResult> getMixedTiles();
61+
62+
/// Return the tile sizes as `int64_t`. If a tile size is dynamic
63+
/// a sentinel `kDynamic` is introduced at that position in
64+
/// the returned vector.
65+
SmallVector<int64_t> getStaticTiles();
66+
67+
/// Retrieve all outer dims for this Pack/UnPack Op, i.e. all the leading
68+
/// dims excluding the trailing dims corresponding to `innerTiles`. Note
69+
/// that this will include both tiled and non-tiled dimensions. The order
70+
/// of the output dimensions is consistent with the shape of the packed
71+
/// tensor.
72+
ArrayRef<int64_t> getAllOuterDims();
73+
74+
/// Similar to `getAllOuterDims`, but only retrieve the outer dims that
75+
/// have been tiled. Also, the order of the output dimensions is consistent
76+
/// with `inner_dims_pos` rather than the packed tensor.
77+
SmallVector<int64_t> getTiledOuterDims();
78+
}];
79+
80+
let hasVerifier = 1;
81+
}
82+
83+
//===----------------------------------------------------------------------===//
84+
// PackOp
85+
//===----------------------------------------------------------------------===//
86+
87+
def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
88+
AttrSizedOperandSegments]> {
89+
let summary = "linalg.pack operation";
90+
let description = [{
91+
The "pack" operation converts a source tensor of rank `n` into a result
92+
tensor of rank `n + k` with a tiled and packed layout (maybe with padding)
93+
and optionally transposes the tiled source tensor dimensions.
94+
95+
`inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
96+
being tiled, where `0 < k <= n`. The order of the dimensions matters:
97+
- The tiled dimensions (of size `inner_tiles`) are added to the end of the result
98+
tensor in the order in which they appear in `inner_dims_pos`.
99+
- `inner_dims_pos[i]` specifies the source tensor dimension tiled by
100+
`inner_tiles[i]`.
101+
102+
`inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
103+
correspond to the least significant ("inner") result tensor dimension sizes,
104+
in the same order. Tile sizes can be static or dynamic.
105+
106+
Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
107+
`...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
108+
by 16 and the 1st source dimension is tiled by 32. Other source dimensions
109+
(if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is
110+
tiled by 16 and the 0th dimension is tiled by 32.
111+
112+
Example:
113+
```mlir
114+
// NC to NCnc
115+
%0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
116+
into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
117+
// \ / \ /
118+
// outer dims inner dims
119+
```
120+
121+
`outer_dims_perm` (optional) specifies a permutation for the outer
122+
dimensions. If specified, it must have `n` elements.
123+
124+
Example:
125+
```mlir
126+
// CK to KCck
127+
%0 = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
128+
inner_tiles = [8, 32] into %dest
129+
: tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32>
130+
// \ /
131+
// compare with "NC to NCnc": outer dims are transposed
132+
```
133+
134+
`padding_value` specifies a padding value at the boundary on non-perfectly
135+
divisible dimensions. Padding is optional:
136+
- If absent, it is UB if the tile does not perfectly divide the dimension.
137+
- If present, it will pad along high dimensions (high-padding) to make the
138+
tile complete.
139+
140+
Example:
141+
```mlir
142+
%0 = linalg.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0]
143+
inner_dims_pos = [1] inner_tiles = [2] into %arg1
144+
: tensor<200x127x256xf32> -> tensor<256x64x200x2xf32>
145+
// \
146+
// padded and tiled dim
147+
//
148+
// Source dimension 1 is tiled by 2, which does not divide 127 evenly, so 1 padded
149+
// element is added at the end.
150+
//
151+
// Note: Only tiled dimensions can be padded.
152+
```
153+
}];
154+
let arguments = (ins AnyRankedTensor:$source,
155+
AnyRankedTensor:$dest,
156+
Optional<AnyType>:$padding_value,
157+
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
158+
DenseI64ArrayAttr:$inner_dims_pos,
159+
Variadic<Index>:$inner_tiles,
160+
DenseI64ArrayAttr:$static_inner_tiles);
161+
let results = (outs AnyRankedTensor:$result);
162+
let assemblyFormat = [{
163+
$source
164+
(`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?
165+
(`outer_dims_perm` `=` $outer_dims_perm^)?
166+
`inner_dims_pos` `=` $inner_dims_pos
167+
`inner_tiles` `=`
168+
custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
169+
`into` $dest attr-dict `:` type($source) `->` type($dest)
170+
}];
171+
172+
let builders = [
173+
OpBuilder<(ins "Value":$source, "Value":$dest,
174+
"ArrayRef<int64_t>":$innerDimsPos,
175+
"ArrayRef<OpFoldResult>":$innerTiles,
176+
CArg<"std::optional<Value>", "std::nullopt">:$paddingValue,
177+
CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
178+
];
179+
180+
let extraClassDeclaration = commonExtraClassDeclaration # [{
181+
// Method to get the shape of the result as `SmallVector<OpFoldResult>`.
182+
// This is a static method to allow getting the shape of the destination
183+
// expected while creating a `pack` op.
184+
static SmallVector<OpFoldResult> getResultShape(OpBuilder &builder,
185+
Location loc, ArrayRef<OpFoldResult> sourceDims,
186+
ArrayRef<OpFoldResult> innerTileDims, ArrayRef<int64_t> innerDimsPos,
187+
ArrayRef<int64_t> outerDimsPerm = {});
188+
189+
// Method to get the `RankedTensorType` of the result based on the inner
190+
// tiles, position of the inner tiles (innerDimsPos) and interchange vector
191+
// of outer loops (outerDimsPerm).
192+
static RankedTensorType inferPackedType(RankedTensorType sourceType,
193+
ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
194+
ArrayRef<int64_t> outerDimsPerm = {});
195+
196+
// Returns true if we have enough static information to catch undefined
197+
// behavior when the tile size does not divide perfectly the dimension of
198+
// the input tensor. Detecting UB requires that the input size and either
199+
// corresponding tile or output size are static.
200+
static bool requirePaddingValue(ArrayRef<int64_t> inputShape,
201+
ArrayRef<int64_t> innerDimsPos,
202+
ArrayRef<int64_t> outputShape,
203+
ArrayRef<int64_t> outerDimsPerm,
204+
ArrayRef<OpFoldResult> innerTiles);
205+
206+
static Value createDestinationTensor(OpBuilder &b, Location loc,
207+
Value source, ArrayRef<OpFoldResult> innerTileSizes,
208+
ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
209+
210+
/// Build and return a new PackOp that is a clone of the current PackOp in which
211+
/// (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted by
212+
/// innerPermutation (resp. outerPermutation).
213+
/// A new `tensor.empty` of the proper shape is built in the process.
214+
/// Asserts that:
215+
/// - At least one of innerPermutation or outerPermutation is non-empty.
216+
/// - If not empty, innerPermutation is a valid permutation of size
217+
/// matching innerDimsPos.
218+
/// - If not empty, outerPermutation is a valid permutation of size
219+
/// matching outerDimsPerm.
220+
PackOp createTransposedClone(OpBuilder &b,
221+
Location loc,
222+
ArrayRef<int64_t> innerPermutation,
223+
ArrayRef<int64_t> outerPermutation);
224+
225+
/// Check if this PackOp is like a simple pad operation.
226+
/// In other words, this operation:
227+
/// 1. adds useless dimensions (dimension of size 1),
228+
/// 2. pads the other ones, and
229+
/// 3. doesn't shuffle the dimensions
230+
bool isLikePad();
231+
}];
232+
233+
let hasCanonicalizeMethod = 1;
234+
235+
let hasFolder = 1;
236+
}
237+
238+
//===----------------------------------------------------------------------===//
239+
// UnPackOp
240+
//===----------------------------------------------------------------------===//
241+
242+
def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
243+
let summary = "linalg.unpack operation";
244+
let description = [{
245+
The "unpack" operation converts a source tensor of rank `n` with a tiled and
246+
packed layout to a result tensor of rank `n - k`.
247+
248+
`inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
249+
which the last `k` source tensor dimensions are combined, where
250+
`0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
251+
The order of the dimensions in `inner_dims_pos` matters: dimension
252+
`inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
253+
`outer_dims_perm` is not specified).
254+
255+
`inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
256+
correspond to the least significant ("inner") source tensor dimension sizes.
257+
The behavior of this op is undefined if:
258+
- `inner_tiles` do not exactly match with the corresponding source tensor
259+
dimension sizes.
260+
- Or, `inner_tiles[i]` does not divide the size of dimension
261+
`inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
262+
evenly.
263+
264+
`outer_dims_perm` (optional) specifies a permutation for the outer
265+
dimensions. If specified, it must have `n - k` elements. If specified, this
266+
permutation is applied before combining any dimensions.
267+
268+
Example:
269+
270+
```mlir
271+
// NCnc to NC:
272+
%0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
273+
into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
274+
275+
// KCck to CK:
276+
%0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
277+
inner_tiles = [8, 32] into %dest
278+
: tensor<8x16x8x32xf32> -> tensor<128x256xf32>
279+
```
280+
}];
281+
let arguments = (ins AnyRankedTensor:$source,
282+
AnyRankedTensor:$dest,
283+
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
284+
DenseI64ArrayAttr:$inner_dims_pos,
285+
Variadic<Index>:$inner_tiles,
286+
DenseI64ArrayAttr:$static_inner_tiles);
287+
let results = (outs AnyRankedTensor:$result);
288+
let assemblyFormat = [{
289+
$source
290+
(`outer_dims_perm` `=` $outer_dims_perm^)?
291+
`inner_dims_pos` `=` $inner_dims_pos
292+
`inner_tiles` `=`
293+
custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
294+
`into` $dest attr-dict `:` type($source) `->` type($dest)
295+
}];
296+
297+
let builders = [
298+
OpBuilder<(ins "Value":$source, "Value":$dest,
299+
"ArrayRef<int64_t>":$innerDimsPos,
300+
"ArrayRef<OpFoldResult>":$innerTiles,
301+
CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
302+
];
303+
304+
let extraClassDeclaration = commonExtraClassDeclaration # [{
305+
static Value createDestinationTensor(OpBuilder &b, Location loc,
306+
Value source, ArrayRef<OpFoldResult> innerTileSizes,
307+
ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
308+
309+
/// Build and return a new UnPackOp that is a clone of the current UnPackOp
310+
/// in which (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted by
311+
/// innerPermutation (resp. outerPermutation).
312+
/// Asserts that:
313+
/// - At least one of innerPermutation or outerPermutation is non-empty.
314+
/// - If not empty, innerPermutation is a valid permutation of size
315+
/// matching innerDimsPos.
316+
/// - If not empty, outerPermutation is a valid permutation of size
317+
/// matching outerDimsPerm.
318+
UnPackOp createTransposedClone(OpBuilder &b,
319+
Location loc,
320+
Value transposedSource,
321+
ArrayRef<int64_t> innerPermutation,
322+
ArrayRef<int64_t> outerPermutation);
323+
324+
/// Check if this UnPackOp is like a simple unpad operation.
325+
/// In other words, this operation:
326+
/// 1. drops useless dimensions (dimension of size 1), and
327+
/// 2. reduces dimensions in place (i.e., no transpose.)
328+
bool isLikeUnPad();
329+
}];
330+
331+
let hasCanonicalizeMethod = 1;
332+
333+
let hasFolder = 1;
334+
}
335+
336+
#endif // LINALG_RELEAYOUT_OPS

0 commit comments

Comments
 (0)