|
| 1 | +//===- LinalgRelayoutOps.td - Linalg relayout ops ----------*- tablegen -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// This file defines Pack + Unpack Ops that have been moved from the Tensor |
| 10 | +// dialect. As such, these are defined as memory-effect-free and only accept |
| 11 | +// "tensors" as inputs. |
| 12 | +// |
| 13 | +// TODO: Once a good motivating example is identified, relax these |
| 14 | +// restrictions. |
| 15 | +// |
| 16 | +//===----------------------------------------------------------------------===// |
| 17 | + |
| 18 | +#ifndef LINALG_RELEAYOUT_OPS |
| 19 | +#define LINALG_RELEAYOUT_OPS |
| 20 | + |
| 21 | +include "mlir/Dialect/Linalg/IR/LinalgBase.td" |
| 22 | +include "mlir/Interfaces/DestinationStyleOpInterface.td" |
| 23 | +include "mlir/Interfaces/SideEffectInterfaces.td" |
| 24 | +include "mlir/Interfaces/InferTypeOpInterface.td" |
| 25 | +include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td" |
| 26 | +include "mlir/IR/OpAsmInterface.td" |
| 27 | + |
| 28 | +//===----------------------------------------------------------------------===// |
| 29 | +// RelayoutOp |
| 30 | +//===----------------------------------------------------------------------===// |
| 31 | + |
| 32 | +class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> : |
| 33 | + Op<Linalg_Dialect, mnemonic, !listconcat(traits, [ |
| 34 | + DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>, |
| 35 | + DestinationStyleOpInterface, LinalgRelayoutOpInterface, |
| 36 | + ConditionallySpeculatable, NoMemoryEffect, |
| 37 | + DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>, |
| 38 | + TypesMatchWith<"result type matches type of dest", |
| 39 | + "dest", "result", |
| 40 | + "$_self">])> { |
| 41 | + // C++ declarations that derived ops prepend to their extraClassDeclaration. |
| 42 | + code commonExtraClassDeclaration = [{ |
| 43 | + size_t getSourceRank() { return getSourceType().getRank(); } |
| 44 | + size_t getDestRank() { return getDestType().getRank(); } |
| 45 | + RankedTensorType getSourceType() { |
| 46 | + return ::llvm::cast<RankedTensorType>(getSource().getType()); } |
| 47 | + RankedTensorType getDestType() { |
| 48 | + return ::llvm::cast<RankedTensorType>(getDest().getType()); } |
| 49 | + /// Interface method for DestinationStyleOpInterface. |
| 50 | + MutableOperandRange getDpsInitsMutable() { return getDestMutable(); } |
| 51 | + |
| 52 | + /// Interface method for ConditionallySpeculatable. |
| 53 | + Speculation::Speculatability getSpeculatability(); |
| 54 | + |
| 55 | + /// Return a mapping from positions `inner_dims_pos` to their |
| 56 | + /// tile factors. |
| 57 | + DenseMap<int64_t, OpFoldResult> getDimAndTileMapping(); |
| 58 | + |
| 59 | + /// Return the tile sizes as OpFoldResult. |
| 60 | + SmallVector<OpFoldResult> getMixedTiles(); |
| 61 | + |
| 62 | + /// Return the tile sizes as `int64_t`. If a tile size is dynamic |
| 63 | + /// a sentinel `kDynamic` is introduced at that position in |
| 64 | + /// the returned vector. |
| 65 | + SmallVector<int64_t> getStaticTiles(); |
| 66 | + |
| 67 | + /// Retrieve all outer dims for this Pack/UnPack Op, i.e. all the leading |
| 68 | + /// dims excluding the trailing dims corresponding to `innerTiles`. Note |
| 69 | + /// that this will include both tiled and non-tiled dimensions. The order |
| 70 | + /// of the output dimensions is consistent with the shape of the packed |
| 71 | + /// tensor. |
| 72 | + ArrayRef<int64_t> getAllOuterDims(); |
| 73 | + |
| 74 | + /// Similar to `getAllOuterDims`, but only retrieve the outer dims that |
| 75 | + /// have been tiled. Also, the order of the output dimensions is consistent |
| 76 | + /// with `inner_dims_pos` rather than the packed tensor. |
| 77 | + SmallVector<int64_t> getTiledOuterDims(); |
| 78 | + }]; |
| 79 | + |
| 80 | + let hasVerifier = 1; |
| 81 | +} |
| 82 | + |
| 83 | +//===----------------------------------------------------------------------===// |
| 84 | +// PackOp |
| 85 | +//===----------------------------------------------------------------------===// |
| 86 | + |
| 87 | +def Linalg_PackOp : Linalg_RelayoutOp<"pack", [ |
| 88 | + AttrSizedOperandSegments]> { |
| 89 | + let summary = "linalg.pack operation"; |
| 90 | + let description = [{ |
| 91 | + The "pack" operation converts a source tensor of rank `n` into a result |
| 92 | + tensor of rank `n + k` with a tiled and packed layout (maybe with padding) |
| 93 | + and optionally transposes the tiled source tensor dimensions. |
| 94 | + |
| 95 | + `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are |
| 96 | + being tiled, where `0 < k <= n`. The order of the dimensions matters: |
| 97 | + - The tiled dimensions (of size `inner_tiles`) are added to the end of the result |
| 98 | + tensor in the order in which they appear in `inner_dims_pos`. |
| 99 | + - `inner_dims_pos[i]` specifies the source tensor dimension tiled by |
| 100 | + `inner_tiles[i]`. |
| 101 | + |
| 102 | + `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes |
| 103 | + correspond to the least significant ("inner") result tensor dimension sizes, |
| 104 | + in the same order. Tile sizes can be static or dynamic. |
| 105 | + |
| 106 | + Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of |
| 107 | + `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled |
| 108 | + by 16 and the 1st source dimension is tiled by 32. Other source dimensions |
| 109 | + (if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is |
| 110 | + tiled by 16 and the 0th dimension is tiled by 32. |
| 111 | + |
| 112 | + Example: |
| 113 | + ```mlir |
| 114 | + // NC to NCnc |
| 115 | + %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32] |
| 116 | + into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32> |
| 117 | + // \ / \ / |
| 118 | + // outer dims inner dims |
| 119 | + ``` |
| 120 | + |
| 121 | + `outer_dims_perm` (optional) specifies a permutation for the outer |
| 122 | + dimensions. If specified, it must have `n` elements. |
| 123 | + |
| 124 | + Example: |
| 125 | + ```mlir |
| 126 | + // CK to KCck |
| 127 | + %0 = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] |
| 128 | + inner_tiles = [8, 32] into %dest |
| 129 | + : tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32> |
| 130 | + // \ / |
| 131 | + // compare with "NC to NCnc": outer dims are transposed |
| 132 | + ``` |
| 133 | + |
| 134 | + `padding_value` specifies a padding value at the boundary on non-perfectly |
| 135 | + divisible dimensions. Padding is optional: |
| 136 | + - If absent, it is UB if the tile does not perfectly divide the dimension. |
| 137 | + - If present, it will pad along high dimensions (high-padding) to make the |
| 138 | + tile complete. |
| 139 | + |
| 140 | + Example: |
| 141 | + ```mlir |
| 142 | + %0 = linalg.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0] |
| 143 | + inner_dims_pos = [1] inner_tiles = [2] into %arg1 |
| 144 | + : tensor<200x127x256xf32> -> tensor<256x64x200x2xf32> |
| 145 | + // \ |
| 146 | + // padded and tiled dim |
| 147 | + // |
| 148 | + // Source dimension 1 is tiled. The tile size 2 does not divide 127 evenly, |
| 149 | + // so 1 padded element is added at the end (yielding 64 tiles). |
| 150 | + // |
| 151 | + // Note: Only tiled dimensions can be padded. |
| 152 | + ``` |
| 153 | + }]; |
| 154 | + let arguments = (ins AnyRankedTensor:$source, |
| 155 | + AnyRankedTensor:$dest, |
| 156 | + Optional<AnyType>:$padding_value, |
| 157 | + DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm, |
| 158 | + DenseI64ArrayAttr:$inner_dims_pos, |
| 159 | + Variadic<Index>:$inner_tiles, |
| 160 | + DenseI64ArrayAttr:$static_inner_tiles); |
| 161 | + let results = (outs AnyRankedTensor:$result); |
| 162 | + let assemblyFormat = [{ |
| 163 | + $source |
| 164 | + (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)? |
| 165 | + (`outer_dims_perm` `=` $outer_dims_perm^)? |
| 166 | + `inner_dims_pos` `=` $inner_dims_pos |
| 167 | + `inner_tiles` `=` |
| 168 | + custom<DynamicIndexList>($inner_tiles, $static_inner_tiles) |
| 169 | + `into` $dest attr-dict `:` type($source) `->` type($dest) |
| 170 | + }]; |
| 171 | + |
| 172 | + let builders = [ |
| 173 | + OpBuilder<(ins "Value":$source, "Value":$dest, |
| 174 | + "ArrayRef<int64_t>":$innerDimsPos, |
| 175 | + "ArrayRef<OpFoldResult>":$innerTiles, |
| 176 | + CArg<"std::optional<Value>", "std::nullopt">:$paddingValue, |
| 177 | + CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)> |
| 178 | + ]; |
| 179 | + |
| 180 | + let extraClassDeclaration = commonExtraClassDeclaration # [{ |
| 181 | + // Method to get the shape of the result as `SmallVector<OpFoldResult>`. |
| 182 | + // This is a static method to allow getting the shape of the destination |
| 183 | + // expected while creating a `pack` op. |
| 184 | + static SmallVector<OpFoldResult> getResultShape(OpBuilder &builder, |
| 185 | + Location loc, ArrayRef<OpFoldResult> sourceDims, |
| 186 | + ArrayRef<OpFoldResult> innerTileDims, ArrayRef<int64_t> innerDimsPos, |
| 187 | + ArrayRef<int64_t> outerDimsPerm = {}); |
| 188 | + |
| 189 | + // Method to get the `RankedTensorType` of the result based on the inner |
| 190 | + // tiles, position of the inner tiles (innerDimsPos) and interchange vector |
| 191 | + // of outer loops (outerDimsPerm). |
| 192 | + static RankedTensorType inferPackedType(RankedTensorType sourceType, |
| 193 | + ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos, |
| 194 | + ArrayRef<int64_t> outerDimsPerm = {}); |
| 195 | + |
| 196 | + // Returns true if we have enough static information to catch undefined |
| 197 | + // behavior when the tile size does not divide perfectly the dimension of |
| 198 | + // the input tensor. Detecting UB requires that the input size and either |
| 199 | + // corresponding tile or output size are static. |
| 200 | + static bool requirePaddingValue(ArrayRef<int64_t> inputShape, |
| 201 | + ArrayRef<int64_t> innerDimsPos, |
| 202 | + ArrayRef<int64_t> outputShape, |
| 203 | + ArrayRef<int64_t> outerDimsPerm, |
| 204 | + ArrayRef<OpFoldResult> innerTiles); |
| 205 | + |
| 206 | + static Value createDestinationTensor(OpBuilder &b, Location loc, |
| 207 | + Value source, ArrayRef<OpFoldResult> innerTileSizes, |
| 208 | + ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm); |
| 209 | + |
| 210 | + /// Build and return a new PackOp that is a clone of the current PackOp, |
| 211 | + /// except that (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted |
| 212 | + /// by innerPermutation (resp. outerPermutation). |
| 213 | + /// A new `tensor.empty` of the proper shape is built in the process. |
| 214 | + /// Asserts that: |
| 215 | + /// - At least one of innerPermutation or outerPermutation is non-empty. |
| 216 | + /// - If not empty, innerPermutation is a valid permutation of size |
| 217 | + /// matching innerDimsPos. |
| 218 | + /// - If not empty, outerPermutation is a valid permutation of size |
| 219 | + /// matching outerDimsPerm. |
| 220 | + PackOp createTransposedClone(OpBuilder &b, |
| 221 | + Location loc, |
| 222 | + ArrayRef<int64_t> innerPermutation, |
| 223 | + ArrayRef<int64_t> outerPermutation); |
| 224 | + |
| 225 | + /// Check if this PackOp is like a simple pad operation. |
| 226 | + /// In other words, this operation: |
| 227 | + /// 1. adds useless dimensions (dimension of size 1), |
| 228 | + /// 2. pads the other ones, and |
| 229 | + /// 3. doesn't shuffle the dimensions |
| 230 | + bool isLikePad(); |
| 231 | + }]; |
| 232 | + |
| 233 | + let hasCanonicalizeMethod = 1; |
| 234 | + |
| 235 | + let hasFolder = 1; |
| 236 | +} |
| 237 | + |
| 238 | +//===----------------------------------------------------------------------===// |
| 239 | +// UnPackOp |
| 240 | +//===----------------------------------------------------------------------===// |
| 241 | + |
| 242 | +def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> { |
| 243 | + let summary = "linalg.unpack operation"; |
| 244 | + let description = [{ |
| 245 | + The "unpack" operation converts a source tensor of rank `n` with a tiled and |
| 246 | + packed layout to a result tensor of rank `n - k`. |
| 247 | + |
| 248 | + `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with |
| 249 | + which the last `k` source tensor dimensions are combined, where |
| 250 | + `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`. |
| 251 | + The order of the dimensions in `inner_dims_pos` matters: dimension |
| 252 | + `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that |
| 253 | + `outer_dims_perm` is not specified). |
| 254 | + |
| 255 | + `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes |
| 256 | + correspond to the least significant ("inner") source tensor dimension sizes. |
| 257 | + The behavior of this op is undefined if: |
| 258 | + - `inner_tiles` do not exactly match with the corresponding source tensor |
| 259 | + dimension sizes. |
| 260 | + - Or, `inner_tiles[i]` does not divide the size of dimension |
| 261 | + `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified) |
| 262 | + evenly. |
| 263 | + |
| 264 | + `outer_dims_perm` (optional) specifies a permutation for the outer |
| 265 | + dimensions. If specified, it must have `n - k` elements. If specified, this |
| 266 | + permutation is applied before combining any dimensions. |
| 267 | + |
| 268 | + Example: |
| 269 | + |
| 270 | + ```mlir |
| 271 | + // NCnc to NC: |
| 272 | + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32] |
| 273 | + into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32> |
| 274 | + |
| 275 | + // KCck to CK: |
| 276 | + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] |
| 277 | + inner_tiles = [8, 32] into %dest |
| 278 | + : tensor<8x16x8x32xf32> -> tensor<128x256xf32> |
| 279 | + ``` |
| 280 | + }]; |
| 281 | + let arguments = (ins AnyRankedTensor:$source, |
| 282 | + AnyRankedTensor:$dest, |
| 283 | + DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm, |
| 284 | + DenseI64ArrayAttr:$inner_dims_pos, |
| 285 | + Variadic<Index>:$inner_tiles, |
| 286 | + DenseI64ArrayAttr:$static_inner_tiles); |
| 287 | + let results = (outs AnyRankedTensor:$result); |
| 288 | + let assemblyFormat = [{ |
| 289 | + $source |
| 290 | + (`outer_dims_perm` `=` $outer_dims_perm^)? |
| 291 | + `inner_dims_pos` `=` $inner_dims_pos |
| 292 | + `inner_tiles` `=` |
| 293 | + custom<DynamicIndexList>($inner_tiles, $static_inner_tiles) |
| 294 | + `into` $dest attr-dict `:` type($source) `->` type($dest) |
| 295 | + }]; |
| 296 | + |
| 297 | + let builders = [ |
| 298 | + OpBuilder<(ins "Value":$source, "Value":$dest, |
| 299 | + "ArrayRef<int64_t>":$innerDimsPos, |
| 300 | + "ArrayRef<OpFoldResult>":$innerTiles, |
| 301 | + CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)> |
| 302 | + ]; |
| 303 | + |
| 304 | + let extraClassDeclaration = commonExtraClassDeclaration # [{ |
| 305 | + static Value createDestinationTensor(OpBuilder &b, Location loc, |
| 306 | + Value source, ArrayRef<OpFoldResult> innerTileSizes, |
| 307 | + ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm); |
| 308 | + |
| 309 | + /// Build and return a new UnPackOp that is a clone of the current UnPackOp, |
| 310 | + /// except that (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted |
| 311 | + /// by innerPermutation (resp. outerPermutation). |
| 312 | + /// Asserts that: |
| 313 | + /// - At least one of innerPermutation or outerPermutation is non-empty. |
| 314 | + /// - If not empty, innerPermutation is a valid permutation of size |
| 315 | + /// matching innerDimsPos. |
| 316 | + /// - If not empty, outerPermutation is a valid permutation of size |
| 317 | + /// matching outerDimsPerm. |
| 318 | + UnPackOp createTransposedClone(OpBuilder &b, |
| 319 | + Location loc, |
| 320 | + Value transposedSource, |
| 321 | + ArrayRef<int64_t> innerPermutation, |
| 322 | + ArrayRef<int64_t> outerPermutation); |
| 323 | + |
| 324 | + /// Check if this UnPackOp is like a simple unpad operation. |
| 325 | + /// In other words, this operation: |
| 326 | + /// 1. drops useless dimensions (dimension of size 1), and |
| 327 | + /// 2. reduces dimensions in place (i.e., no transpose.) |
| 328 | + bool isLikeUnPad(); |
| 329 | + }]; |
| 330 | + |
| 331 | + let hasCanonicalizeMethod = 1; |
| 332 | + |
| 333 | + let hasFolder = 1; |
| 334 | +} |
| 335 | + |
| 336 | +#endif // LINALG_RELEAYOUT_OPS |
0 commit comments