@@ -1753,42 +1753,71 @@ def Tensor_PackOp : Tensor_RelayoutOp<"pack", [
1753
1753
AttrSizedOperandSegments]> {
1754
1754
let summary = "tensor pack operation";
1755
1755
let description = [{
1756
- The pack operation converts an input tensor to a higher-dimensional tensor
1757
- with a tiled and packed layout. The mandatory `inner_dims_pos` attribute
1758
- specifies a permutation for the original dimensions, while `inner_tiles` is the
1759
- tiling factor for each dimension. The optional attribute `outer_dims_perm`
1760
- specifies the order for the tiled data dimension, while the attribute
1761
- `padding_value` specifies a padding value at the boundary on non-perfectly
1762
- divisible dimensions. Padding is optional:
1763
- - If absent, it is UB if the tile does not perfectly divide the dimension.
1764
- - If present, it will pad along high dimensions (high-padding) to make the
1765
- tile complete.
1766
-
1767
- Example NC_to_NCnc:
1756
+ The "pack" operation converts a source tensor of rank `n` into a result
1757
+ tensor of rank `n + k` with a tiled and packed layout (optionally with padding)
1758
+ and optionally transposes the tiled source tensor dimensions.
1759
+
1760
+ `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
1761
+ being tiled, where `0 < k <= n`. The order of the dimensions matters: the
1762
+ tiled dimensions (of size `inner_tiles`) are added to the end of the result
1763
+ tensor in the order in which they appear in `inner_dims_pos`.
1764
+
1765
+ Each entry of `inner_dims_pos` is also paired with the tile size at the same
1766
+ position in `inner_tiles` (both lists have `k` entries):
1767
+ `inner_dims_pos[i]` specifies the source tensor dimension tiled by
1768
+ `inner_tiles[i]`.
1769
+
1770
+ `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
1771
+ correspond to the least significant ("inner") result tensor dimension sizes,
1772
+ in the same order. Tile sizes can be static or dynamic.
1773
+
1774
+ Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
1775
+ `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
1776
+ by 16 and the 1st source dimension is tiled by 32. Other source dimensions
1777
+ (if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is
1778
+ tiled by 16 and the 0th dimension is tiled by 32.
1768
1779
1780
+ Example:
1769
1781
```mlir
1770
- %0 = tensor.pack %source inner_dims_pos = [0, 1]
1771
- inner_tiles = [8, 32] into %dest : tensor<128x256xf32> -> tensor<16x8x8x32xf32>
1782
+ // NC to NCnc
1783
+ %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
1784
+ into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
1785
+ // \ / \ /
1786
+ // outer dims inner dims
1772
1787
```
1773
- Example CK to KCck
1774
1788
1789
+ `outer_dims_perm` (optional) specifies a permutation for the outer
1790
+ dimensions. If specified, it must have `n` elements.
1791
+
1792
+ Example:
1775
1793
```mlir
1794
+ // CK to KCck
1776
1795
%0 = tensor.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
1777
- inner_tiles = [8, 32] into %dest : tensor<128x256xf32> -> tensor<8x16x8x32xf32>
1796
+ inner_tiles = [8, 32] into %dest
1797
+ : tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32>
1798
+ // \ /
1799
+ // compare with "NC to NCnc": outer dims are transposed
1778
1800
```
1779
1801
1780
- In all cases, dimension at position 0 in the input tensor (128) is tiled
1781
- with a factor of 8, while dimension at position 1 (256) is tiled with a factor
1782
- of 32. In the second example, the outer data dimensions are interchanged
1783
- according to `outer_dims_perm`.
1784
-
1785
- Example NC_to_NCnc with padding:
1802
+ `padding_value` specifies a padding value at the boundary on non-perfectly
1803
+ divisible dimensions. Padding is optional:
1804
+ - If absent, it is UB if the tile does not perfectly divide the dimension.
1805
+ - If present, it will pad along high dimensions (high-padding) to make the
1806
+ tile complete.
1786
1807
1808
+ Example:
1787
1809
```mlir
1788
- %0 = tensor.pack %arg padding_value(%pad : f32) inner_dims_pos = [0, 1]
1789
- inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
1810
+ %0 = tensor.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0]
1811
+ inner_dims_pos = [1] inner_tiles = [2] into %arg1
1812
+ : tensor<200x127x256xf32> -> tensor<256x64x200x2xf32>
1813
+ // \
1814
+ // padded and tiled dim
1815
+ //
1816
+ // Source dimension 1 is tiled. The tile size 2 does not divide 127 evenly,
1817
+ // so 1 padded element is added at the end, giving 64 tiles.
1818
+ //
1819
+ // Note: Only tiled dimensions can be padded.
1790
1820
```
1791
-
1792
1821
}];
1793
1822
let arguments = (ins AnyRankedTensor:$source,
1794
1823
AnyRankedTensor:$dest,
@@ -1880,25 +1909,40 @@ def Tensor_PackOp : Tensor_RelayoutOp<"pack", [
1880
1909
def Tensor_UnPackOp : Tensor_RelayoutOp<"unpack"> {
1881
1910
let summary = "tensor unpack operation";
1882
1911
let description = [{
1883
- The unpack operation converts a tensor with a tiled and packed layout to a
1884
- lower-dimensional tensor. Similar to `pack`, the mandatory attributes
1885
- `inner_dims_pos` specifies a permutation for the inner data dimensions, while
1886
- `inner_tiles` is the tiling factor. The attribute `outer_dims_perm` has the
1887
- exact behavior as the one described in `pack`. In `unpack`, it is UB if the
1888
- tile does not perfectly divide the dimension.
1912
+ The "unpack" operation converts a source tensor of rank `n` with a tiled and
1913
+ packed layout to a result tensor of rank `n - k`.
1914
+
1915
+ `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
1916
+ which the last `k` source tensor dimensions are combined, where
1917
+ `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
1918
+ The order of the dimensions in `inner_dims_pos` matters: dimension
1919
+ `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
1920
+ `outer_dims_perm` is not specified).
1921
+
1922
+ `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
1923
+ correspond to the least significant ("inner") source tensor dimension sizes.
1924
+ The behavior of this op is undefined if:
1925
+ - The `inner_tiles` values do not exactly match the corresponding source tensor
1926
+ dimension sizes.
1927
+ - Or, `inner_tiles[i]` does not divide the size of dimension
1928
+ `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
1929
+ evenly.
1930
+
1931
+ `outer_dims_perm` (optional) specifies a permutation for the outer
1932
+ dimensions. If specified, it must have `n - k` elements. If specified, this
1933
+ permutation is applied before combining any dimensions.
1889
1934
1890
- Example NCnc_to_NC:
1891
-
1892
- ```mlir
1893
- %0 = tensor.unpack %source inner_dims_pos = [0, 1]
1894
- inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
1895
- ```
1896
-
1897
- Example CK to KCck:
1935
+ Example:
1898
1936
1899
1937
```mlir
1900
- %0 = tensor.unapck %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
1901
- inner_tiles = [8, 32] into %dest : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
1938
+ // NCnc to NC:
1939
+ %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
1940
+ into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
1941
+
1942
+ // KCck to CK:
1943
+ %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
1944
+ inner_tiles = [8, 32] into %dest
1945
+ : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
1902
1946
```
1903
1947
}];
1904
1948
let arguments = (ins AnyRankedTensor:$source,
0 commit comments