@@ -19,13 +19,14 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x
19
19
20
20
// -----
21
21
22
- func.func @simple_pad_and_pack (%input: tensor <5 x1 xf32 >, %output: tensor <1 x1 x8 x2 xf32 >, %pad: f32 ) -> tensor <1 x1 x8 x2 xf32 > {
22
+ func.func @simple_pad_and_pack_static_tiles (%input: tensor <5 x1 xf32 >, %output: tensor <1 x1 x8 x2 xf32 >, %pad: f32 ) -> tensor <1 x1 x8 x2 xf32 > {
23
23
%0 = tensor.pack %input padding_value (%pad : f32 ) inner_dims_pos = [0 , 1 ] inner_tiles = [8 , 2 ] into %output : tensor <5 x1 xf32 > -> tensor <1 x1 x8 x2 xf32 >
24
24
return %0 : tensor <1 x1 x8 x2 xf32 >
25
25
}
26
26
// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 - 5)>
27
+ // CHECK: #[[$ATTR_1:.+]] = affine_map<()[s0] -> (s0 - 1)>
27
28
28
- // CHECK-LABEL: func.func @simple_pad_and_pack
29
+ // CHECK-LABEL: func.func @simple_pad_and_pack_static_tiles
29
30
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
30
31
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
31
32
// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]
@@ -36,18 +37,18 @@ func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2x
36
37
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1]
37
38
// CHECK: return %[[INSERT]]
38
39
39
- /// Same as example above, but with dynamic tile size.
40
+ /// Same as the example above, but with one dynamic tile size.
40
41
41
- func.func @simple_pad_and_pack_dynamic (%input: tensor <5 x1 xf32 >, %output: tensor <1 x1 x?x2 xf32 >, %pad: f32 , %high: index ) -> tensor <1 x1 x?x2 xf32 > {
42
+ func.func @simple_pad_and_pack_dynamic_tile (%input: tensor <5 x1 xf32 >, %output: tensor <1 x1 x?x2 xf32 >, %pad: f32 , %high: index ) -> tensor <1 x1 x?x2 xf32 > {
42
43
%0 = tensor.pack %input padding_value (%pad : f32 ) inner_dims_pos = [0 , 1 ] inner_tiles = [%high , 2 ] into %output : tensor <5 x1 xf32 > -> tensor <1 x1 x?x2 xf32 >
43
44
return %0 : tensor <1 x1 x?x2 xf32 >
44
45
}
45
46
46
- // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic (
47
+ // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile (
47
48
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
48
49
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
49
50
// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]
50
- // CHECK-SAME: %[[HIGH_VAL:.* ]]: index) -> tensor<1x1x?x2xf32> {
51
+ // CHECK-SAME: %[[HIGH_VAL:[a-zA-Z0-9]+ ]]: index) -> tensor<1x1x?x2xf32> {
51
52
// CHECK: %[[C2:.*]] = arith.constant 2 : index
52
53
// CHECK: %[[PAD_HIGH:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[HIGH_VAL]]]
53
54
// CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0] high{{\[}}%[[PAD_HIGH]], 1] {
@@ -58,21 +59,21 @@ func.func @simple_pad_and_pack_dynamic(%input: tensor<5x1xf32>, %output: tensor<
58
59
// CHECK: %[[RES:.*]] = tensor.insert_slice %[[SLICE]] into %[[DEST]][0, 0, 0, 0] [1, 1, %[[DIM]], 2] [1, 1, 1, 1] : tensor<?x2xf32> into tensor<1x1x?x2xf32>
59
60
// CHECK: return %[[RES]] : tensor<1x1x?x2xf32>
60
61
61
- /// Same as example above, but with scalable tile size.
62
+ /// Same as the example above, but with one scalable tile size.
62
63
63
64
/// NOTE: For this example to make sense in practice, the "?" in the output shape
64
65
/// should effectively be 8 * vector.vscale (and that's what tensor.dim
65
66
/// below should return).
66
67
67
- func.func @simple_pad_and_pack_scalable (%input: tensor <5 x1 xf32 >, %output: tensor <1 x1 x?x2 xf32 >, %pad: f32 ) -> tensor <1 x1 x?x2 xf32 > {
68
+ func.func @simple_pad_and_pack_scalable_tile (%input: tensor <5 x1 xf32 >, %output: tensor <1 x1 x?x2 xf32 >, %pad: f32 ) -> tensor <1 x1 x?x2 xf32 > {
68
69
%c8 = arith.constant 8 : index
69
70
%vscale = vector.vscale
70
71
%c8_vscale = arith.muli %vscale , %c8 : index
71
72
%0 = tensor.pack %input padding_value (%pad : f32 ) inner_dims_pos = [0 , 1 ] inner_tiles = [%c8_vscale , 2 ] into %output : tensor <5 x1 xf32 > -> tensor <1 x1 x?x2 xf32 >
72
73
return %0 : tensor <1 x1 x?x2 xf32 >
73
74
}
74
75
75
- // CHECK-LABEL: func.func @simple_pad_and_pack_scalable (
76
+ // CHECK-LABEL: func.func @simple_pad_and_pack_scalable_tile (
76
77
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]: tensor<5x1xf32>,
77
78
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]: tensor<1x1x?x2xf32>,
78
79
// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]: f32) -> tensor<1x1x?x2xf32> {
@@ -89,6 +90,31 @@ func.func @simple_pad_and_pack_scalable(%input: tensor<5x1xf32>, %output: tensor
89
90
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICE]] into %[[DEST]][0, 0, 0, 0] [1, 1, %[[DIM]], 2] [1, 1, 1, 1] : tensor<?x2xf32> into tensor<1x1x?x2xf32>
90
91
// CHECK: return %[[RES]] : tensor<1x1x?x2xf32>
91
92
93
+ /// Same as the examples above, but with both inner tile sizes dynamic (passed in as `index` arguments).
94
+
95
+ func.func @simple_pad_and_pack_dynamic_tiles (%input: tensor <5 x1 xf32 >, %output: tensor <1 x1 x?x?xf32 >, %pad: f32 , %high_1: index , %high_2: index ) -> tensor <1 x1 x?x?xf32 > {
96
+ %0 = tensor.pack %input padding_value (%pad : f32 ) inner_dims_pos = [0 , 1 ] inner_tiles = [%high_1 , %high_2 ] into %output : tensor <5 x1 xf32 > -> tensor <1 x1 x?x?xf32 >
97
+ return %0 : tensor <1 x1 x?x?xf32 >
98
+ }
99
+ // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tiles(
100
+ // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]: tensor<5x1xf32>,
101
+ // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]: tensor<1x1x?x?xf32>,
102
+ // CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]: f32,
103
+ // CHECK-SAME: %[[HIGH_VAL_1:[a-zA-Z0-9]+]]: index,
104
+ // CHECK-SAME: %[[HIGH_VAL_2:[a-zA-Z0-9]+]]: index) -> tensor<1x1x?x?xf32> {
105
+ // CHECK: %[[C3:.*]] = arith.constant 3 : index
106
+ // CHECK: %[[C2:.*]] = arith.constant 2 : index
107
+ // CHECK: %[[PAD_HIGH_1:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[HIGH_VAL_1]]]
108
+ // CHECK: %[[PAD_HIGH_2:.*]] = affine.apply #[[$ATTR_1]](){{\[}}%[[HIGH_VAL_2]]]
109
+ // CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0] high{{\[}}%[[PAD_HIGH_1]], %[[PAD_HIGH_2]]] {
110
+ // CHECK: tensor.yield %[[PAD_VAL]] : f32
111
+ // CHECK-NOT: linalg.transpose
112
+ // CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[PAD]][0, 0] {{\[}}%[[HIGH_VAL_1]], %[[HIGH_VAL_2]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
113
+ // CHECK: %[[DIM_1:.*]] = tensor.dim %[[DEST]], %[[C2]] : tensor<1x1x?x?xf32>
114
+ // CHECK: %[[DIM_2:.*]] = tensor.dim %[[DEST]], %[[C3]] : tensor<1x1x?x?xf32>
115
+ // CHECK: %[[RES:.*]] = tensor.insert_slice %[[SLICE]] into %[[DEST]][0, 0, 0, 0] [1, 1, %[[DIM_1]], %[[DIM_2]]] [1, 1, 1, 1] : tensor<?x?xf32> into tensor<1x1x?x?xf32>
116
+ // CHECK: return %[[RES]] : tensor<1x1x?x?xf32>
117
+
92
118
// -----
93
119
94
120
func.func @simple_NC_to_CNnc (%arg0: tensor <32 x8 xf32 >, %arg1: tensor <1 x1 x32 x8 xf32 >) -> tensor <1 x1 x32 x8 xf32 >{
0 commit comments