@@ -142,3 +142,83 @@ module attributes {transform.with_named_sequence} {
142
142
}
143
143
}
144
144
145
+ // -----
146
+
147
+ func.func @vectorize_dynamic_reduction_1d (%arg0: tensor <?xf32 >,
148
+ %arg1: tensor <f32 >) -> tensor <f32 > {
149
+
150
+ %0 = linalg.reduce ins (%arg0 : tensor <?xf32 >) outs (%arg1 : tensor <f32 >) dimensions = [0 ]
151
+ (%in: f32 , %init: f32 ) {
152
+ %0 = arith.addf %in , %init : f32
153
+ linalg.yield %0 : f32
154
+ }
155
+ return %0 : tensor <f32 >
156
+ }
157
+
158
// CHECK-LABEL: func.func @vectorize_dynamic_reduction_1d(
// CHECK-SAME: %[[ARG_0:.*]]: tensor<?xf32>, %[[ARG_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_1:.*]] = tensor.dim %[[ARG_0]], %[[VAL_0]] : tensor<?xf32>
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_4:.*]] = vector.create_mask %[[VAL_1]] : vector<[4]xi1>
// CHECK: %[[VAL_5:.*]] = vector.mask %[[VAL_4]] { vector.transfer_read %[[ARG_0]][%[[VAL_2]]], %[[VAL_3]] {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
// CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_7:.*]] = vector.transfer_read %[[ARG_1]][], %[[VAL_6]] : tensor<f32>, vector<f32>
// CHECK: %[[VAL_8:.*]] = vector.extractelement %[[VAL_7]][] : vector<f32>
// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_4]] { vector.multi_reduction <add>, %[[VAL_5]], %[[VAL_8]] [0] : vector<[4]xf32> to f32 } : vector<[4]xi1> -> f32
// CHECK: %[[VAL_10:.*]] = vector.broadcast %[[VAL_9]] : f32 to vector<f32>
// CHECK: %[[VAL_11:.*]] = vector.transfer_write %[[VAL_10]], %[[ARG_1]][] : vector<f32>, tensor<f32>
// CHECK: return %[[VAL_11]] : tensor<f32>
// CHECK: }

175
+ module attributes {transform.with_named_sequence } {
176
+ transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
177
+ %0 = transform.structured.match ops {[" linalg.reduce" ]} in %arg1 : (!transform.any_op ) -> !transform.any_op
178
+ transform.structured.vectorize %0 vector_sizes [[4 ]] : !transform.any_op
179
+ transform.yield
180
+ }
181
+ }
182
+
183
+ // -----
184
+
185
+ func.func @vectorize_dynamic_reduction_2d (%arg0: tensor <?x?xf32 >,
186
+ %arg1: tensor <?xf32 >) -> tensor <?xf32 > {
187
+ %0 = linalg.generic { index ing_maps = [affine_map <(d0 , d1 ) -> (d0 , d1 )>,
188
+ affine_map <(d0 , d1 ) -> (d0 )>],
189
+ iterator_types = [" parallel" , " reduction" ] }
190
+ ins (%arg0 : tensor <?x?xf32 >)
191
+ outs (%arg1 : tensor <?xf32 >) {
192
+ ^bb (%in: f32 , %out: f32 ) :
193
+ %0 = arith.addf %in , %out : f32
194
+ linalg.yield %0 : f32
195
+ } -> tensor <?xf32 >
196
+ return %0 : tensor <?xf32 >
197
+ }
198
+
199
// CHECK-LABEL: func.func @vectorize_dynamic_reduction_2d(
// CHECK-SAME: %[[ARG_0:.*]]: tensor<?x?xf32>, %[[ARG_1:.*]]: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_1:.*]] = tensor.dim %[[ARG_0]], %[[VAL_0]] : tensor<?x?xf32>
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_3:.*]] = tensor.dim %[[ARG_0]], %[[VAL_2]] : tensor<?x?xf32>
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_6:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_3]] : vector<1x[4]xi1>
// CHECK: %[[VAL_7:.*]] = vector.mask %[[VAL_6]] { vector.transfer_read %[[ARG_0]][%[[VAL_4]], %[[VAL_4]]], %[[VAL_5]] {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32>
// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_9:.*]] = vector.create_mask %[[VAL_1]] : vector<1xi1>
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %[[ARG_1]][%[[VAL_4]]], %[[VAL_8]] {in_bounds = [true]} : tensor<?xf32>, vector<1xf32> } : vector<1xi1> -> vector<1xf32>
// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_6]] { vector.multi_reduction <add>, %[[VAL_7]], %[[VAL_10]] [1] : vector<1x[4]xf32> to vector<1xf32> } : vector<1x[4]xi1> -> vector<1xf32>
// CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_9]] { vector.transfer_write %[[VAL_11]], %[[ARG_1]][%[[VAL_12]]] {in_bounds = [true]} : vector<1xf32>, tensor<?xf32> } : vector<1xi1> -> tensor<?xf32>
// CHECK: return %[[VAL_13]] : tensor<?xf32>
// CHECK: }

218
+ module attributes {transform.with_named_sequence } {
219
+ transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
220
+ %0 = transform.structured.match ops {[" linalg.generic" ]} in %arg1 : (!transform.any_op ) -> !transform.any_op
221
+ transform.structured.vectorize %0 vector_sizes [1 , [4 ]] : !transform.any_op
222
+ transform.yield
223
+ }
224
+ }