@@ -144,8 +144,6 @@ func.func @transfer_read_of_extract_slice_swappy_rank_reducing(%t : tensor<?x?x?
144
144
145
145
// -----
146
146
147
- // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
148
-
149
147
// CHECK: func @fold_vector_transfer_write_with_rank_reduced_insert_slice
150
148
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?xf32>
151
149
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
@@ -155,18 +153,16 @@ func.func @transfer_read_of_extract_slice_swappy_rank_reducing(%t : tensor<?x?x?
155
153
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index
156
154
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index
157
155
// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index
156
+ // CHECK-SAME: %[[ARG8:[a-zA-Z0-9]+]]: tensor<?x?xf32>
158
157
func.func @fold_vector_transfer_write_with_rank_reduced_insert_slice (
159
158
%arg0 : tensor <?x?x?xf32 >,
160
159
%arg1 : vector <4 xf32 >, %arg2: index , %arg3 : index , %arg4 : index ,
161
160
%arg5: index , %arg6 : index , %arg7 : index ,
162
161
%st : tensor <?x?xf32 >) -> tensor <?x?x?xf32 > {
163
162
%cst = arith.constant 0.0 : f32
164
163
165
- // CHECK-NOT: insert_slice
166
- // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
167
- // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]]
168
- // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]]
169
- // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, tensor<?x?x?xf32
164
+ // CHECK-DAG: %[[r1:.*]] = vector.transfer_write %[[ARG1]], %[[ARG8]][%[[ARG6]], %[[ARG7]]] {in_bounds = [true]} : vector<4xf32>, tensor<?x?xf32>
165
+ // CHECK-DAG: %[[r2:.*]] = tensor.insert_slice %[[r1]] into %[[ARG0]][0, %[[ARG2]], %[[ARG3]]] [1, %[[ARG4]], %[[ARG5]]] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
170
166
%0 = vector.transfer_write %arg1 , %st [%arg6 , %arg7 ] {in_bounds = [true ]}
171
167
: vector <4 xf32 >, tensor <?x?xf32 >
172
168
%1 = tensor.insert_slice %0 into %arg0 [0 , %arg2 , %arg3 ] [1 , %arg4 , %arg5 ] [1 , 1 , 1 ]
@@ -176,9 +172,6 @@ func.func @fold_vector_transfer_write_with_rank_reduced_insert_slice(
176
172
177
173
// -----
178
174
179
- // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
180
- // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
181
-
182
175
// CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice
183
176
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?xf32>
184
177
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
@@ -188,19 +181,16 @@ func.func @fold_vector_transfer_write_with_rank_reduced_insert_slice(
188
181
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index
189
182
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index
190
183
// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index
184
+ // CHECK-SAME: %[[ARG8:[a-zA-Z0-9]+]]: tensor<?x?xf32>
191
185
func.func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice (
192
186
%arg0 : tensor <?x?x?xf32 >,
193
187
%arg1 : vector <4 xf32 >, %arg2: index , %arg3 : index , %arg4 : index ,
194
188
%arg5: index , %arg6 : index , %arg7 : index ,
195
189
%st : tensor <?x?xf32 >) -> tensor <?x?x?xf32 > {
196
190
%cst = arith.constant 0.0 : f32
197
191
198
- // CHECK-NOT: insert_slice
199
- // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
200
- // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]]
201
- // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]]
202
- // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]]
203
- // CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, tensor<?x?x?xf32
192
+ // CHECK-DAG: %[[r1:.*]] = vector.transfer_write %[[ARG1]], %[[ARG8]][%[[ARG6]], %[[ARG7]]] {in_bounds = [true]} : vector<4xf32>, tensor<?x?xf32>
193
+ // CHECK-DAG: %[[r2:.*]] = tensor.insert_slice %[[r1]] into %[[ARG0]][%[[ARG2]], %[[ARG3]], 0] [%[[ARG4]], %[[ARG5]], 1] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
204
194
%0 = vector.transfer_write %arg1 , %st [%arg6 , %arg7 ] {in_bounds = [true ]}
205
195
: vector <4 xf32 >, tensor <?x?xf32 >
206
196
%1 = tensor.insert_slice %0 into %arg0 [%arg2 , %arg3 , 0 ] [%arg4 , %arg5 , 1 ] [1 , 1 , 1 ]
@@ -226,6 +216,24 @@ func.func @insert_slice_of_transfer_write(%t1 : tensor<?x12xf32>, %v : vector<5x
226
216
227
217
// -----
228
218
219
+ // Negative test: `transfer_write` only writes `5x6` of the `100x100`
220
+ // elements of `%arg3`, so both ops below must be left unchanged.
221
+ // CHECK-LABEL: func @insert_slice_of_transfer_write_overwrite_all(
222
+ // CHECK-SAME: %[[arg0:.*]]: tensor<1000x1000xf32>, %[[arg1:.*]]: vector<5x6xf32>, %[[arg2:.*]]: index, %[[arg3:.*]]: tensor<100x100xf32>
223
+ func.func @insert_slice_of_transfer_write_overwrite_all (%arg0: tensor <1000 x1000 xf32 >, %arg1: vector <5 x6 xf32 >, %arg2: index , %arg3: tensor <100 x100 xf32 >) -> tensor <1000 x1000 xf32 > {
224
+ %c0 = arith.constant 0 : index
225
+
226
+ // CHECK: %[[c0:.*]] = arith.constant 0 : index
227
+ // CHECK: %[[r1:.*]] = vector.transfer_write %[[arg1]], %[[arg3]][%[[c0]], %[[c0]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<100x100xf32>
228
+ // CHECK: %[[r2:.*]] = tensor.insert_slice %[[r1]] into %[[arg0]][3, %[[arg2]]] [100, 100] [1, 1] : tensor<100x100xf32> into tensor<1000x1000xf32>
229
+ // CHECK: return %[[r2]] : tensor<1000x1000xf32>
230
+ %0 = vector.transfer_write %arg1 , %arg3 [%c0 , %c0 ] {in_bounds = [true , true ]} : vector <5 x6 xf32 >, tensor <100 x100 xf32 >
231
+ %inserted_slice = tensor.insert_slice %0 into %arg0 [3 , %arg2 ] [100 , 100 ] [1 , 1 ] : tensor <100 x100 xf32 > into tensor <1000 x1000 xf32 >
232
+ return %inserted_slice : tensor <1000 x1000 xf32 >
233
+ }
234
+
235
+ // -----
236
+
229
237
// CHECK-DAG: #[[$d0d2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
230
238
231
239
// CHECK-LABEL: func @insert_slice_of_transfer_write_swappy_rank_extending(
0 commit comments