Commit b558c6b

[mlir][nfc] Update vectorize-tensor-extract.mlir (4/N) (llvm#119697)
1 parent 58da789 · commit b558c6b

1 file changed

mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir

Lines changed: 140 additions & 138 deletions
@@ -2,34 +2,9 @@
 // RUN: -transform-preload-library='transform-library-paths=%p/td/vectorize-with-patterns.mlir' \
 // RUN: -transform-interpreter=entry-point=vectorize_with_patterns %s | FileCheck %s
 
-#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
-func.func @vectorize_1d_tensor_extract(%arg0: tensor<3xf32>, %arg1: tensor<4x3xi32>, %arg2: tensor<4x7x3x2xf32>) -> tensor<4x7x3x2xf32> {
-  %1 = linalg.generic {
-    indexing_maps = [#map0, #map1],
-    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-  } ins(%arg1 : tensor<4x3xi32>) outs(%arg2 : tensor<4x7x3x2xf32>) {
-  ^bb0(%arg3: i32, %arg4: f32):
-    %2 = arith.index_cast %arg3 : i32 to index
-    %3 = tensor.extract %arg0[%2] : tensor<3xf32>
-    linalg.yield %3 : f32
-  } -> tensor<4x7x3x2xf32>
-  return %1 : tensor<4x7x3x2xf32>
-}
-// CHECK-LABEL: func.func @vectorize_1d_tensor_extract
-// CHECK-SAME: %[[ARG0:.*]]: tensor<3xf32>
-// CHECK-SAME: %[[ARG1:.*]]: tensor<4x3xi32>
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<true> : vector<4x7x3x2xi1>
-// CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<4x7x3x2xf32>
-// CHECK: %[[V0:.*]] = vector.transfer_read %[[ARG1]]
-// CHECK: %[[CAST:.*]] = arith.index_cast %[[V0]]
-// CHECK: %[[BROADCAST:.*]] = vector.broadcast %[[CAST]]
-// CHECK: %[[INDICES:.*]] = vector.transpose %[[BROADCAST]]
-// CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]]] [%[[INDICES]]], %[[MASK]], %[[PASSTHRU]]
-// CHECK: vector.transfer_write %[[GATHER]]
-
-// -----
+//===----------------------------------------------------------------------===//
+// Contiguous load
+//===----------------------------------------------------------------------===//
 
 #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 func.func @vectorize_nd_tensor_extract_transfer_read_basic(
@@ -112,6 +87,142 @@ func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16
 
 // -----
 
+// The vectorizer converts `affine.apply` so that the subsequent Ops can be vectorised based on the converted ops. Contiguous load.
+func.func @vectorize_nd_tensor_extract_with_affine_apply_contiguous(%6: tensor<80x16xf32>, %arg0: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
+  %c79 = arith.constant 79 : index
+  %1 = linalg.generic {
+    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
+    iterator_types = ["parallel", "parallel"]
+  } outs(%extracted_slice : tensor<1x4xf32>) {
+  ^bb0(%out: f32):
+    %2 = linalg.index 1 : index
+    %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0)
+    %extracted = tensor.extract %6[%c79, %3] : tensor<80x16xf32>
+    linalg.yield %extracted : f32
+  } -> tensor<1x4xf32>
+  return %1 : tensor<1x4xf32>
+}
+
+// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_with_affine_apply_contiguous(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<80x16xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: index,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 79 : index
+// CHECK: %[[VAL_8:.*]] = vector.broadcast %[[VAL_1]] : index to vector<4xindex>
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_3]] : vector<4xindex>
+// CHECK: %[[VAL_10:.*]] = vector.extract %[[VAL_9]][0] : index from vector<4xindex>
+// CHECK: %[[VAL_11:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_7]], %[[VAL_10]]], %[[VAL_5]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32>
+// CHECK: %[[VAL_12:.*]] = vector.transfer_write %[[VAL_11]], %[[VAL_2]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+// CHECK: return %[[VAL_12]] : tensor<1x4xf32>
+// CHECK: }
+
+// -----
+
+func.func @vectorize_nd_tensor_extract_with_tensor_extract(%input_1: tensor<1x20xi32>, %input_2: tensor<257x24xf32>, %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) -> tensor<1x1x4xf32> {
+  %c0 = arith.constant 0 : index
+  %c256 = arith.constant 256 : index
+  %output = tensor.empty() : tensor<1x1x4xf32>
+  %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} outs(%output : tensor<1x1x4xf32>) {
+  ^bb0(%out: f32):
+    %13 = linalg.index 0 : index
+    %14 = affine.apply affine_map<(d0, d1, d2) -> (d0 + d1 + d2)>(%arg0, %13, %arg2)
+    %15 = linalg.index 2 : index
+    %16 = linalg.index 1 : index
+    %17 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 * 24 + d2 + d3)>(%arg1, %16, %15, %arg3)
+    %extracted_0 = tensor.extract %input_1[%c0, %14] : tensor<1x20xi32>
+    %18 = arith.index_cast %extracted_0 : i32 to index
+    %19 = arith.maxsi %18, %c0 : index
+    %20 = arith.minsi %19, %c256 : index
+    %extracted_1 = tensor.extract %input_2[%20, %17] : tensor<257x24xf32>
+    linalg.yield %extracted_1 : f32
+  } -> tensor<1x1x4xf32>
+  return %1 : tensor<1x1x4xf32>
+}
+
+// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_with_tensor_extract(
+// CHECK-SAME: %[[INPUT_1:.*]]: tensor<1x20xi32>,
+// CHECK-SAME: %[[INPUT_2:.*]]: tensor<257x24xf32>,
+// CHECK-SAME: %[[INPUT_3:.*]]: index, %[[INPUT_4:.*]]: index, %[[INPUT_5:.*]]: index,
+// CHECK: %[[EXTRACTED_0_IDX_0:.*]] = arith.constant 0 : index
+// CHECK: %[[SCALAR:.*]] = arith.addi %[[INPUT_3]], %[[INPUT_5]] : index
+// First `vector.transfer_read` from the generic Op - loop invariant scalar load.
+// CHECK: vector.transfer_read %[[INPUT_1]][%[[EXTRACTED_0_IDX_0]], %[[SCALAR]]]
+// CHECK-SAME: tensor<1x20xi32>, vector<i32>
+// The following `tensor.extract` from the generic Op is a contiguous load (all Ops used
+// for address calculation also satisfy the required conditions).
+// CHECK: vector.transfer_read %[[INPUT_2]][%{{.*}}, %{{.*}}, %{{.*}} {in_bounds = [true, true]} : tensor<257x24xf32>, vector<1x4xf32>
+
+// Make sure that non-linear arithmetic operations (e.g. arith.maxsi) are allowed when calculating indices for load operations. Contiguous load.
+func.func @vectorize_nd_tensor_extract_with_maxsi_contiguous(%arg0: tensor<80x16xf32>, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
+  %c16 = arith.constant 16 : index
+  %1 = linalg.generic {
+    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
+    iterator_types = ["parallel", "parallel"]
+  } outs(%extracted_slice : tensor<1x4xf32>) {
+  ^bb0(%out: f32):
+    %2 = linalg.index 0 : index
+    %3 = linalg.index 1 : index
+    %4 = arith.maxsi %2, %c16 : index
+    %extracted = tensor.extract %arg0[%4, %3] : tensor<80x16xf32>
+    linalg.yield %extracted : f32
+  } -> tensor<1x4xf32>
+  return %1 : tensor<1x4xf32>
+}
+
+// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_with_maxsi_contiguous(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<80x16xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> {
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
+
+// CHECK-DAG: %[[CST_0:.+]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
+// CHECK-DAG: %[[CST_1:.+]] = arith.constant dense<16> : vector<4x1xindex>
+// CHECK-DAG: %[[IDX0:.+]] = vector.extract %[[CST_1]][0, 0] : index from vector<4x1xindex>
+// CHECK-DAG: %[[IDX1:.+]] = vector.extract %[[CST_0]][0] : index from vector<4xindex>
+
+// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[IDX0]], %[[IDX1]]], %[[VAL_5]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32>
+// CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_1]]{{\[}}%[[VAL_4]], %[[VAL_4]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+// CHECK: return %[[VAL_9]] : tensor<1x4xf32>
+// CHECK: }
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// Gather load
+//===----------------------------------------------------------------------===//
+
+#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2)>
+#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
+func.func @vectorize_1d_tensor_extract(%arg0: tensor<3xf32>, %arg1: tensor<4x3xi32>, %arg2: tensor<4x7x3x2xf32>) -> tensor<4x7x3x2xf32> {
+  %1 = linalg.generic {
+    indexing_maps = [#map0, #map1],
+    iterator_types = ["parallel", "parallel", "parallel", "parallel"]
+  } ins(%arg1 : tensor<4x3xi32>) outs(%arg2 : tensor<4x7x3x2xf32>) {
+  ^bb0(%arg3: i32, %arg4: f32):
+    %2 = arith.index_cast %arg3 : i32 to index
+    %3 = tensor.extract %arg0[%2] : tensor<3xf32>
+    linalg.yield %3 : f32
+  } -> tensor<4x7x3x2xf32>
+  return %1 : tensor<4x7x3x2xf32>
+}
+// CHECK-LABEL: func.func @vectorize_1d_tensor_extract
+// CHECK-SAME: %[[ARG0:.*]]: tensor<3xf32>
+// CHECK-SAME: %[[ARG1:.*]]: tensor<4x3xi32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<true> : vector<4x7x3x2xi1>
+// CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<4x7x3x2xf32>
+// CHECK: %[[V0:.*]] = vector.transfer_read %[[ARG1]]
+// CHECK: %[[CAST:.*]] = arith.index_cast %[[V0]]
+// CHECK: %[[BROADCAST:.*]] = vector.broadcast %[[CAST]]
+// CHECK: %[[INDICES:.*]] = vector.transpose %[[BROADCAST]]
+// CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]]] [%[[INDICES]]], %[[MASK]], %[[PASSTHRU]]
+// CHECK: vector.transfer_write %[[GATHER]]
+
+// -----
+
 #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2)>
 #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
 #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
@@ -159,7 +270,7 @@ func.func @vectorize_nd_tensor_extract_load_1d_column_vector_using_gather_load(%
   %c0 = arith.constant 0 : index
   %0 = tensor.empty() : tensor<8x1xf32>
   %1 = linalg.generic {
-    indexing_maps = [#map],
+    indexing_maps = [#map],
     iterator_types = ["parallel", "parallel"]
   } outs(%0 : tensor<8x1xf32>) {
   ^bb0(%arg5: f32):
@@ -303,78 +414,6 @@ func.func @vectorize_nd_tensor_extract_contiguous_and_gather(%arg0: tensor<6xf32
 // CHECK: %[[VAL_14:.*]] = vector.transfer_write %[[VAL_13]], %[[VAL_8]]{{\[}}%[[VAL_2]]] {in_bounds = [true]} : vector<5xf32>, tensor<5xf32>
 // CHECK: return %[[VAL_14]] : tensor<5xf32>
 
-// -----
-
-// The vectorizer converts `affine.apply` so that the subsequent Ops can be vectorised based on the converted ops. Contiguous load.
-func.func @vectorize_nd_tensor_extract_with_affine_apply_contiguous(%6: tensor<80x16xf32>, %arg0: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
-  %c79 = arith.constant 79 : index
-  %1 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
-  ^bb0(%out: f32):
-    %2 = linalg.index 1 : index
-    %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0)
-    %extracted = tensor.extract %6[%c79, %3] : tensor<80x16xf32>
-    linalg.yield %extracted : f32
-  } -> tensor<1x4xf32>
-  return %1 : tensor<1x4xf32>
-}
-
-// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_with_affine_apply_contiguous(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<80x16xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: index,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> {
-// CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
-// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 79 : index
-// CHECK: %[[VAL_8:.*]] = vector.broadcast %[[VAL_1]] : index to vector<4xindex>
-// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_3]] : vector<4xindex>
-// CHECK: %[[VAL_10:.*]] = vector.extract %[[VAL_9]][0] : index from vector<4xindex>
-// CHECK: %[[VAL_11:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_7]], %[[VAL_10]]], %[[VAL_5]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32>
-// CHECK: %[[VAL_12:.*]] = vector.transfer_write %[[VAL_11]], %[[VAL_2]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
-// CHECK: return %[[VAL_12]] : tensor<1x4xf32>
-// CHECK: }
-
-// -----
-
-func.func @vectorize_nd_tensor_extract_with_tensor_extract(%input_1: tensor<1x20xi32>, %input_2: tensor<257x24xf32>, %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) -> tensor<1x1x4xf32> {
-  %c0 = arith.constant 0 : index
-  %c256 = arith.constant 256 : index
-  %output = tensor.empty() : tensor<1x1x4xf32>
-  %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} outs(%output : tensor<1x1x4xf32>) {
-  ^bb0(%out: f32):
-    %13 = linalg.index 0 : index
-    %14 = affine.apply affine_map<(d0, d1, d2) -> (d0 + d1 + d2)>(%arg0, %13, %arg2)
-    %15 = linalg.index 2 : index
-    %16 = linalg.index 1 : index
-    %17 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 * 24 + d2 + d3)>(%arg1, %16, %15, %arg3)
-    %extracted_0 = tensor.extract %input_1[%c0, %14] : tensor<1x20xi32>
-    %18 = arith.index_cast %extracted_0 : i32 to index
-    %19 = arith.maxsi %18, %c0 : index
-    %20 = arith.minsi %19, %c256 : index
-    %extracted_1 = tensor.extract %input_2[%20, %17] : tensor<257x24xf32>
-    linalg.yield %extracted_1 : f32
-  } -> tensor<1x1x4xf32>
-  return %1 : tensor<1x1x4xf32>
-}
-
-// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_with_tensor_extract(
-// CHECK-SAME: %[[INPUT_1:.*]]: tensor<1x20xi32>,
-// CHECK-SAME: %[[INPUT_2:.*]]: tensor<257x24xf32>,
-// CHECK-SAME: %[[INPUT_3:.*]]: index, %[[INPUT_4:.*]]: index, %[[INPUT_5:.*]]: index,
-// CHECK: %[[EXTRACTED_0_IDX_0:.*]] = arith.constant 0 : index
-// CHECK: %[[SCALAR:.*]] = arith.addi %[[INPUT_3]], %[[INPUT_5]] : index
-// First `vector.transfer_read` from the generic Op - loop invariant scalar load.
-// CHECK: vector.transfer_read %[[INPUT_1]][%[[EXTRACTED_0_IDX_0]], %[[SCALAR]]]
-// CHECK-SAME: tensor<1x20xi32>, vector<i32>
-// The following `tensor.extract` from the generic Op s a contiguous load (all Ops used
-// for address calculation also satisfy the required conditions).
-// CHECK: vector.transfer_read %[[INPUT_2]][%{{.*}}, %{{.*}}, %{{.*}} {in_bounds = [true, true]} : tensor<257x24xf32>, vector<1x4xf32>
-
-// -----
-
 // The vectorizer converts `affine.apply` so that the subsequent Ops can be vectorised based on the converted ops. Gather load.
 func.func @vectorize_nd_tensor_extract_with_affine_apply_gather(%6: tensor<80x16xf32>, %arg0: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
   %c16 = arith.constant 16 : index
@@ -410,8 +449,6 @@ func.func @vectorize_nd_tensor_extract_with_affine_apply_gather(%6: tensor<80x16
 // CHECK: return %[[VAL_14]] : tensor<1x4xf32>
 // CHECK: }
 
-// -----
-
 // Make sure that non-linear arithmetic operations (e.g. arith.maxsi) are allowed when calculating indices for load operations. Gather load.
 func.func @vectorize_nd_tensor_extract_with_maxsi_gather(%arg0: tensor<80x16xf32>, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
   %c79 = arith.constant 79 : index
@@ -445,41 +482,6 @@ func.func @vectorize_nd_tensor_extract_with_maxsi_gather(%arg0: tensor<80x16xf32
 
 // -----
 
-// Make sure that non-linear arithmetic operations (e.g. arith.maxsi) are allowed when calculating indices for load operations. Contiguous load.
-func.func @vectorize_nd_tensor_extract_with_maxsi_contiguous(%arg0: tensor<80x16xf32>, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
-  %c16 = arith.constant 16 : index
-  %1 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
-  ^bb0(%out: f32):
-    %2 = linalg.index 0 : index
-    %3 = linalg.index 1 : index
-    %4 = arith.maxsi %2, %c16 : index
-    %extracted = tensor.extract %arg0[%4, %3] : tensor<80x16xf32>
-    linalg.yield %extracted : f32
-  } -> tensor<1x4xf32>
-  return %1 : tensor<1x4xf32>
-}
-
-// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_with_maxsi_contiguous(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<80x16xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> {
-// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
-
-// CHECK-DAG: %[[CST_0:.+]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
-// CHECK-DAG: %[[CST_1:.+]] = arith.constant dense<16> : vector<4x1xindex>
-// CHECK-DAG: %[[IDX0:.+]] = vector.extract %[[CST_1]][0, 0] : index from vector<4x1xindex>
-// CHECK-DAG: %[[IDX1:.+]] = vector.extract %[[CST_0]][0] : index from vector<4xindex>
-
-// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[IDX0]], %[[IDX1]]], %[[VAL_5]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32>
-// CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_1]]{{\[}}%[[VAL_4]], %[[VAL_4]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
-// CHECK: return %[[VAL_9]] : tensor<1x4xf32>
-// CHECK: }
-
-// -----
-
 // The vectorizer assumes it's a gather load whenever using a block argument to calculate an index.
 #map = affine_map<(d0) -> (d0)>
 func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: tensor<5xindex>) -> tensor<5xf32> {
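
For reference, the RUN pipeline at the top of this file can be reproduced by hand outside of lit. In lit syntax, `%s` expands to the test file and `%p` to the directory containing it. The first RUN line falls outside the hunk shown above, so the leading mlir-opt invocation below is an assumption reconstructed from the visible continuation lines, and the paths are illustrative:

  # Sketch: run the transform-interpreter-driven vectorizer and verify the CHECK lines.
  # -split-input-file is assumed, since the test uses `// -----` separators.
  mlir-opt -split-input-file \
    -transform-preload-library='transform-library-paths=td/vectorize-with-patterns.mlir' \
    -transform-interpreter=entry-point=vectorize_with_patterns \
    vectorize-tensor-extract.mlir \
  | FileCheck vectorize-tensor-extract.mlir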
