@@ -36,6 +36,81 @@ module attributes {transform.with_named_sequence} {

 // -----

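+// Elementwise addition of two dynamically shaped 1-D tensors, vectorized with
+// a vector size taken from the `arith.constant` op in the payload (matched by
+// the transform sequence below).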
+func.func @vectorize_dynamic_identity_with_constant(%arg0: tensor<?xf32>,
+                                                    %arg1: tensor<?xf32>,
+                                                    %arg2: tensor<?xf32>) -> tensor<?xf32> {
+  %c4 = arith.constant 4 : index
+  %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>,
+                                         affine_map<(d0) -> (d0)>,
+                                         affine_map<(d0) -> (d0)>],
+                        iterator_types = ["parallel"] }
+    ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
+    outs(%arg2 : tensor<?xf32>) {
+    ^bb(%in0: f32, %in1: f32, %out: f32):
+      %0 = arith.addf %in0, %in1 : f32
+      linalg.yield %0 : f32
+    } -> tensor<?xf32>
+  return %0 : tensor<?xf32>
+}
+
+// CHECK-LABEL: @vectorize_dynamic_identity_with_constant
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?xf32>
+// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1>
+// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32>
+// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>
+
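+// The handle to the payload `arith.constant` is passed directly in
+// `vector_sizes`, so the vectorizer reads the size (4) out of the payload IR.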
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %size = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [%size] : !transform.any_op, !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
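+// Same payload as above, but the vector size is supplied as a transform
+// dialect parameter rather than a payload op handle.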
+func.func @vectorize_dynamic_identity_with_param(%arg0: tensor<?xf32>,
+                                                 %arg1: tensor<?xf32>,
+                                                 %arg2: tensor<?xf32>) -> tensor<?xf32> {
+  %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>,
+                                         affine_map<(d0) -> (d0)>,
+                                         affine_map<(d0) -> (d0)>],
+                        iterator_types = ["parallel"] }
+    ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
+    outs(%arg2 : tensor<?xf32>) {
+    ^bb(%in0: f32, %in1: f32, %out: f32):
+      %0 = arith.addf %in0, %in1 : f32
+      linalg.yield %0 : f32
+    } -> tensor<?xf32>
+  return %0 : tensor<?xf32>
+}
+
+// CHECK-LABEL: @vectorize_dynamic_identity_with_param
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?xf32>
+// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1>
+// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32>
+// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>
+
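+// `transform.param.constant` builds the vector size as a transform parameter;
+// no anchor op is needed in the payload IR.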
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %vector_size = transform.param.constant 4 : i64 -> !transform.param<i64>
+    transform.structured.vectorize %0 vector_sizes [%vector_size] : !transform.any_op, !transform.param<i64>
+    transform.yield
+  }
+}
+
+// -----
+
 func.func @vectorize_dynamic_1d_broadcast(%arg0: tensor<?xf32>,
                                           %arg1: tensor<?xf32>,
                                           %arg2: tensor<?xf32>) -> tensor<?xf32> {
@@ -231,6 +306,49 @@ module attributes {transform.with_named_sequence} {

 // -----

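+// Masked vectorization of a transposed reduction: the source is read with a
+// 4x8x16 mask, while the transposed 16x8 destination uses its own mask and a
+// permutation map.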
+func.func @vectorize_dynamic_transpose_reduction_with_params(%arg0: tensor<?x?x?xf32>,
+                                                             %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+                                         affine_map<(d0, d1, d2) -> (d2, d1)>],
+                        iterator_types = ["reduction", "parallel", "parallel"] }
+    ins(%arg0 : tensor<?x?x?xf32>)
+    outs(%arg1 : tensor<?x?xf32>) {
+    ^bb(%in: f32, %out: f32):
+      %0 = arith.addf %in, %out : f32
+      linalg.yield %0 : f32
+    } -> tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+
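+// Transform parameters and static sizes can be mixed in `vector_sizes`:
+// dimensions 0 and 2 come from `transform.param.constant` ops, while
+// dimension 1 is the literal 8.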
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %vector_size_0 = transform.param.constant 4 : i64 -> !transform.param<i64>
+    %vector_size_2 = transform.param.constant 16 : i64 -> !transform.param<i64>
+    transform.structured.vectorize %0 vector_sizes
+      [%vector_size_0, 8, %vector_size_2] : !transform.any_op, !transform.param<i64>, !transform.param<i64>
+    transform.yield
+  }
+}
+
+// CHECK-LABEL: @vectorize_dynamic_transpose_reduction_with_params(
+// CHECK-SAME:    %[[VAL_0:.*]]: tensor<?x?x?xf32>,
+// CHECK-SAME:    %[[VAL_1:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xf32>
+// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
+// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
+// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf32>
+// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]], %[[VAL_7]] : vector<4x8x16xi1>
+// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true, true]} : tensor<?x?x?xf32>, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32>
+// CHECK: %[[VAL_13:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_5]] : vector<16x8xi1>
+// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32>
+// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_10]] { vector.multi_reduction <add>, %[[VAL_11]], %[[VAL_14]] [0] : vector<4x8x16xf32> to vector<8x16xf32> } : vector<4x8x16xi1> -> vector<8x16xf32>
+// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<8x16xf32>, tensor<?x?xf32> } : vector<16x8xi1> -> tensor<?x?xf32>
+
+// -----
+
 func.func @vectorize_partial_dynamic_identity(%arg0: tensor<8x?xf32>,
                                               %arg1: tensor<8x?xf32>,
                                               %arg2: tensor<8x?xf32>) -> tensor<8x?xf32> {