@@ -501,6 +501,67 @@ module attributes {transform.with_named_sequence} {
501
501
502
502
// -----
503
503
504
// Test input: packs the dynamically shaped 2-D f32 tensor %arg0 into the
// statically shaped destination %arg1 (4x1x16x2). Source dims 1 and 0 are
// tiled by 16 and 2 respectively (inner_dims_pos / inner_tiles), and the
// packed tensor is returned. The transform module that follows in this
// section is what requests vectorization of this op.
+ func.func @test_vectorize_dynamic_pack (%arg0: tensor <?x?xf32 >, %arg1: tensor <4 x1 x16 x2 xf32 >) -> tensor <4 x1 x16 x2 xf32 > {
505
+ %pack = tensor.pack %arg0 inner_dims_pos = [1 , 0 ] inner_tiles = [16 , 2 ] into %arg1 : tensor <?x?xf32 > -> tensor <4 x1 x16 x2 xf32 >
506
+ return %pack : tensor <4 x1 x16 x2 xf32 >
507
+ }
508
// Transform script for the dynamic-shape pack test: matches every
// tensor.pack op reachable from the payload root %arg0 and asks
// transform.structured.vectorize to vectorize it with vector sizes [8, 16].
// The op name inside the quotes must be spelled exactly, with no padding
// whitespace; a padded name does not identify the op and nothing is matched.
+ module attributes {transform.with_named_sequence } {
509
+ transform.named_sequence @__transform_main (%arg0: !transform.any_op {transform.readonly }) {
510
+ %0 = transform.structured.match ops {["tensor.pack"]} in %arg0 : (!transform.any_op ) -> !transform.any_op
511
+ transform.structured.vectorize %0 vector_sizes [8 , 16 ] : !transform.any_op
512
+ transform.yield
513
+ }
514
+ }
515
+ // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
516
+ // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
517
+ // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
518
+ // CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} %[[c0]] : tensor<?x?xf32>
519
+ // CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} %[[c1]] : tensor<?x?xf32>
520
+ // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<4x1x16x2xf32>
521
+ // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<8x16xi1>
522
+ // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
523
+ // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
524
+ // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[cst]]
525
+ // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32>
526
+ // CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32>
527
+ // CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<8x16xf32> to vector<4x2x1x16xf32>
528
+ // CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32>
529
+ // CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_2]], %[[c0_2]], %[[c0_2]], %[[c0_2]]]
530
+ // CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<4x1x16x2xf32>
531
+ // CHECK: return %[[write]] : tensor<4x1x16x2xf32>
532
+
533
+ // -----
534
+
535
// Test input: packs the statically shaped 32x8x16 f32 tensor %arg0 into the
// destination %arg1 (32x4x1x16x2). Source dims 2 and 1 are tiled by 16 and 2
// respectively (inner_dims_pos / inner_tiles), and the packed tensor is
// returned. The transform module that follows in this section is what
// requests vectorization of this op.
+ func.func @test_vectorize_pack (%arg0: tensor <32 x8 x16 xf32 >, %arg1: tensor <32 x4 x1 x16 x2 xf32 >) -> tensor <32 x4 x1 x16 x2 xf32 > {
536
+ %pack = tensor.pack %arg0 inner_dims_pos = [2 , 1 ] inner_tiles = [16 , 2 ] into %arg1 : tensor <32 x8 x16 xf32 > -> tensor <32 x4 x1 x16 x2 xf32 >
537
+ return %pack : tensor <32 x4 x1 x16 x2 xf32 >
538
+ }
539
// Transform script for the static-shape pack test: matches every
// tensor.pack op reachable from the payload root %arg0 and asks
// transform.structured.vectorize to vectorize it with vector sizes
// [32, 8, 16], which equal the source tensor shape used by this test.
// The op name inside the quotes must be spelled exactly, with no padding
// whitespace; a padded name does not identify the op and nothing is matched.
+ module attributes {transform.with_named_sequence } {
540
+ transform.named_sequence @__transform_main (%arg0: !transform.any_op {transform.readonly }) {
541
+ %0 = transform.structured.match ops {["tensor.pack"]} in %arg0 : (!transform.any_op ) -> !transform.any_op
542
+ transform.structured.vectorize %0 vector_sizes [32 , 8 , 16 ] : !transform.any_op
543
+ transform.yield
544
+ }
545
+ }
546
+ // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
547
+ // CHECK-DAG: %[[c32:.*]] = arith.constant 32 : index
548
+ // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
549
+ // CHECK-DAG: %[[c16:.*]] = arith.constant 16 : index
550
+ // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
551
+ // CHECK: %[[mask:.*]] = vector.create_mask %[[c32]], %[[c8]], %[[c16]] : vector<32x8x16xi1>
552
+ // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
553
+ // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
554
+ // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
555
+ // CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32>
556
+ // CHECK-SAME: } : vector<32x8x16xi1> -> vector<32x8x16xf32>
557
+ // CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
558
+ // CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
559
+ // CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]]
560
+ // CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
561
+ // CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>
562
+
563
+ // -----
564
+
504
565
func.func @matmul (%A: memref <?x?xf32 >, %B: memref <?x?xf32 >, %C: memref <?x?xf32 >) {
505
566
linalg.matmul ins (%A , %B: memref <?x?xf32 >, memref <?x?xf32 >)
506
567
outs (%C: memref <?x?xf32 >)
0 commit comments