@@ -573,9 +573,9 @@ func.func @transpose_store_scalable_via_za(%vec: vector<2x[4]xf32>, %dest: memre
573
573
574
574
// -----
575
575
576
- // CHECK: @transpose_store_scalable_via_za_masked(
577
- // CHECK-SAME: %[[A:[a-z0-9]+]]: index,
578
- // CHECK-SAME: %[[B:[a-z0-9]+]]: index)
576
+ // CHECK-LABEL: @transpose_store_scalable_via_za_masked(
577
+ // CHECK-SAME: %[[A:[a-z0-9]+]]: index,
578
+ // CHECK-SAME: %[[B:[a-z0-9]+]]: index)
579
579
func.func @transpose_store_scalable_via_za_masked (%vec: vector <2 x[4 ]xf32 >, %dest: memref <?x?xf32 >, %a: index , %b: index ) {
580
580
// CHECK: %[[C2:.*]] = arith.constant 2 : index
581
581
// CHECK: %[[MIN:.*]] = index.mins %[[B]], %[[C2]]
@@ -590,11 +590,11 @@ func.func @transpose_store_scalable_via_za_masked(%vec: vector<2x[4]xf32>, %dest
590
590
591
591
// -----
592
592
593
- // CHECK: @transpose_store_scalable_via_za_multi_tile(
594
- // CHECK-SAME: %[[VEC:.*]]: vector<8x[4]xf32>
595
- // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf32>,
596
- // CHECK-SAME: %[[I:.*]]: index,
597
- // CHECK-SAME: %[[J:.*]]: index)
593
+ // CHECK-LABEL: @transpose_store_scalable_via_za_multi_tile(
594
+ // CHECK-SAME: %[[VEC:.*]]: vector<8x[4]xf32>
595
+ // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf32>,
596
+ // CHECK-SAME: %[[I:.*]]: index,
597
+ // CHECK-SAME: %[[J:.*]]: index)
598
598
func.func @transpose_store_scalable_via_za_multi_tile (%vec: vector <8 x[4 ]xf32 >, %dest: memref <?x?xf32 >, %i: index , %j: index ) {
599
599
// CHECK: %[[C4:.*]] = arith.constant 4 : index
600
600
// CHECK: %[[VSCALE:.*]] = vector.vscale
@@ -615,3 +615,34 @@ func.func @transpose_store_scalable_via_za_multi_tile(%vec: vector<8x[4]xf32>, %
615
615
vector.transfer_write %tr , %dest [%i , %j ] {in_bounds = [true , true ]} : vector <[4 ]x8 xf32 >, memref <?x?xf32 >
616
616
return
617
617
}
618
+
619
+ // -----
620
+
621
+ // CHECK-LABEL: @transpose_store_scalable_via_za_multi_tile_with_scalable_extracts
622
+ func.func @transpose_store_scalable_via_za_multi_tile_with_scalable_extracts(%vec: vector<2x[8]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
623
+ // Lower half of %vec: the [0] scalable extract of a row is inserted into the first tile, which is stored at [%i, %j].
624
+ // CHECK: vector.scalable.extract
625
+ // CHECK: %[[ROW_2_LOWER:.*]] = vector.scalable.extract %{{.*}}[0] : vector<[4]xf32> from vector<[8]xf32>
626
+ // CHECK: %[[TILE_0:.*]] = vector.insert %[[ROW_2_LOWER]], %{{.*}}[1] : vector<[4]xf32> into vector<[4]x[4]xf32>
627
+ // CHECK: vector.transfer_write %[[TILE_0]], %{{.*}}[%[[I:[a-z0-9]+]], %[[J:[a-z0-9]+]]]
628
+
629
+ // Upper half of %vec: the [4] scalable extract of a row is inserted into a second tile, which is stored at [%i + offset, %j].
630
+ // CHECK: vector.scalable.extract
631
+ // CHECK: %[[ROW_2_UPPER:.*]] = vector.scalable.extract %{{.*}}[4] : vector<[4]xf32> from vector<[8]xf32>
632
+ // CHECK: %[[TILE_1:.*]] = vector.insert %[[ROW_2_UPPER]], %{{.*}}[1] : vector<[4]xf32> into vector<[4]x[4]xf32>
633
+ // CHECK: %[[I_OFFSET:.*]] = arith.addi %{{.*}}, %[[I]] : index
634
+ // CHECK: vector.transfer_write %[[TILE_1]], %{{.*}}[%[[I_OFFSET]], %[[J]]]
635
+ %tr = vector.transpose %vec, [1, 0] : vector<2x[8]xf32> to vector<[8]x2xf32>
636
+ vector.transfer_write %tr, %dest[%i, %j] {in_bounds = [true, true]} : vector<[8]x2xf32>, memref<?x?xf32>
637
+ return
638
+ }
639
+
640
+ // -----
641
+
642
+ // CHECK-LABEL: @negative_transpose_store_scalable_via_za__bad_source_shape
643
+ // CHECK-NOT: arm_sme.get_tile
644
+ func.func @negative_transpose_store_scalable_via_za__bad_source_shape(%vec: vector<2x[7]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
645
+ %transposed = vector.transpose %vec, [1, 0] : vector<2x[7]xf32> to vector<[7]x2xf32> // Negative case: no SME tile op is expected for this [7]-wide source (presumably an unsupported shape -- TODO confirm against the pattern).
646
+ vector.transfer_write %transposed, %dest[%i, %j] {in_bounds = [true, true]} : vector<[7]x2xf32>, memref<?x?xf32>
647
+ return
648
+ }
0 commit comments