@@ -438,6 +438,24 @@ func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf
// CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+ // Same as the top example within this split, but with the outer vector
+ // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+ // vscale = 1). This is assumed via the `in_bounds` attribute.
+
+ // TODO: Add a similar test for xfer_read.
+
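+ // For reference, a hypothetical sketch of such an xfer_read test (the
+ // function name and padding value below are illustrative assumptions, not
+ // checked-in code):
+ //
+ //   func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable_read(%arg0: memref<16x1xf32>, %i: index) -> vector<[8]x1xf32> {
+ //     %pad = arith.constant 0.0 : f32
+ //     %1 = vector.transfer_read %arg0[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<[8]x1xf32>
+ //     return %1 : vector<[8]x1xf32>
+ //   }
+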
+ func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
+   vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+   return
+ }
+ // CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(
+ // CHECK-SAME: %[[MEM:.*]]: memref<16x1xf32>,
+ // CHECK-SAME: %[[VEC:.*]]: vector<[8]x1xf32>
+ // CHECK-SAME: %[[IDX:.*]]: index) {
+ // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
+
// The index to be dropped is unknown and "out of bounds" - not safe to
// collapse.
func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
@@ -451,6 +469,86 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0:
// -----
+ // Verify that the transformation also works when the input is a "subview".
+
+ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i: index, %ii: index, %vec: vector<4x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+   return
+ }
+
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview(
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<4x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1xf32> to vector<4xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+ // Same as the top example within this split, but with the outer vector
+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+ // vscale = 1). This is assumed via the `in_bounds` attribute.
+
+ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i: index, %ii: index, %vec: vector<[4]x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+   return
+ }
+
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+ // -----
+
+ func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i: index, %ii: index, %vec: vector<4x1x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   return
+ }
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d(
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<4x1x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1x1xf32> to vector<4xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+ // Same as the top example within this split, but with the outer vector
+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+ func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x1xf32>, %i: index, %ii: index, %vec: vector<[4]x1x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   return
+ }
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d_scalable
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<[4]x1x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1x1xf32> to vector<[4]xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+ // -----
+
func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
  %c0 = arith.constant 0 : index
  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0]
@@ -471,6 +569,30 @@ func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16,
// -----
+ // NOTE: This is an out-of-bounds access.
+
+ func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<4x8xf32>) {
+   %c0 = arith.constant 0 : index
+   vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
+   return
+ }
+ // CHECK: func.func @negative_non_unit_inner_vec_dim
+ // CHECK-NOT: memref.subview
+ // CHECK: vector.transfer_write
+
+ // -----
+
+ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vector<4x1xf32>) {
+   %c0 = arith.constant 0 : index
+   vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
+   return
+ }
+ // CHECK: func.func @negative_non_unit_inner_memref_dim
+ // CHECK-NOT: memref.subview
+ // CHECK: vector.transfer_write
+
+ // -----
+
func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
  %c0 = arith.constant 0 : index
  vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]