@@ -392,6 +392,101 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%arg0: memref<16x1xf
// CHECK-NOT: memref.shape_cast
// CHECK: vector.transfer_write

+ // Same as the top example within this split, but with the outer vector
+ // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+ func.func @contiguous_inner_most_non_zero_idxs_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
+   %c0 = arith.constant 0 : index
+   vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+   return
+ }
+ // CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idxs_scalable(
+ // CHECK-SAME: %[[MEM:.*]]: memref<16x1xf32>,
+ // CHECK-SAME: %[[VEC:.*]]: vector<[8]x1xf32>,
+ // CHECK-SAME: %[[IDX:.*]]: index) {
+ // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
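+ // (Concretely: with vscale = 2 the write would cover 2 * 8 = 16 rows starting
+ // at %i, stepping past the 16-row memref for any %i > 0, so the in_bounds
+ // promise above only holds under the vscale = 1 assumption.)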
+
+ // -----
+
+ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i: index, %ii: index, %vec: vector<4x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+   return
+ }
+
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview(
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<4x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1xf32> to vector<4xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
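+ // Note that the unit dim is folded away by taking a second subview of the
+ // existing subview (%[[SV_2]] of %[[SV_1]]) rather than by rewriting the
+ // original memref.subview op.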
+
+ // Same as the top example within this split, but with the outer vector
+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+ func.func @contiguous_inner_most_dim_with_subview_scalable(%A: memref<1000x1xf32>, %i: index, %ii: index, %vec: vector<[4]x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+   return
+ }
+
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_scalable
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+ // -----
+
+ func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i: index, %ii: index, %vec: vector<4x1x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   return
+ }
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d(
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<4x1x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1x1xf32> to vector<4xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
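+ // In this rank-3 variant both trailing unit dims are dropped at once: the
+ // subview collapses memref<40x1x1xf32> down to memref<40xf32>, and the
+ // shape_cast collapses vector<4x1x1xf32> down to vector<4xf32>.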
+
+ // Same as the top example within this split, but with the outer vector
+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+ func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x1xf32>, %i: index, %ii: index, %vec: vector<[4]x1x1xf32>) {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.0 : f32
+   %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+   return
+ }
+ // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d_scalable
+ // CHECK-SAME: %[[MEM:.*]]: memref<1000x1x1xf32>,
+ // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+ // CHECK-SAME: %[[VEC:.*]]: vector<[4]x1x1xf32>) {
+ // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+ // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1x1xf32> to vector<[4]xf32>
+ // CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
// -----

func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
@@ -414,6 +509,30 @@ func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16,

// -----

+ // NOTE: This is an out-of-bounds access (vector<4x8xf32> does not fit within memref<4x1xf32>).
+
+ func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<4x8xf32>) {
+   %c0 = arith.constant 0 : index
+   vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
+   return
+ }
+ // CHECK: func.func @negative_non_unit_inner_vec_dim
+ // CHECK-NOT: memref.subview
+ // CHECK: vector.transfer_write
+
+ // -----
+
+ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vector<4x1xf32>) {
+   %c0 = arith.constant 0 : index
+   vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
+   return
+ }
+ // CHECK: func.func @negative_non_unit_inner_memref_dim
+ // CHECK-NOT: memref.subview
+ // CHECK: vector.transfer_write
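+ // As the two negative cases above illustrate, the trailing dim must be a
+ // unit dim in *both* the vector type and the memref type for the rewrite to
+ // apply; otherwise the vector.transfer_write is left as-is.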
+
+ // -----
+
func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
  %c0 = arith.constant 0 : index
  vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]