@@ -96,6 +96,17 @@ define <8 x i8> @strided_vpload_v8i8(ptr %ptr, i32 signext %stride, <8 x i1> %m,
  ret <8 x i8> %load
}

+define <8 x i8> @strided_vpload_v8i8_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8i8_unit_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 1
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
+; CHECK-NEXT: ret
+  %load = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr %ptr, i32 1, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %load
+}
+
declare <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i16> @strided_vpload_v2i16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -132,6 +143,17 @@ define <8 x i16> @strided_vpload_v8i16(ptr %ptr, i32 signext %stride, <8 x i1> %
  ret <8 x i16> %load
}

+define <8 x i16> @strided_vpload_v8i16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8i16_unit_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 2
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2, v0.t
+; CHECK-NEXT: ret
+  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %load
+}
+
define <8 x i16> @strided_vpload_v8i16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i16_allones_mask:
; CHECK: # %bb.0:
@@ -168,6 +190,17 @@ define <4 x i32> @strided_vpload_v4i32(ptr %ptr, i32 signext %stride, <4 x i1> %
  ret <4 x i32> %load
}

+define <4 x i32> @strided_vpload_v4i32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v4i32_unit_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 4
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vlse32.v v8, (a0), a2, v0.t
+; CHECK-NEXT: ret
+  %load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %load
+}
+
declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i32> @strided_vpload_v8i32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
@@ -204,6 +237,17 @@ define <2 x i64> @strided_vpload_v2i64(ptr %ptr, i32 signext %stride, <2 x i1> %
  ret <2 x i64> %load
}

+define <2 x i64> @strided_vpload_v2i64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2i64_unit_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 8
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vlse64.v v8, (a0), a2, v0.t
+; CHECK-NEXT: ret
+  %load = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %load
+}
+
declare <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i64> @strided_vpload_v4i64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
@@ -288,6 +332,17 @@ define <8 x half> @strided_vpload_v8f16(ptr %ptr, i32 signext %stride, <8 x i1>
  ret <8 x half> %load
}

+define <8 x half> @strided_vpload_v8f16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8f16_unit_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 2
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2, v0.t
+; CHECK-NEXT: ret
+  %load = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret <8 x half> %load
+}
+
declare <2 x float> @llvm.experimental.vp.strided.load.v2f32.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x float> @strided_vpload_v2f32(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -312,6 +367,17 @@ define <4 x float> @strided_vpload_v4f32(ptr %ptr, i32 signext %stride, <4 x i1>
  ret <4 x float> %load
}

+define <4 x float> @strided_vpload_v4f32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v4f32_unit_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 4
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vlse32.v v8, (a0), a2, v0.t
+; CHECK-NEXT: ret
+  %load = call <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
+  ret <4 x float> %load
+}
+
declare <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x float> @strided_vpload_v8f32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
@@ -348,6 +414,18 @@ define <2 x double> @strided_vpload_v2f64(ptr %ptr, i32 signext %stride, <2 x i1
  ret <2 x double> %load
}

+define <2 x double> @strided_vpload_v2f64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2f64_unit_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 8
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vlse64.v v8, (a0), a2, v0.t
+; CHECK-NEXT: ret
+  %load = call <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
+  ret <2 x double> %load
+}
+
+
declare <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x double> @strided_vpload_v4f64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
@@ -416,10 +494,10 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: bltu a2, a4, .LBB33_2
+; CHECK-NEXT: bltu a2, a4, .LBB40_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB40_2:
; CHECK-NEXT: mul a4, a3, a1
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: addi a5, a2, -16
@@ -444,10 +522,10 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %
; CHECK: # %bb.0:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: bltu a2, a4, .LBB34_2
+; CHECK-NEXT: bltu a2, a4, .LBB41_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
-; CHECK-NEXT: .LBB34_2:
+; CHECK-NEXT: .LBB41_2:
; CHECK-NEXT: mul a4, a3, a1
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: addi a5, a2, -16
@@ -474,21 +552,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV32-NEXT: li a5, 32
; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: mv a3, a4
-; CHECK-RV32-NEXT: bltu a4, a5, .LBB35_2
+; CHECK-RV32-NEXT: bltu a4, a5, .LBB42_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 32
-; CHECK-RV32-NEXT: .LBB35_2:
+; CHECK-RV32-NEXT: .LBB42_2:
; CHECK-RV32-NEXT: mul a5, a3, a2
; CHECK-RV32-NEXT: addi a6, a4, -32
; CHECK-RV32-NEXT: sltu a4, a4, a6
; CHECK-RV32-NEXT: addi a4, a4, -1
; CHECK-RV32-NEXT: and a6, a4, a6
; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: add a5, a1, a5
-; CHECK-RV32-NEXT: bltu a6, a4, .LBB35_4
+; CHECK-RV32-NEXT: bltu a6, a4, .LBB42_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: li a6, 16
-; CHECK-RV32-NEXT: .LBB35_4:
+; CHECK-RV32-NEXT: .LBB42_4:
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
@@ -497,10 +575,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV32-NEXT: sltu a6, a3, a5
; CHECK-RV32-NEXT: addi a6, a6, -1
; CHECK-RV32-NEXT: and a5, a6, a5
-; CHECK-RV32-NEXT: bltu a3, a4, .LBB35_6
+; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_6
; CHECK-RV32-NEXT: # %bb.5:
; CHECK-RV32-NEXT: li a3, 16
-; CHECK-RV32-NEXT: .LBB35_6:
+; CHECK-RV32-NEXT: .LBB42_6:
; CHECK-RV32-NEXT: mul a4, a3, a2
; CHECK-RV32-NEXT: add a4, a1, a4
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -524,21 +602,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV64-NEXT: li a5, 32
; CHECK-RV64-NEXT: vmv1r.v v8, v0
; CHECK-RV64-NEXT: mv a4, a3
-; CHECK-RV64-NEXT: bltu a3, a5, .LBB35_2
+; CHECK-RV64-NEXT: bltu a3, a5, .LBB42_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a4, 32
-; CHECK-RV64-NEXT: .LBB35_2:
+; CHECK-RV64-NEXT: .LBB42_2:
; CHECK-RV64-NEXT: mul a5, a4, a2
; CHECK-RV64-NEXT: addi a6, a3, -32
; CHECK-RV64-NEXT: sltu a3, a3, a6
; CHECK-RV64-NEXT: addi a3, a3, -1
; CHECK-RV64-NEXT: and a6, a3, a6
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: add a5, a1, a5
-; CHECK-RV64-NEXT: bltu a6, a3, .LBB35_4
+; CHECK-RV64-NEXT: bltu a6, a3, .LBB42_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: li a6, 16
-; CHECK-RV64-NEXT: .LBB35_4:
+; CHECK-RV64-NEXT: .LBB42_4:
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
@@ -547,10 +625,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV64-NEXT: sltu a6, a4, a5
; CHECK-RV64-NEXT: addi a6, a6, -1
; CHECK-RV64-NEXT: and a5, a6, a5
-; CHECK-RV64-NEXT: bltu a4, a3, .LBB35_6
+; CHECK-RV64-NEXT: bltu a4, a3, .LBB42_6
; CHECK-RV64-NEXT: # %bb.5:
; CHECK-RV64-NEXT: li a4, 16
-; CHECK-RV64-NEXT: .LBB35_6:
+; CHECK-RV64-NEXT: .LBB42_6:
; CHECK-RV64-NEXT: mul a3, a4, a2
; CHECK-RV64-NEXT: add a3, a1, a3
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma