@@ -96,6 +96,17 @@ define void @strided_vpstore_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr, i32 signex
   ret void
 }
 
+define void @strided_vpstore_nxv8i8_unit_stride(<vscale x 8 x i8> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv8i8_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsse8.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv8i8.p0.i32(<vscale x 8 x i8> %val, ptr %ptr, i32 1, <vscale x 8 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.nxv1i16.p0.i32(<vscale x 1 x i16>, ptr, i32, <vscale x 1 x i1>, i32)
 
 define void @strided_vpstore_nxv1i16(<vscale x 1 x i16> %val, ptr %ptr, i32 signext %strided, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -132,6 +143,17 @@ define void @strided_vpstore_nxv4i16(<vscale x 4 x i16> %val, ptr %ptr, i32 sign
   ret void
 }
 
+define void @strided_vpstore_nxv4i16_unit_stride(<vscale x 4 x i16> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv4i16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv4i16.p0.i32(<vscale x 4 x i16> %val, ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.nxv8i16.p0.i32(<vscale x 8 x i16>, ptr, i32, <vscale x 8 x i1>, i32)
 
 define void @strided_vpstore_nxv8i16(<vscale x 8 x i16> %val, ptr %ptr, i32 signext %strided, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -180,6 +202,17 @@ define void @strided_vpstore_nxv4i32(<vscale x 4 x i32> %val, ptr %ptr, i32 sign
   ret void
 }
 
+define void @strided_vpstore_nxv4i32_unit_stride(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv4i32_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 4
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsse32.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32> %val, ptr %ptr, i32 4, <vscale x 4 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.nxv8i32.p0.i32(<vscale x 8 x i32>, ptr, i32, <vscale x 8 x i1>, i32)
 
 define void @strided_vpstore_nxv8i32(<vscale x 8 x i32> %val, ptr %ptr, i32 signext %strided, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -204,6 +237,17 @@ define void @strided_vpstore_nxv1i64(<vscale x 1 x i64> %val, ptr %ptr, i32 sign
   ret void
 }
 
+define void @strided_vpstore_nxv1i64_unit_stride(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv1i64_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 8
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vsse64.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i32(<vscale x 1 x i64> %val, ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.nxv2i64.p0.i32(<vscale x 2 x i64>, ptr, i32, <vscale x 2 x i1>, i32)
 
 define void @strided_vpstore_nxv2i64(<vscale x 2 x i64> %val, ptr %ptr, i32 signext %strided, <vscale x 2 x i1> %m, i32 zeroext %evl) {
@@ -276,6 +320,17 @@ define void @strided_vpstore_nxv4f16(<vscale x 4 x half> %val, ptr %ptr, i32 sig
   ret void
 }
 
+define void @strided_vpstore_nxv4f16_unit_stride(<vscale x 4 x half> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv4f16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv4f16.p0.i32(<vscale x 4 x half> %val, ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.nxv8f16.p0.i32(<vscale x 8 x half>, ptr, i32, <vscale x 8 x i1>, i32)
 
 define void @strided_vpstore_nxv8f16(<vscale x 8 x half> %val, ptr %ptr, i32 signext %strided, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -324,6 +379,17 @@ define void @strided_vpstore_nxv4f32(<vscale x 4 x float> %val, ptr %ptr, i32 si
   ret void
 }
 
+define void @strided_vpstore_nxv4f32_unit_stride(<vscale x 4 x float> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv4f32_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 4
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsse32.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> %val, ptr %ptr, i32 4, <vscale x 4 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.nxv8f32.p0.i32(<vscale x 8 x float>, ptr, i32, <vscale x 8 x i1>, i32)
 
 define void @strided_vpstore_nxv8f32(<vscale x 8 x float> %val, ptr %ptr, i32 signext %strided, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -348,6 +414,17 @@ define void @strided_vpstore_nxv1f64(<vscale x 1 x double> %val, ptr %ptr, i32 s
   ret void
 }
 
+define void @strided_vpstore_nxv1f64_unit_stride(<vscale x 1 x double> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv1f64_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 8
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vsse64.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv1f64.p0.i32(<vscale x 1 x double> %val, ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.nxv2f64.p0.i32(<vscale x 2 x double>, ptr, i32, <vscale x 2 x i1>, i32)
 
 define void @strided_vpstore_nxv2f64(<vscale x 2 x double> %val, ptr %ptr, i32 signext %strided, <vscale x 2 x i1> %m, i32 zeroext %evl) {
@@ -427,10 +504,10 @@ define void @strided_store_nxv16f64(<vscale x 16 x double> %v, ptr %ptr, i32 sig
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a3, vlenb
 ; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    bltu a2, a3, .LBB34_2
+; CHECK-NEXT:    bltu a2, a3, .LBB41_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:  .LBB34_2:
+; CHECK-NEXT:  .LBB41_2:
 ; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
 ; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
 ; CHECK-NEXT:    sub a5, a2, a3
@@ -454,10 +531,10 @@ define void @strided_store_nxv16f64_allones_mask(<vscale x 16 x double> %v, ptr
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a3, vlenb
 ; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    bltu a2, a3, .LBB35_2
+; CHECK-NEXT:    bltu a2, a3, .LBB42_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:  .LBB35_2:
+; CHECK-NEXT:  .LBB42_2:
 ; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
 ; CHECK-NEXT:    vsse64.v v8, (a0), a1
 ; CHECK-NEXT:    sub a3, a2, a3
@@ -485,15 +562,15 @@ define void @strided_store_nxv17f64(<vscale x 17 x double> %v, ptr %ptr, i32 sig
 ; CHECK-NEXT:    slli a6, a4, 1
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    mv a5, a3
-; CHECK-NEXT:    bltu a3, a6, .LBB36_2
+; CHECK-NEXT:    bltu a3, a6, .LBB43_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a5, a6
-; CHECK-NEXT:  .LBB36_2:
+; CHECK-NEXT:  .LBB43_2:
 ; CHECK-NEXT:    mv a7, a5
-; CHECK-NEXT:    bltu a5, a4, .LBB36_4
+; CHECK-NEXT:    bltu a5, a4, .LBB43_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a7, a4
-; CHECK-NEXT:  .LBB36_4:
+; CHECK-NEXT:  .LBB43_4:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr t0, vlenb
@@ -521,10 +598,10 @@ define void @strided_store_nxv17f64(<vscale x 17 x double> %v, ptr %ptr, i32 sig
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a0, a3, a0
 ; CHECK-NEXT:    vsse64.v v16, (a7), a2, v0.t
-; CHECK-NEXT:    bltu a0, a4, .LBB36_6
+; CHECK-NEXT:    bltu a0, a4, .LBB43_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    mv a0, a4
-; CHECK-NEXT:  .LBB36_6:
+; CHECK-NEXT:  .LBB43_6:
 ; CHECK-NEXT:    mul a3, a5, a2
 ; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:    srli a4, a4, 2
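For context, the pattern every added test follows is a vector-predicated strided store whose byte stride equals the element size (a "unit stride"), which the RISC-V backend still lowers to a vsse instruction with the constant stride materialized in a register. Below is a minimal standalone sketch of that input, not part of the patch; the function name and the llc invocation in the comment are illustrative only, and it assumes a riscv64 target with the V extension.

; Build with, e.g.:  llc -mtriple=riscv64 -mattr=+v unit_stride.ll
declare void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32>, ptr, i32, <vscale x 4 x i1>, i32)

define void @unit_stride_example(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
  ; Stride of 4 bytes = one i32 element, so active lanes land contiguously;
  ; %m masks lanes and %evl bounds the number of elements stored.
  call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32> %val, ptr %ptr, i32 4, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}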