@@ -175,21 +175,59 @@ for.cond.cleanup: ; preds = %vector.body
175
175
ret void
176
176
}
177
177
178
; Codegen test for a masked gather of i32 elements whose eight lane addresses
; are always equal (zero stride). %vec.ind starts as zeroinitializer and is
; advanced by a splat constant, so every lane carries the same offset on each
; iteration. The expected assembly below reads that single address with a
; scalar lw and folds the value into the vector add through vadd.vx.
; NOTE(review): the expectation lines are presumably produced by
; update_llc_test_checks.py -- regenerate them rather than hand-editing;
; confirm against the file header, which is outside this view.
+ define void @gather_zero_stride_i32 (ptr noalias nocapture %A , ptr noalias nocapture readonly %B ) {
179
+ ; CHECK-LABEL: gather_zero_stride_i32:
180
+ ; CHECK: # %bb.0: # %entry
181
+ ; CHECK-NEXT: addi a2, a0, 1024
182
+ ; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
183
+ ; CHECK-NEXT: .LBB4_1: # %vector.body
184
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
185
+ ; CHECK-NEXT: lw a3, 0(a1)
186
+ ; CHECK-NEXT: vle32.v v8, (a0)
187
+ ; CHECK-NEXT: vadd.vx v8, v8, a3
188
+ ; CHECK-NEXT: vse32.v v8, (a0)
189
+ ; CHECK-NEXT: addi a0, a0, 8
190
+ ; CHECK-NEXT: addi a1, a1, 160
191
+ ; CHECK-NEXT: bne a0, a2, .LBB4_1
192
+ ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
193
+ ; CHECK-NEXT: ret
194
+ entry:
195
+ br label %vector.body
196
+
197
+ vector.body: ; preds = %vector.body, %entry
198
; Byte offset into %A: starts at 0, grows by 8 per iteration, and the loop
; leaves once the incremented value equals 1024.
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
199
; Per-lane induction vector: all lanes start at 0 and receive the same
; increment, so the lanes remain equal to each other on every iteration.
+ %vec.ind = phi <8 x i64 > [ zeroinitializer , %entry ], [ %vec.ind.next , %vector.body ]
200
; Scale by 5 to form the byte offset into %B; with the step of 32 applied to
; %vec.ind this moves the %B address by 32 * 5 = 160 bytes per iteration,
; matching the expected "addi a1, a1, 160".
+ %i = mul nuw nsw <8 x i64 > %vec.ind , <i64 5 , i64 5 , i64 5 , i64 5 , i64 5 , i64 5 , i64 5 , i64 5 >
201
+ %i1 = getelementptr inbounds i8 , ptr %B , <8 x i64 > %i
202
; Gather with an all-true mask, alignment operand 4, and an undef passthru.
+ %wide.masked.gather = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0 (<8 x ptr > %i1 , i32 4 , <8 x i1 > <i1 true , i1 true , i1 true , i1 true , i1 true , i1 true , i1 true , i1 true >, <8 x i32 > undef )
203
; NOTE(review): %A is addressed in bytes (i8 GEP) and advances 8 bytes per
; iteration, while each load/store below covers <8 x i32> (32 bytes), so
; successive iterations touch overlapping memory. Presumably acceptable for a
; codegen-only test -- confirm before reusing this IR elsewhere.
+ %i2 = getelementptr inbounds i8 , ptr %A , i64 %index
204
+ %wide.load = load <8 x i32 >, ptr %i2 , align 4
205
; Read-modify-write: add the gathered value to the loaded vector and store the
; result back to the same %A address.
+ %i4 = add <8 x i32 > %wide.load , %wide.masked.gather
206
+ store <8 x i32 > %i4 , ptr %i2 , align 4
207
+ %index.next = add nuw i64 %index , 8
208
+ %vec.ind.next = add <8 x i64 > %vec.ind , <i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 >
209
+ %i6 = icmp eq i64 %index.next , 1024
210
+ br i1 %i6 , label %for.cond.cleanup , label %vector.body
211
+
212
+ for.cond.cleanup: ; preds = %vector.body
213
+ ret void
214
+ }
215
+
178
216
define void @gather_zero_stride_unfold (ptr noalias nocapture %A , ptr noalias nocapture readonly %B ) {
179
217
; V-LABEL: gather_zero_stride_unfold:
180
218
; V: # %bb.0: # %entry
181
219
; V-NEXT: addi a2, a0, 1024
182
220
; V-NEXT: li a3, 32
183
221
; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
184
- ; V-NEXT: .LBB4_1 : # %vector.body
222
+ ; V-NEXT: .LBB5_1 : # %vector.body
185
223
; V-NEXT: # =>This Inner Loop Header: Depth=1
186
224
; V-NEXT: vlse8.v v8, (a1), zero
187
225
; V-NEXT: vle8.v v9, (a0)
188
226
; V-NEXT: vdivu.vv v8, v8, v9
189
227
; V-NEXT: vse8.v v8, (a0)
190
228
; V-NEXT: addi a0, a0, 32
191
229
; V-NEXT: addi a1, a1, 160
192
- ; V-NEXT: bne a0, a2, .LBB4_1
230
+ ; V-NEXT: bne a0, a2, .LBB5_1
193
231
; V-NEXT: # %bb.2: # %for.cond.cleanup
194
232
; V-NEXT: ret
195
233
;
@@ -198,15 +236,15 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
198
236
; ZVE32F-NEXT: addi a2, a0, 1024
199
237
; ZVE32F-NEXT: li a3, 32
200
238
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
201
- ; ZVE32F-NEXT: .LBB4_1 : # %vector.body
239
+ ; ZVE32F-NEXT: .LBB5_1 : # %vector.body
202
240
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
203
241
; ZVE32F-NEXT: vlse8.v v8, (a1), zero
204
242
; ZVE32F-NEXT: vle8.v v9, (a0)
205
243
; ZVE32F-NEXT: vdivu.vv v8, v8, v9
206
244
; ZVE32F-NEXT: vse8.v v8, (a0)
207
245
; ZVE32F-NEXT: addi a0, a0, 32
208
246
; ZVE32F-NEXT: addi a1, a1, 160
209
- ; ZVE32F-NEXT: bne a0, a2, .LBB4_1
247
+ ; ZVE32F-NEXT: bne a0, a2, .LBB5_1
210
248
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
211
249
; ZVE32F-NEXT: ret
212
250
;
@@ -215,7 +253,7 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
215
253
; NOT-OPTIMIZED-NEXT: addi a2, a0, 1024
216
254
; NOT-OPTIMIZED-NEXT: li a3, 32
217
255
; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
218
- ; NOT-OPTIMIZED-NEXT: .LBB4_1 : # %vector.body
256
+ ; NOT-OPTIMIZED-NEXT: .LBB5_1 : # %vector.body
219
257
; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1
220
258
; NOT-OPTIMIZED-NEXT: lbu a3, 0(a1)
221
259
; NOT-OPTIMIZED-NEXT: vle8.v v8, (a0)
@@ -224,7 +262,7 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
224
262
; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0)
225
263
; NOT-OPTIMIZED-NEXT: addi a0, a0, 32
226
264
; NOT-OPTIMIZED-NEXT: addi a1, a1, 160
227
- ; NOT-OPTIMIZED-NEXT: bne a0, a2, .LBB4_1
265
+ ; NOT-OPTIMIZED-NEXT: bne a0, a2, .LBB5_1
228
266
; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup
229
267
; NOT-OPTIMIZED-NEXT: ret
230
268
entry:
@@ -260,15 +298,15 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B
260
298
; CHECK-NEXT: li a4, 32
261
299
; CHECK-NEXT: li a3, 5
262
300
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
263
- ; CHECK-NEXT: .LBB5_1 : # %vector.body
301
+ ; CHECK-NEXT: .LBB6_1 : # %vector.body
264
302
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
265
303
; CHECK-NEXT: vle8.v v8, (a1)
266
304
; CHECK-NEXT: vlse8.v v9, (a0), a3
267
305
; CHECK-NEXT: vadd.vv v8, v9, v8
268
306
; CHECK-NEXT: vsse8.v v8, (a0), a3
269
307
; CHECK-NEXT: addi a1, a1, 32
270
308
; CHECK-NEXT: addi a0, a0, 160
271
- ; CHECK-NEXT: bne a1, a2, .LBB5_1
309
+ ; CHECK-NEXT: bne a1, a2, .LBB6_1
272
310
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
273
311
; CHECK-NEXT: ret
274
312
entry:
@@ -303,7 +341,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
303
341
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
304
342
; CHECK-NEXT: vmv.s.x v0, a4
305
343
; CHECK-NEXT: li a4, 5
306
- ; CHECK-NEXT: .LBB6_1 : # %vector.body
344
+ ; CHECK-NEXT: .LBB7_1 : # %vector.body
307
345
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
308
346
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
309
347
; CHECK-NEXT: vle8.v v9, (a1)
@@ -313,7 +351,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
313
351
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
314
352
; CHECK-NEXT: addi a1, a1, 32
315
353
; CHECK-NEXT: addi a0, a0, 160
316
- ; CHECK-NEXT: bne a1, a2, .LBB6_1
354
+ ; CHECK-NEXT: bne a1, a2, .LBB7_1
317
355
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
318
356
; CHECK-NEXT: ret
319
357
entry:
@@ -349,7 +387,7 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl
349
387
; CHECK-NEXT: add a2, a0, a2
350
388
; CHECK-NEXT: li a3, 16
351
389
; CHECK-NEXT: li a4, 32
352
- ; CHECK-NEXT: .LBB7_1 : # %vector.body
390
+ ; CHECK-NEXT: .LBB8_1 : # %vector.body
353
391
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
354
392
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
355
393
; CHECK-NEXT: vlse32.v v8, (a1), a3
@@ -361,7 +399,7 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl
361
399
; CHECK-NEXT: vse8.v v8, (a0)
362
400
; CHECK-NEXT: addi a0, a0, 32
363
401
; CHECK-NEXT: addi a1, a1, 128
364
- ; CHECK-NEXT: bne a0, a2, .LBB7_1
402
+ ; CHECK-NEXT: bne a0, a2, .LBB8_1
365
403
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
366
404
; CHECK-NEXT: ret
367
405
entry:
@@ -397,7 +435,7 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon
397
435
; CHECK-NEXT: add a2, a1, a2
398
436
; CHECK-NEXT: li a3, 32
399
437
; CHECK-NEXT: li a4, 16
400
- ; CHECK-NEXT: .LBB8_1 : # %vector.body
438
+ ; CHECK-NEXT: .LBB9_1 : # %vector.body
401
439
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
402
440
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
403
441
; CHECK-NEXT: vle8.v v8, (a1)
@@ -407,7 +445,7 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon
407
445
; CHECK-NEXT: vsse32.v v8, (a0), a4
408
446
; CHECK-NEXT: addi a1, a1, 32
409
447
; CHECK-NEXT: addi a0, a0, 128
410
- ; CHECK-NEXT: bne a1, a2, .LBB8_1
448
+ ; CHECK-NEXT: bne a1, a2, .LBB9_1
411
449
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
412
450
; CHECK-NEXT: ret
413
451
entry:
@@ -448,7 +486,7 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado
448
486
; CHECK-NEXT: add a2, a0, a2
449
487
; CHECK-NEXT: li a3, 16
450
488
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
451
- ; CHECK-NEXT: .LBB9_1 : # %vector.body
489
+ ; CHECK-NEXT: .LBB10_1 : # %vector.body
452
490
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
453
491
; CHECK-NEXT: addi a4, a0, 32
454
492
; CHECK-NEXT: addi a5, a1, -128
@@ -462,7 +500,7 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado
462
500
; CHECK-NEXT: vse32.v v9, (a4)
463
501
; CHECK-NEXT: addi a0, a0, 64
464
502
; CHECK-NEXT: addi a1, a1, 256
465
- ; CHECK-NEXT: bne a0, a2, .LBB9_1
503
+ ; CHECK-NEXT: bne a0, a2, .LBB10_1
466
504
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
467
505
; CHECK-NEXT: ret
468
506
entry:
@@ -508,7 +546,7 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado
508
546
; CHECK-NEXT: li a3, 64
509
547
; CHECK-NEXT: li a4, 16
510
548
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
511
- ; CHECK-NEXT: .LBB10_1 : # %vector.body
549
+ ; CHECK-NEXT: .LBB11_1 : # %vector.body
512
550
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
513
551
; CHECK-NEXT: vlse32.v v8, (a1), a3
514
552
; CHECK-NEXT: vlse32.v v9, (a0), a4
@@ -535,7 +573,7 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado
535
573
; CHECK-NEXT: addi a2, a2, -8
536
574
; CHECK-NEXT: addi a1, a1, 512
537
575
; CHECK-NEXT: addi a0, a0, 128
538
- ; CHECK-NEXT: bnez a2, .LBB10_1
576
+ ; CHECK-NEXT: bnez a2, .LBB11_1
539
577
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
540
578
; CHECK-NEXT: ret
541
579
entry:
@@ -597,7 +635,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
597
635
; V-NEXT: add a2, a0, a2
598
636
; V-NEXT: li a3, 40
599
637
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
600
- ; V-NEXT: .LBB11_1 : # %bb2
638
+ ; V-NEXT: .LBB12_1 : # %bb2
601
639
; V-NEXT: # =>This Inner Loop Header: Depth=1
602
640
; V-NEXT: addi a4, a1, 80
603
641
; V-NEXT: vlse64.v v8, (a1), a3
@@ -607,7 +645,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
607
645
; V-NEXT: vse64.v v9, (a4)
608
646
; V-NEXT: addi a0, a0, 32
609
647
; V-NEXT: addi a1, a1, 160
610
- ; V-NEXT: bne a0, a2, .LBB11_1
648
+ ; V-NEXT: bne a0, a2, .LBB12_1
611
649
; V-NEXT: # %bb.2: # %bb18
612
650
; V-NEXT: ret
613
651
;
@@ -618,7 +656,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
618
656
; ZVE32F-NEXT: add a3, a0, a3
619
657
; ZVE32F-NEXT: li a4, 1
620
658
; ZVE32F-NEXT: li a5, 40
621
- ; ZVE32F-NEXT: .LBB11_1 : # %bb2
659
+ ; ZVE32F-NEXT: .LBB12_1 : # %bb2
622
660
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
623
661
; ZVE32F-NEXT: mul a6, a4, a5
624
662
; ZVE32F-NEXT: add a6, a1, a6
@@ -635,7 +673,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
635
673
; ZVE32F-NEXT: addi a2, a2, 4
636
674
; ZVE32F-NEXT: addi a0, a0, 32
637
675
; ZVE32F-NEXT: addi a4, a4, 4
638
- ; ZVE32F-NEXT: bne a0, a3, .LBB11_1
676
+ ; ZVE32F-NEXT: bne a0, a3, .LBB12_1
639
677
; ZVE32F-NEXT: # %bb.2: # %bb18
640
678
; ZVE32F-NEXT: ret
641
679
bb:
@@ -674,7 +712,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
674
712
; V-NEXT: add a2, a1, a2
675
713
; V-NEXT: li a3, 40
676
714
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
677
- ; V-NEXT: .LBB12_1 : # %bb2
715
+ ; V-NEXT: .LBB13_1 : # %bb2
678
716
; V-NEXT: # =>This Inner Loop Header: Depth=1
679
717
; V-NEXT: addi a4, a1, 16
680
718
; V-NEXT: vle64.v v8, (a1)
@@ -684,7 +722,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
684
722
; V-NEXT: vsse64.v v9, (a4), a3
685
723
; V-NEXT: addi a1, a1, 32
686
724
; V-NEXT: addi a0, a0, 160
687
- ; V-NEXT: bne a1, a2, .LBB12_1
725
+ ; V-NEXT: bne a1, a2, .LBB13_1
688
726
; V-NEXT: # %bb.2: # %bb18
689
727
; V-NEXT: ret
690
728
;
@@ -695,7 +733,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
695
733
; ZVE32F-NEXT: add a3, a1, a3
696
734
; ZVE32F-NEXT: li a4, 1
697
735
; ZVE32F-NEXT: li a5, 40
698
- ; ZVE32F-NEXT: .LBB12_1 : # %bb2
736
+ ; ZVE32F-NEXT: .LBB13_1 : # %bb2
699
737
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
700
738
; ZVE32F-NEXT: ld a6, 8(a1)
701
739
; ZVE32F-NEXT: ld a7, 0(a1)
@@ -712,7 +750,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
712
750
; ZVE32F-NEXT: addi a2, a2, 4
713
751
; ZVE32F-NEXT: addi a1, a1, 32
714
752
; ZVE32F-NEXT: addi a4, a4, 4
715
- ; ZVE32F-NEXT: bne a1, a3, .LBB12_1
753
+ ; ZVE32F-NEXT: bne a1, a3, .LBB13_1
716
754
; ZVE32F-NEXT: # %bb.2: # %bb18
717
755
; ZVE32F-NEXT: ret
718
756
bb:
@@ -747,13 +785,13 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
747
785
; CHECK-LABEL: strided_load_startval_add_with_splat:
748
786
; CHECK: # %bb.0: # %bb
749
787
; CHECK-NEXT: li a3, 1024
750
- ; CHECK-NEXT: beq a2, a3, .LBB13_7
788
+ ; CHECK-NEXT: beq a2, a3, .LBB14_7
751
789
; CHECK-NEXT: # %bb.1: # %bb3
752
790
; CHECK-NEXT: li a3, 1023
753
791
; CHECK-NEXT: subw a5, a3, a2
754
792
; CHECK-NEXT: li a6, 31
755
793
; CHECK-NEXT: mv a4, a2
756
- ; CHECK-NEXT: bltu a5, a6, .LBB13_5
794
+ ; CHECK-NEXT: bltu a5, a6, .LBB14_5
757
795
; CHECK-NEXT: # %bb.2: # %bb9
758
796
; CHECK-NEXT: slli a5, a5, 32
759
797
; CHECK-NEXT: srli a5, a5, 32
@@ -768,18 +806,18 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
768
806
; CHECK-NEXT: li t2, 32
769
807
; CHECK-NEXT: li t1, 5
770
808
; CHECK-NEXT: vsetvli zero, t2, e8, m1, ta, ma
771
- ; CHECK-NEXT: .LBB13_3 : # %bb15
809
+ ; CHECK-NEXT: .LBB14_3 : # %bb15
772
810
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
773
811
; CHECK-NEXT: vlse8.v v8, (a2), t1
774
812
; CHECK-NEXT: vle8.v v9, (a7)
775
813
; CHECK-NEXT: vadd.vv v8, v9, v8
776
814
; CHECK-NEXT: vse8.v v8, (a7)
777
815
; CHECK-NEXT: addi a7, a7, 32
778
816
; CHECK-NEXT: addi a2, a2, 160
779
- ; CHECK-NEXT: bne a7, t0, .LBB13_3
817
+ ; CHECK-NEXT: bne a7, t0, .LBB14_3
780
818
; CHECK-NEXT: # %bb.4: # %bb30
781
- ; CHECK-NEXT: beq a5, a6, .LBB13_7
782
- ; CHECK-NEXT: .LBB13_5 : # %bb32
819
+ ; CHECK-NEXT: beq a5, a6, .LBB14_7
820
+ ; CHECK-NEXT: .LBB14_5 : # %bb32
783
821
; CHECK-NEXT: add a2, a0, a4
784
822
; CHECK-NEXT: slli a5, a4, 2
785
823
; CHECK-NEXT: add a1, a1, a4
@@ -790,16 +828,16 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
790
828
; CHECK-NEXT: add a0, a4, a0
791
829
; CHECK-NEXT: add a0, a0, a3
792
830
; CHECK-NEXT: addi a0, a0, 1
793
- ; CHECK-NEXT: .LBB13_6 : # %bb35
831
+ ; CHECK-NEXT: .LBB14_6 : # %bb35
794
832
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
795
833
; CHECK-NEXT: lbu a3, 0(a1)
796
834
; CHECK-NEXT: lbu a4, 0(a2)
797
835
; CHECK-NEXT: add a3, a4, a3
798
836
; CHECK-NEXT: sb a3, 0(a2)
799
837
; CHECK-NEXT: addi a2, a2, 1
800
838
; CHECK-NEXT: addi a1, a1, 5
801
- ; CHECK-NEXT: bne a2, a0, .LBB13_6
802
- ; CHECK-NEXT: .LBB13_7 : # %bb34
839
+ ; CHECK-NEXT: bne a2, a0, .LBB14_6
840
+ ; CHECK-NEXT: .LBB14_7 : # %bb34
803
841
; CHECK-NEXT: ret
804
842
bb:
805
843
%i = icmp eq i32 %arg2 , 1024
@@ -870,24 +908,24 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr
870
908
; CHECK-LABEL: gather_no_scalar_remainder:
871
909
; CHECK: # %bb.0: # %bb
872
910
; CHECK-NEXT: slli a2, a2, 4
873
- ; CHECK-NEXT: beqz a2, .LBB14_3
911
+ ; CHECK-NEXT: beqz a2, .LBB15_3
874
912
; CHECK-NEXT: # %bb.1: # %bb2
875
913
; CHECK-NEXT: addi a2, a2, -16
876
914
; CHECK-NEXT: andi a2, a2, -16
877
915
; CHECK-NEXT: add a2, a2, a0
878
916
; CHECK-NEXT: addi a2, a2, 16
879
917
; CHECK-NEXT: li a3, 5
880
918
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
881
- ; CHECK-NEXT: .LBB14_2 : # %bb4
919
+ ; CHECK-NEXT: .LBB15_2 : # %bb4
882
920
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
883
921
; CHECK-NEXT: vlse8.v v8, (a1), a3
884
922
; CHECK-NEXT: vle8.v v9, (a0)
885
923
; CHECK-NEXT: vadd.vv v8, v9, v8
886
924
; CHECK-NEXT: vse8.v v8, (a0)
887
925
; CHECK-NEXT: addi a0, a0, 16
888
926
; CHECK-NEXT: addi a1, a1, 80
889
- ; CHECK-NEXT: bne a0, a2, .LBB14_2
890
- ; CHECK-NEXT: .LBB14_3 : # %bb16
927
+ ; CHECK-NEXT: bne a0, a2, .LBB15_2
928
+ ; CHECK-NEXT: .LBB15_3 : # %bb16
891
929
; CHECK-NEXT: ret
892
930
bb:
893
931
%i = shl i64 %arg2 , 4
@@ -922,15 +960,15 @@ define void @gather_zero_stride_fp(ptr noalias nocapture %A, ptr noalias nocaptu
922
960
; CHECK-NEXT: lui a2, 1
923
961
; CHECK-NEXT: add a2, a0, a2
924
962
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
925
- ; CHECK-NEXT: .LBB15_1 : # %vector.body
963
+ ; CHECK-NEXT: .LBB16_1 : # %vector.body
926
964
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
927
965
; CHECK-NEXT: flw fa5, 0(a1)
928
966
; CHECK-NEXT: vle32.v v8, (a0)
929
967
; CHECK-NEXT: vfadd.vf v8, v8, fa5
930
968
; CHECK-NEXT: vse32.v v8, (a0)
931
969
; CHECK-NEXT: addi a0, a0, 128
932
970
; CHECK-NEXT: addi a1, a1, 640
933
- ; CHECK-NEXT: bne a0, a2, .LBB15_1
971
+ ; CHECK-NEXT: bne a0, a2, .LBB16_1
934
972
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
935
973
; CHECK-NEXT: ret
936
974
entry:
0 commit comments