Skip to content

Commit 7e6e498

Browse files
authored
[RISCV] Use EXTLOAD instead of ZEXTLOAD when lowering riscv_masked_strided_load with zero stride. (#97317)
The splat we generate after the load doesn't use the extended bits, so it shouldn't matter which extend type we use. EXTLOAD is lowered as SEXTLOAD on every element type except i8.
1 parent ae6549a commit 7e6e498

File tree

2 files changed

+80
-42
lines changed

2 files changed

+80
-42
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9326,7 +9326,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
93269326
MVT ScalarVT = ContainerVT.getVectorElementType();
93279327
if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
93289328
SDValue ScalarLoad =
9329-
DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9329+
DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
93309330
ScalarVT, Load->getMemOperand());
93319331
Chain = ScalarLoad.getValue(1);
93329332
Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll

Lines changed: 79 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -175,21 +175,59 @@ for.cond.cleanup: ; preds = %vector.body
175175
ret void
176176
}
177177

178+
define void @gather_zero_stride_i32(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
179+
; CHECK-LABEL: gather_zero_stride_i32:
180+
; CHECK: # %bb.0: # %entry
181+
; CHECK-NEXT: addi a2, a0, 1024
182+
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
183+
; CHECK-NEXT: .LBB4_1: # %vector.body
184+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
185+
; CHECK-NEXT: lw a3, 0(a1)
186+
; CHECK-NEXT: vle32.v v8, (a0)
187+
; CHECK-NEXT: vadd.vx v8, v8, a3
188+
; CHECK-NEXT: vse32.v v8, (a0)
189+
; CHECK-NEXT: addi a0, a0, 8
190+
; CHECK-NEXT: addi a1, a1, 160
191+
; CHECK-NEXT: bne a0, a2, .LBB4_1
192+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
193+
; CHECK-NEXT: ret
194+
entry:
195+
br label %vector.body
196+
197+
vector.body: ; preds = %vector.body, %entry
198+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
199+
%vec.ind = phi <8 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
200+
%i = mul nuw nsw <8 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
201+
%i1 = getelementptr inbounds i8, ptr %B, <8 x i64> %i
202+
%wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
203+
%i2 = getelementptr inbounds i8, ptr %A, i64 %index
204+
%wide.load = load <8 x i32>, ptr %i2, align 4
205+
%i4 = add <8 x i32> %wide.load, %wide.masked.gather
206+
store <8 x i32> %i4, ptr %i2, align 4
207+
%index.next = add nuw i64 %index, 8
208+
%vec.ind.next = add <8 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
209+
%i6 = icmp eq i64 %index.next, 1024
210+
br i1 %i6, label %for.cond.cleanup, label %vector.body
211+
212+
for.cond.cleanup: ; preds = %vector.body
213+
ret void
214+
}
215+
178216
define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
179217
; V-LABEL: gather_zero_stride_unfold:
180218
; V: # %bb.0: # %entry
181219
; V-NEXT: addi a2, a0, 1024
182220
; V-NEXT: li a3, 32
183221
; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
184-
; V-NEXT: .LBB4_1: # %vector.body
222+
; V-NEXT: .LBB5_1: # %vector.body
185223
; V-NEXT: # =>This Inner Loop Header: Depth=1
186224
; V-NEXT: vlse8.v v8, (a1), zero
187225
; V-NEXT: vle8.v v9, (a0)
188226
; V-NEXT: vdivu.vv v8, v8, v9
189227
; V-NEXT: vse8.v v8, (a0)
190228
; V-NEXT: addi a0, a0, 32
191229
; V-NEXT: addi a1, a1, 160
192-
; V-NEXT: bne a0, a2, .LBB4_1
230+
; V-NEXT: bne a0, a2, .LBB5_1
193231
; V-NEXT: # %bb.2: # %for.cond.cleanup
194232
; V-NEXT: ret
195233
;
@@ -198,15 +236,15 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
198236
; ZVE32F-NEXT: addi a2, a0, 1024
199237
; ZVE32F-NEXT: li a3, 32
200238
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
201-
; ZVE32F-NEXT: .LBB4_1: # %vector.body
239+
; ZVE32F-NEXT: .LBB5_1: # %vector.body
202240
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
203241
; ZVE32F-NEXT: vlse8.v v8, (a1), zero
204242
; ZVE32F-NEXT: vle8.v v9, (a0)
205243
; ZVE32F-NEXT: vdivu.vv v8, v8, v9
206244
; ZVE32F-NEXT: vse8.v v8, (a0)
207245
; ZVE32F-NEXT: addi a0, a0, 32
208246
; ZVE32F-NEXT: addi a1, a1, 160
209-
; ZVE32F-NEXT: bne a0, a2, .LBB4_1
247+
; ZVE32F-NEXT: bne a0, a2, .LBB5_1
210248
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
211249
; ZVE32F-NEXT: ret
212250
;
@@ -215,7 +253,7 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
215253
; NOT-OPTIMIZED-NEXT: addi a2, a0, 1024
216254
; NOT-OPTIMIZED-NEXT: li a3, 32
217255
; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
218-
; NOT-OPTIMIZED-NEXT: .LBB4_1: # %vector.body
256+
; NOT-OPTIMIZED-NEXT: .LBB5_1: # %vector.body
219257
; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1
220258
; NOT-OPTIMIZED-NEXT: lbu a3, 0(a1)
221259
; NOT-OPTIMIZED-NEXT: vle8.v v8, (a0)
@@ -224,7 +262,7 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
224262
; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0)
225263
; NOT-OPTIMIZED-NEXT: addi a0, a0, 32
226264
; NOT-OPTIMIZED-NEXT: addi a1, a1, 160
227-
; NOT-OPTIMIZED-NEXT: bne a0, a2, .LBB4_1
265+
; NOT-OPTIMIZED-NEXT: bne a0, a2, .LBB5_1
228266
; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup
229267
; NOT-OPTIMIZED-NEXT: ret
230268
entry:
@@ -260,15 +298,15 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B
260298
; CHECK-NEXT: li a4, 32
261299
; CHECK-NEXT: li a3, 5
262300
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
263-
; CHECK-NEXT: .LBB5_1: # %vector.body
301+
; CHECK-NEXT: .LBB6_1: # %vector.body
264302
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
265303
; CHECK-NEXT: vle8.v v8, (a1)
266304
; CHECK-NEXT: vlse8.v v9, (a0), a3
267305
; CHECK-NEXT: vadd.vv v8, v9, v8
268306
; CHECK-NEXT: vsse8.v v8, (a0), a3
269307
; CHECK-NEXT: addi a1, a1, 32
270308
; CHECK-NEXT: addi a0, a0, 160
271-
; CHECK-NEXT: bne a1, a2, .LBB5_1
309+
; CHECK-NEXT: bne a1, a2, .LBB6_1
272310
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
273311
; CHECK-NEXT: ret
274312
entry:
@@ -303,7 +341,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
303341
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
304342
; CHECK-NEXT: vmv.s.x v0, a4
305343
; CHECK-NEXT: li a4, 5
306-
; CHECK-NEXT: .LBB6_1: # %vector.body
344+
; CHECK-NEXT: .LBB7_1: # %vector.body
307345
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
308346
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
309347
; CHECK-NEXT: vle8.v v9, (a1)
@@ -313,7 +351,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
313351
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
314352
; CHECK-NEXT: addi a1, a1, 32
315353
; CHECK-NEXT: addi a0, a0, 160
316-
; CHECK-NEXT: bne a1, a2, .LBB6_1
354+
; CHECK-NEXT: bne a1, a2, .LBB7_1
317355
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
318356
; CHECK-NEXT: ret
319357
entry:
@@ -349,7 +387,7 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl
349387
; CHECK-NEXT: add a2, a0, a2
350388
; CHECK-NEXT: li a3, 16
351389
; CHECK-NEXT: li a4, 32
352-
; CHECK-NEXT: .LBB7_1: # %vector.body
390+
; CHECK-NEXT: .LBB8_1: # %vector.body
353391
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
354392
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
355393
; CHECK-NEXT: vlse32.v v8, (a1), a3
@@ -361,7 +399,7 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl
361399
; CHECK-NEXT: vse8.v v8, (a0)
362400
; CHECK-NEXT: addi a0, a0, 32
363401
; CHECK-NEXT: addi a1, a1, 128
364-
; CHECK-NEXT: bne a0, a2, .LBB7_1
402+
; CHECK-NEXT: bne a0, a2, .LBB8_1
365403
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
366404
; CHECK-NEXT: ret
367405
entry:
@@ -397,7 +435,7 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon
397435
; CHECK-NEXT: add a2, a1, a2
398436
; CHECK-NEXT: li a3, 32
399437
; CHECK-NEXT: li a4, 16
400-
; CHECK-NEXT: .LBB8_1: # %vector.body
438+
; CHECK-NEXT: .LBB9_1: # %vector.body
401439
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
402440
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
403441
; CHECK-NEXT: vle8.v v8, (a1)
@@ -407,7 +445,7 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon
407445
; CHECK-NEXT: vsse32.v v8, (a0), a4
408446
; CHECK-NEXT: addi a1, a1, 32
409447
; CHECK-NEXT: addi a0, a0, 128
410-
; CHECK-NEXT: bne a1, a2, .LBB8_1
448+
; CHECK-NEXT: bne a1, a2, .LBB9_1
411449
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
412450
; CHECK-NEXT: ret
413451
entry:
@@ -448,7 +486,7 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado
448486
; CHECK-NEXT: add a2, a0, a2
449487
; CHECK-NEXT: li a3, 16
450488
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
451-
; CHECK-NEXT: .LBB9_1: # %vector.body
489+
; CHECK-NEXT: .LBB10_1: # %vector.body
452490
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
453491
; CHECK-NEXT: addi a4, a0, 32
454492
; CHECK-NEXT: addi a5, a1, -128
@@ -462,7 +500,7 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado
462500
; CHECK-NEXT: vse32.v v9, (a4)
463501
; CHECK-NEXT: addi a0, a0, 64
464502
; CHECK-NEXT: addi a1, a1, 256
465-
; CHECK-NEXT: bne a0, a2, .LBB9_1
503+
; CHECK-NEXT: bne a0, a2, .LBB10_1
466504
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
467505
; CHECK-NEXT: ret
468506
entry:
@@ -508,7 +546,7 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado
508546
; CHECK-NEXT: li a3, 64
509547
; CHECK-NEXT: li a4, 16
510548
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
511-
; CHECK-NEXT: .LBB10_1: # %vector.body
549+
; CHECK-NEXT: .LBB11_1: # %vector.body
512550
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
513551
; CHECK-NEXT: vlse32.v v8, (a1), a3
514552
; CHECK-NEXT: vlse32.v v9, (a0), a4
@@ -535,7 +573,7 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado
535573
; CHECK-NEXT: addi a2, a2, -8
536574
; CHECK-NEXT: addi a1, a1, 512
537575
; CHECK-NEXT: addi a0, a0, 128
538-
; CHECK-NEXT: bnez a2, .LBB10_1
576+
; CHECK-NEXT: bnez a2, .LBB11_1
539577
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
540578
; CHECK-NEXT: ret
541579
entry:
@@ -597,7 +635,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
597635
; V-NEXT: add a2, a0, a2
598636
; V-NEXT: li a3, 40
599637
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
600-
; V-NEXT: .LBB11_1: # %bb2
638+
; V-NEXT: .LBB12_1: # %bb2
601639
; V-NEXT: # =>This Inner Loop Header: Depth=1
602640
; V-NEXT: addi a4, a1, 80
603641
; V-NEXT: vlse64.v v8, (a1), a3
@@ -607,7 +645,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
607645
; V-NEXT: vse64.v v9, (a4)
608646
; V-NEXT: addi a0, a0, 32
609647
; V-NEXT: addi a1, a1, 160
610-
; V-NEXT: bne a0, a2, .LBB11_1
648+
; V-NEXT: bne a0, a2, .LBB12_1
611649
; V-NEXT: # %bb.2: # %bb18
612650
; V-NEXT: ret
613651
;
@@ -618,7 +656,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
618656
; ZVE32F-NEXT: add a3, a0, a3
619657
; ZVE32F-NEXT: li a4, 1
620658
; ZVE32F-NEXT: li a5, 40
621-
; ZVE32F-NEXT: .LBB11_1: # %bb2
659+
; ZVE32F-NEXT: .LBB12_1: # %bb2
622660
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
623661
; ZVE32F-NEXT: mul a6, a4, a5
624662
; ZVE32F-NEXT: add a6, a1, a6
@@ -635,7 +673,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
635673
; ZVE32F-NEXT: addi a2, a2, 4
636674
; ZVE32F-NEXT: addi a0, a0, 32
637675
; ZVE32F-NEXT: addi a4, a4, 4
638-
; ZVE32F-NEXT: bne a0, a3, .LBB11_1
676+
; ZVE32F-NEXT: bne a0, a3, .LBB12_1
639677
; ZVE32F-NEXT: # %bb.2: # %bb18
640678
; ZVE32F-NEXT: ret
641679
bb:
@@ -674,7 +712,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
674712
; V-NEXT: add a2, a1, a2
675713
; V-NEXT: li a3, 40
676714
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
677-
; V-NEXT: .LBB12_1: # %bb2
715+
; V-NEXT: .LBB13_1: # %bb2
678716
; V-NEXT: # =>This Inner Loop Header: Depth=1
679717
; V-NEXT: addi a4, a1, 16
680718
; V-NEXT: vle64.v v8, (a1)
@@ -684,7 +722,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
684722
; V-NEXT: vsse64.v v9, (a4), a3
685723
; V-NEXT: addi a1, a1, 32
686724
; V-NEXT: addi a0, a0, 160
687-
; V-NEXT: bne a1, a2, .LBB12_1
725+
; V-NEXT: bne a1, a2, .LBB13_1
688726
; V-NEXT: # %bb.2: # %bb18
689727
; V-NEXT: ret
690728
;
@@ -695,7 +733,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
695733
; ZVE32F-NEXT: add a3, a1, a3
696734
; ZVE32F-NEXT: li a4, 1
697735
; ZVE32F-NEXT: li a5, 40
698-
; ZVE32F-NEXT: .LBB12_1: # %bb2
736+
; ZVE32F-NEXT: .LBB13_1: # %bb2
699737
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
700738
; ZVE32F-NEXT: ld a6, 8(a1)
701739
; ZVE32F-NEXT: ld a7, 0(a1)
@@ -712,7 +750,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
712750
; ZVE32F-NEXT: addi a2, a2, 4
713751
; ZVE32F-NEXT: addi a1, a1, 32
714752
; ZVE32F-NEXT: addi a4, a4, 4
715-
; ZVE32F-NEXT: bne a1, a3, .LBB12_1
753+
; ZVE32F-NEXT: bne a1, a3, .LBB13_1
716754
; ZVE32F-NEXT: # %bb.2: # %bb18
717755
; ZVE32F-NEXT: ret
718756
bb:
@@ -747,13 +785,13 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
747785
; CHECK-LABEL: strided_load_startval_add_with_splat:
748786
; CHECK: # %bb.0: # %bb
749787
; CHECK-NEXT: li a3, 1024
750-
; CHECK-NEXT: beq a2, a3, .LBB13_7
788+
; CHECK-NEXT: beq a2, a3, .LBB14_7
751789
; CHECK-NEXT: # %bb.1: # %bb3
752790
; CHECK-NEXT: li a3, 1023
753791
; CHECK-NEXT: subw a5, a3, a2
754792
; CHECK-NEXT: li a6, 31
755793
; CHECK-NEXT: mv a4, a2
756-
; CHECK-NEXT: bltu a5, a6, .LBB13_5
794+
; CHECK-NEXT: bltu a5, a6, .LBB14_5
757795
; CHECK-NEXT: # %bb.2: # %bb9
758796
; CHECK-NEXT: slli a5, a5, 32
759797
; CHECK-NEXT: srli a5, a5, 32
@@ -768,18 +806,18 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
768806
; CHECK-NEXT: li t2, 32
769807
; CHECK-NEXT: li t1, 5
770808
; CHECK-NEXT: vsetvli zero, t2, e8, m1, ta, ma
771-
; CHECK-NEXT: .LBB13_3: # %bb15
809+
; CHECK-NEXT: .LBB14_3: # %bb15
772810
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
773811
; CHECK-NEXT: vlse8.v v8, (a2), t1
774812
; CHECK-NEXT: vle8.v v9, (a7)
775813
; CHECK-NEXT: vadd.vv v8, v9, v8
776814
; CHECK-NEXT: vse8.v v8, (a7)
777815
; CHECK-NEXT: addi a7, a7, 32
778816
; CHECK-NEXT: addi a2, a2, 160
779-
; CHECK-NEXT: bne a7, t0, .LBB13_3
817+
; CHECK-NEXT: bne a7, t0, .LBB14_3
780818
; CHECK-NEXT: # %bb.4: # %bb30
781-
; CHECK-NEXT: beq a5, a6, .LBB13_7
782-
; CHECK-NEXT: .LBB13_5: # %bb32
819+
; CHECK-NEXT: beq a5, a6, .LBB14_7
820+
; CHECK-NEXT: .LBB14_5: # %bb32
783821
; CHECK-NEXT: add a2, a0, a4
784822
; CHECK-NEXT: slli a5, a4, 2
785823
; CHECK-NEXT: add a1, a1, a4
@@ -790,16 +828,16 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
790828
; CHECK-NEXT: add a0, a4, a0
791829
; CHECK-NEXT: add a0, a0, a3
792830
; CHECK-NEXT: addi a0, a0, 1
793-
; CHECK-NEXT: .LBB13_6: # %bb35
831+
; CHECK-NEXT: .LBB14_6: # %bb35
794832
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
795833
; CHECK-NEXT: lbu a3, 0(a1)
796834
; CHECK-NEXT: lbu a4, 0(a2)
797835
; CHECK-NEXT: add a3, a4, a3
798836
; CHECK-NEXT: sb a3, 0(a2)
799837
; CHECK-NEXT: addi a2, a2, 1
800838
; CHECK-NEXT: addi a1, a1, 5
801-
; CHECK-NEXT: bne a2, a0, .LBB13_6
802-
; CHECK-NEXT: .LBB13_7: # %bb34
839+
; CHECK-NEXT: bne a2, a0, .LBB14_6
840+
; CHECK-NEXT: .LBB14_7: # %bb34
803841
; CHECK-NEXT: ret
804842
bb:
805843
%i = icmp eq i32 %arg2, 1024
@@ -870,24 +908,24 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr
870908
; CHECK-LABEL: gather_no_scalar_remainder:
871909
; CHECK: # %bb.0: # %bb
872910
; CHECK-NEXT: slli a2, a2, 4
873-
; CHECK-NEXT: beqz a2, .LBB14_3
911+
; CHECK-NEXT: beqz a2, .LBB15_3
874912
; CHECK-NEXT: # %bb.1: # %bb2
875913
; CHECK-NEXT: addi a2, a2, -16
876914
; CHECK-NEXT: andi a2, a2, -16
877915
; CHECK-NEXT: add a2, a2, a0
878916
; CHECK-NEXT: addi a2, a2, 16
879917
; CHECK-NEXT: li a3, 5
880918
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
881-
; CHECK-NEXT: .LBB14_2: # %bb4
919+
; CHECK-NEXT: .LBB15_2: # %bb4
882920
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
883921
; CHECK-NEXT: vlse8.v v8, (a1), a3
884922
; CHECK-NEXT: vle8.v v9, (a0)
885923
; CHECK-NEXT: vadd.vv v8, v9, v8
886924
; CHECK-NEXT: vse8.v v8, (a0)
887925
; CHECK-NEXT: addi a0, a0, 16
888926
; CHECK-NEXT: addi a1, a1, 80
889-
; CHECK-NEXT: bne a0, a2, .LBB14_2
890-
; CHECK-NEXT: .LBB14_3: # %bb16
927+
; CHECK-NEXT: bne a0, a2, .LBB15_2
928+
; CHECK-NEXT: .LBB15_3: # %bb16
891929
; CHECK-NEXT: ret
892930
bb:
893931
%i = shl i64 %arg2, 4
@@ -922,15 +960,15 @@ define void @gather_zero_stride_fp(ptr noalias nocapture %A, ptr noalias nocaptu
922960
; CHECK-NEXT: lui a2, 1
923961
; CHECK-NEXT: add a2, a0, a2
924962
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
925-
; CHECK-NEXT: .LBB15_1: # %vector.body
963+
; CHECK-NEXT: .LBB16_1: # %vector.body
926964
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
927965
; CHECK-NEXT: flw fa5, 0(a1)
928966
; CHECK-NEXT: vle32.v v8, (a0)
929967
; CHECK-NEXT: vfadd.vf v8, v8, fa5
930968
; CHECK-NEXT: vse32.v v8, (a0)
931969
; CHECK-NEXT: addi a0, a0, 128
932970
; CHECK-NEXT: addi a1, a1, 640
933-
; CHECK-NEXT: bne a0, a2, .LBB15_1
971+
; CHECK-NEXT: bne a0, a2, .LBB16_1
934972
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
935973
; CHECK-NEXT: ret
936974
entry:

0 commit comments

Comments
 (0)