Skip to content

Commit 087a6ac

Browse files
committed
[LV] Add users to some first-order recurrence tests.
Add extra users to ensure the recurrence cannot be DCE'd. Also re-generates some checks.
1 parent a8d76ac commit 087a6ac

File tree

5 files changed

+1697
-458
lines changed

5 files changed

+1697
-458
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 52 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -589,88 +589,100 @@ for.exit: ; preds = %for.body
589589
ret i32 %add
590590
}
591591

592-
define i32 @not_dotp_not_phi(ptr %a, ptr %b) {
592+
define i32 @not_dotp_not_phi(ptr %a, ptr noalias %b, ptr noalias %c) {
593593
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_phi(
594-
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
594+
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
595595
; CHECK-INTERLEAVE1-NEXT: entry:
596596
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
597597
; CHECK-INTERLEAVE1: vector.ph:
598598
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
599599
; CHECK-INTERLEAVE1: vector.body:
600600
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
601-
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
601+
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
602602
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
603-
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
604-
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
605-
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
603+
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
604+
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
605+
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
606606
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
607-
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
608-
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
609-
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
610-
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
611-
; CHECK-INTERLEAVE1-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
607+
; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
608+
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
609+
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
610+
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
611+
; CHECK-INTERLEAVE1-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
612+
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
613+
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
614+
; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
615+
; CHECK-INTERLEAVE1-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
612616
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
613617
; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
614618
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
615619
; CHECK-INTERLEAVE1: middle.block:
616-
; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
617-
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
620+
; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
621+
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
618622
; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
619623
; CHECK-INTERLEAVE1: scalar.ph:
620624
;
621625
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi(
622-
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
626+
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
623627
; CHECK-INTERLEAVED-NEXT: entry:
624628
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
625629
; CHECK-INTERLEAVED: vector.ph:
626630
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
627631
; CHECK-INTERLEAVED: vector.body:
628632
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
629-
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
633+
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
630634
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
631-
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
632-
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
633-
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
635+
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
636+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
637+
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
634638
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
635-
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
636-
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
637-
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
638-
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
639-
; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
639+
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
640+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
641+
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
642+
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
643+
; CHECK-INTERLEAVED-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
644+
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
645+
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
646+
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
647+
; CHECK-INTERLEAVED-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
640648
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
641649
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
642650
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
643651
; CHECK-INTERLEAVED: middle.block:
644-
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
645-
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
652+
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
653+
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
646654
; CHECK-INTERLEAVED-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
647655
; CHECK-INTERLEAVED: scalar.ph:
648656
;
649657
; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi(
650-
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
658+
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
651659
; CHECK-MAXBW-NEXT: entry:
652660
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
653661
; CHECK-MAXBW: vector.ph:
654662
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
655663
; CHECK-MAXBW: vector.body:
656664
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
657-
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
665+
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
658666
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
659-
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
660-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
661-
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
667+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
668+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
669+
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
662670
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
663-
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
664-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
665-
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
666-
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
667-
; CHECK-MAXBW-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
671+
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
672+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
673+
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
674+
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
675+
; CHECK-MAXBW-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
676+
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
677+
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
678+
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
679+
; CHECK-MAXBW-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
668680
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
669681
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
670682
; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
671683
; CHECK-MAXBW: middle.block:
672-
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
673-
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
684+
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
685+
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
674686
; CHECK-MAXBW-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
675687
; CHECK-MAXBW: scalar.ph:
676688
;
@@ -688,6 +700,8 @@ for.body: ; preds = %for.body, %entry
688700
%ext.b = zext i8 %load.b to i32
689701
%mul = mul i32 %ext.b, %ext.a
690702
%add = add i32 %mul, %ext.b
703+
%gep.c = getelementptr i32, ptr %c, i64 %iv
704+
store i32 %accum, ptr %gep.c
691705
%iv.next = add i64 %iv, 1
692706
%exitcond.not = icmp eq i64 %iv.next, 1024
693707
br i1 %exitcond.not, label %for.exit, label %for.body
@@ -946,6 +960,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) {
946960
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]]
947961
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
948962
; CHECK-MAXBW: scalar.ph:
963+
;
949964
entry:
950965
br label %for.body
951966

llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ target triple = "riscv64-unknown-linux-gnu"
66

77
; Make sure we do not pick <vscale x 1 x i64> as VF for a loop with a
88
; first-order recurrence.
9-
define i64 @pr97452_scalable_vf1_for(ptr %src) #0 {
9+
define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
1010
; CHECK-LABEL: define i64 @pr97452_scalable_vf1_for(
11-
; CHECK-SAME: ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ENTRY:.*]]:
1313
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1414
; CHECK: [[VECTOR_PH]]:
@@ -17,8 +17,17 @@ define i64 @pr97452_scalable_vf1_for(ptr %src) #0 {
1717
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1818
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ]
1919
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
20+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
2021
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
22+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
2123
; CHECK-NEXT: [[WIDE_LOAD1]] = load <4 x i64>, ptr [[TMP5]], align 8
24+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
25+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
26+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
27+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
28+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 4
29+
; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP9]], align 8
30+
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP7]], align 8
2231
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2332
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
2433
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -31,15 +40,17 @@ define i64 @pr97452_scalable_vf1_for(ptr %src) #0 {
3140
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
3241
; CHECK-NEXT: br label %[[LOOP:.*]]
3342
; CHECK: [[LOOP]]:
34-
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
43+
; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
3544
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
3645
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
3746
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
3847
; CHECK-NEXT: [[L]] = load i64, ptr [[GEP]], align 8
48+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
49+
; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP_DST]], align 8
3950
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22
4051
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
4152
; CHECK: [[EXIT]]:
42-
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[SCALAR_RECUR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
53+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
4354
; CHECK-NEXT: ret i64 [[RES]]
4455
;
4556
entry:
@@ -49,8 +60,10 @@ loop:
4960
%for = phi i64 [ 0, %entry ], [ %l, %loop ]
5061
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
5162
%iv.next = add i64 %iv, 1
52-
%gep = getelementptr inbounds i64, ptr %src, i64 %iv
53-
%l = load i64, ptr %gep, align 8
63+
%gep.src = getelementptr inbounds i64, ptr %src, i64 %iv
64+
%l = load i64, ptr %gep.src, align 8
65+
%gep.dst = getelementptr inbounds i64, ptr %dst, i64 %iv
66+
store i64 %for, ptr %gep.dst
5467
%ec = icmp eq i64 %iv, 22
5568
br i1 %ec, label %exit, label %loop
5669

0 commit comments

Comments
 (0)