Skip to content

Commit 1a9358c

Browse files
committed
[LV] Relax over-strict assertion for reduction exit value selects.
After f108c6c, (mul x, 1) is simplified to x, which can cause the select for the final reduction value when tail-folding to use the reduction value for both options. Relax the assertion to make sure this case is allowed. Note that the reduction is now redundant itself and could be further simplified. Fixes #66895.
1 parent 8bbcc6d commit 1a9358c

File tree

2 files changed

+67
-15
lines changed

2 files changed

+67
-15
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3823,7 +3823,8 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
38233823
SelectInst *Sel = nullptr;
38243824
for (User *U : VecLoopExitInst->users()) {
38253825
if (isa<SelectInst>(U)) {
3826-
assert(!Sel && "Reduction exit feeding two selects");
3826+
assert((!Sel || U == Sel) &&
3827+
"Reduction exit feeding two different selects");
38273828
Sel = cast<SelectInst>(U);
38283829
} else
38293830
assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select");

llvm/test/Transforms/LoopVectorize/select-reduction.ll

Lines changed: 65 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,23 @@ define i32 @test(i64 %N, i32 %x) {
2424
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2525
; CHECK: vector.body:
2626
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
27-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
28-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
29-
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer
30-
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1, i64 2, i64 3>
31-
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
32-
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], <i32 10, i32 10, i32 10, i32 10>
33-
; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 10, i32 10, i32 10, i32 10>
34-
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
27+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
28+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
29+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
30+
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
31+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
32+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], <i32 10, i32 10, i32 10, i32 10>
33+
; CHECK-NEXT: [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 10, i32 10, i32 10, i32 10>
34+
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> [[VEC_PHI]]
3535
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
36-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37-
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
36+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37+
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3838
; CHECK: middle.block:
39-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
39+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
4040
; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4141
; CHECK: scalar.ph:
4242
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ]
43-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
43+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
4444
; CHECK-NEXT: br label [[LOOP:%.*]]
4545
; CHECK: loop:
4646
; CHECK-NEXT: [[NEXT:%.*]] = phi i32 [ [[SEL:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
@@ -49,9 +49,9 @@ define i32 @test(i64 %N, i32 %x) {
4949
; CHECK-NEXT: [[SEL]] = select i1 [[SEL_COND]], i32 [[NEXT]], i32 10
5050
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
5151
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
52-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]]
52+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
5353
; CHECK: exit.loopexit:
54-
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
54+
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
5555
; CHECK-NEXT: br label [[EXIT]]
5656
; CHECK: exit:
5757
; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[CHECK]] ], [ [[SEL_LCSSA]], [[EXIT_LOOPEXIT]] ]
@@ -78,3 +78,54 @@ exit:
7878
%result = phi i32 [ %sel, %loop], [ 0, %check ]
7979
ret i32 %result
8080
}
81+
82+
define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) {
83+
; CHECK-LABEL: @pr66895_tail_fold_reduction_exit_inst_gets_simplified(
84+
; CHECK-NEXT: entry:
85+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
86+
; CHECK: vector.ph:
87+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
88+
; CHECK: vector.body:
89+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
90+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
91+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
92+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
93+
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
94+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], <i32 12, i32 12, i32 12, i32 12>
95+
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]]
96+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
97+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
98+
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
99+
; CHECK: middle.block:
100+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1]])
101+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
102+
; CHECK: scalar.ph:
103+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ 12, [[ENTRY:%.*]] ]
104+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
105+
; CHECK-NEXT: br label [[LOOP:%.*]]
106+
; CHECK: loop:
107+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
108+
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ]
109+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1
110+
; CHECK-NEXT: [[RED_NEXT]] = mul i32 [[RED]], 1
111+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0
112+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
113+
; CHECK: exit:
114+
; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
115+
; CHECK-NEXT: ret i32 [[RED_LCSSA]]
116+
;
117+
entry:
118+
br label %loop
119+
120+
loop:
121+
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
122+
%red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
123+
%iv.next = add i32 %iv, -1
124+
%red.next = mul i32 %red, 1
125+
%ec = icmp eq i32 %iv, 0
126+
br i1 %ec, label %exit, label %loop
127+
128+
exit:
129+
%red.lcssa = phi i32 [ %red.next, %loop ]
130+
ret i32 %red.lcssa
131+
}

0 commit comments

Comments
 (0)