Skip to content

Commit f61c9b7

Browse files
committed
[SLP] Fix infinite loop in isUndefVector.
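This fixes an infinite loop in isUndefVector that occurred when isa<T>(II->getOperand(1)) is true: the `continue` was reached before Base had been advanced to II->getOperand(0), so the while loop kept re-examining the same InsertElementInst. Base is now updated at the top of the loop body, before any `continue`.
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D144292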
1 parent d41a73a commit f61c9b7

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,9 +381,9 @@ static SmallBitVector isUndefVector(const Value *V,
381381
if (!UseMask.empty()) {
382382
const Value *Base = V;
383383
while (auto *II = dyn_cast<InsertElementInst>(Base)) {
384+
Base = II->getOperand(0);
384385
if (isa<T>(II->getOperand(1)))
385386
continue;
386-
Base = II->getOperand(0);
387387
std::optional<unsigned> Idx = getInsertIndex(II);
388388
if (!Idx)
389389
continue;

llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,25 @@ define <vscale x 2 x float> @insertelement-scalable-vector() {
3535

3636
; Function Attrs: nounwind readnone speculatable willreturn
3737
declare float @llvm.fabs.f32(float)
38+
39+
40+
define <4 x float> @insertelement_poison_lanes(ptr %0) {
41+
; CHECK-LABEL: @insertelement_poison_lanes(
42+
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
43+
; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0
44+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0:%.*]], i64 1
45+
; CHECK-NEXT: store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
46+
; CHECK-NEXT: ret <4 x float> [[INS_2]]
47+
;
48+
%trunc.1 = fptrunc double 0.000000e+00 to float
49+
%trunc.2 = fptrunc double 1.000000e+00 to float
50+
%ins.1 = insertelement <4 x float> zeroinitializer, float poison, i64 0
51+
%ins.2 = insertelement <4 x float> %ins.1, float %trunc.1, i64 0
52+
%ext.1 = fpext float %trunc.1 to double
53+
%gep.1 = getelementptr double, ptr %0, i64 1
54+
store double %ext.1, ptr %gep.1, align 8
55+
%ext.2 = fpext float %trunc.2 to double
56+
%gep.2 = getelementptr double, ptr %0, i64 2
57+
store double %ext.2, ptr %gep.2, align 8
58+
ret <4 x float> %ins.2
59+
}

0 commit comments

Comments
 (0)