Skip to content

Commit 8e9216f

Browse files
author
Valery N Dmitriev
committed
[SLP] Do not attempt to match a reduction on an already erased instruction.
Differential Revision: https://reviews.llvm.org/D105752
1 parent ac02baa commit 8e9216f

File tree

2 files changed

+99
-0
lines changed

2 files changed

+99
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8174,6 +8174,11 @@ static bool tryToVectorizeHorReductionOrInstOperands(
81748174
Instruction *Inst;
81758175
unsigned Level;
81768176
std::tie(Inst, Level) = Stack.pop_back_val();
8177+
// Do not try to analyze an instruction that has already been vectorized.
8178+
// This may happen when we vectorize instruction operands on a previous
8179+
// iteration while the stack was populated before that happened.
8180+
if (R.isDeleted(Inst))
8181+
continue;
81778182
Value *B0, *B1;
81788183
bool IsBinop = matchRdxBop(Inst, B0, B1);
81798184
bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -S | FileCheck %s
3+
4+
; REQUIRES: asserts
5+
6+
; SLP crashed when it tried to delete an instruction with uses.
7+
; It tried to match a reduction successively on %i23, then %i22, etc.
8+
; When it reached %i18 it was still failing to match reduction but
9+
; succeeded with its operand pair: %i17, %i11.
10+
; Then it popped instruction %i17 from the stack to make the next attempt at
11+
; matching a reduction, but the instruction had actually been erased on a prior
12+
; iteration (it was matched and vectorized, which added a use of a deleted
13+
; instruction)
14+
15+
define void @test() {
16+
; CHECK-LABEL: @test(
17+
; CHECK-NEXT: entry:
18+
; CHECK-NEXT: br i1 undef, label [[IF_END:%.*]], label [[FOR_COND_PREHEADER:%.*]]
19+
; CHECK: for.cond.preheader:
20+
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 2
21+
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 3
22+
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 4
23+
; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 5
24+
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 6
25+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[I]] to <2 x i32>*
26+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
27+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[I1]] to <2 x i32>*
28+
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
29+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[I2]] to <2 x i32>*
30+
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 16
31+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[I3]] to <2 x i32>*
32+
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
33+
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> undef, [[TMP7]]
34+
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[TMP5]]
35+
; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP9]], [[TMP3]]
36+
; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i32> [[TMP10]], [[TMP1]]
37+
; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP11]], undef
38+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
39+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
40+
; CHECK-NEXT: [[I11:%.*]] = add i32 [[TMP14]], [[TMP13]]
41+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP12]], i32 1
42+
; CHECK-NEXT: [[I18:%.*]] = add i32 [[TMP15]], [[I11]]
43+
; CHECK-NEXT: [[I19:%.*]] = add i32 [[TMP15]], [[I18]]
44+
; CHECK-NEXT: [[I20:%.*]] = add i32 undef, [[I19]]
45+
; CHECK-NEXT: [[I21:%.*]] = add i32 undef, [[I20]]
46+
; CHECK-NEXT: [[I22:%.*]] = add i32 undef, [[I21]]
47+
; CHECK-NEXT: [[I23:%.*]] = add i32 undef, [[I22]]
48+
; CHECK-NEXT: br label [[IF_END]]
49+
; CHECK: if.end:
50+
; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[I23]], [[FOR_COND_PREHEADER]] ], [ undef, [[ENTRY:%.*]] ]
51+
; CHECK-NEXT: ret void
52+
;
53+
entry:
54+
br i1 undef, label %if.end, label %for.cond.preheader
55+
56+
for.cond.preheader: ; preds = %entry
57+
%i = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 2
58+
%i1 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 3
59+
%i2 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 4
60+
%i3 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 5
61+
%i4 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 6
62+
%ld0 = load i32, i32* %i, align 8
63+
%ld1 = load i32, i32* %i1, align 4
64+
%ld2 = load i32, i32* %i2, align 16
65+
%ld3 = load i32, i32* %i3, align 4
66+
%i5 = add i32 undef, undef
67+
%i6 = add i32 %i5, %ld3
68+
%i7 = add i32 %i6, %ld2
69+
%i8 = add i32 %i7, %ld1
70+
%i9 = add i32 %i8, %ld0
71+
%i10 = add i32 %i9, undef
72+
%i11 = add i32 %i9, %i10
73+
%ld4 = load i32, i32* %i1, align 4
74+
%ld5 = load i32, i32* %i2, align 16
75+
%ld6 = load i32, i32* %i3, align 4
76+
%ld7 = load i32, i32* %i4, align 8
77+
%i12 = add i32 undef, undef
78+
%i13 = add i32 %i12, %ld7
79+
%i14 = add i32 %i13, %ld6
80+
%i15 = add i32 %i14, %ld5
81+
%i16 = add i32 %i15, %ld4
82+
%i17 = add i32 %i16, undef
83+
%i18 = add i32 %i17, %i11
84+
%i19 = add i32 %i17, %i18
85+
%i20 = add i32 undef, %i19
86+
%i21 = add i32 undef, %i20
87+
%i22 = add i32 undef, %i21
88+
%i23 = add i32 undef, %i22
89+
br label %if.end
90+
91+
if.end: ; preds = %for.cond.preheader, %entry
92+
%r = phi i32 [ %i23, %for.cond.preheader ], [ undef, %entry ]
93+
ret void
94+
}

0 commit comments

Comments
 (0)