Skip to content

Commit 3e6d7c6

Browse files
committed
[SLP]Improve tryToGatherExtractElements by using per-register analysis.
Currently, the tryToGatherExtractElements function analyzes the whole vector, regardless of the number of actual registers used in this vector. This may prevent some optimizations, because per-register analysis may allow simplifying the final code by reusing more already-emitted vectors and better shuffles. Differential Revision: https://reviews.llvm.org/D148855
1 parent 6c320b4 commit 3e6d7c6

File tree

8 files changed

+879
-426
lines changed

8 files changed

+879
-426
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 311 additions & 224 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll

Lines changed: 55 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -75,64 +75,47 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
7575
; CHECK-NEXT: [[TMP4TT_0_LCSSA:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_END_LOOPEXIT]] ]
7676
; CHECK-NEXT: [[PB_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PB]], [[ENTRY]] ], [ [[SCEVGEP311]], [[WHILE_END_LOOPEXIT]] ]
7777
; CHECK-NEXT: [[PA_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PA]], [[ENTRY]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
78-
; CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP4TT_0_LCSSA]], i64 0
79-
; CHECK-NEXT: [[VGETQ_LANE45:%.*]] = extractelement <2 x i64> [[TMP4TT_0_LCSSA]], i64 1
80-
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[VGETQ_LANE]], [[VGETQ_LANE45]]
81-
; CHECK-NEXT: [[CONV48:%.*]] = trunc i64 [[ADD]] to i32
82-
; CHECK-NEXT: [[VGETQ_LANE51:%.*]] = extractelement <2 x i64> [[TMP4FF_0_LCSSA]], i64 0
83-
; CHECK-NEXT: [[VGETQ_LANE55:%.*]] = extractelement <2 x i64> [[TMP4FF_0_LCSSA]], i64 1
84-
; CHECK-NEXT: [[ADD57:%.*]] = add i64 [[VGETQ_LANE51]], [[VGETQ_LANE55]]
85-
; CHECK-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD57]] to i32
86-
; CHECK-NEXT: [[VGETQ_LANE63:%.*]] = extractelement <2 x i64> [[TMP4TF_0_LCSSA]], i64 0
87-
; CHECK-NEXT: [[VGETQ_LANE67:%.*]] = extractelement <2 x i64> [[TMP4TF_0_LCSSA]], i64 1
88-
; CHECK-NEXT: [[ADD69:%.*]] = add i64 [[VGETQ_LANE63]], [[VGETQ_LANE67]]
89-
; CHECK-NEXT: [[CONV72:%.*]] = trunc i64 [[ADD69]] to i32
90-
; CHECK-NEXT: [[VGETQ_LANE75:%.*]] = extractelement <2 x i64> [[TMP4FT_0_LCSSA]], i64 0
91-
; CHECK-NEXT: [[VGETQ_LANE79:%.*]] = extractelement <2 x i64> [[TMP4FT_0_LCSSA]], i64 1
92-
; CHECK-NEXT: [[ADD81:%.*]] = add i64 [[VGETQ_LANE75]], [[VGETQ_LANE79]]
93-
; CHECK-NEXT: [[CONV84:%.*]] = trunc i64 [[ADD81]] to i32
78+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> <i32 0, i32 2>
79+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i32> <i32 0, i32 2>
80+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
81+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> <i32 1, i32 3>
82+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i32> <i32 1, i32 3>
83+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
84+
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i64> [[TMP12]], [[TMP15]]
85+
; CHECK-NEXT: [[TMP17:%.*]] = trunc <4 x i64> [[TMP16]] to <4 x i32>
9486
; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMBEROFBOOLS]], 127
9587
; CHECK-NEXT: [[CMP86284:%.*]] = icmp ugt i32 [[AND]], 31
9688
; CHECK-NEXT: br i1 [[CMP86284]], label [[WHILE_BODY88:%.*]], label [[WHILE_END122:%.*]]
9789
; CHECK: while.body88:
9890
; CHECK-NEXT: [[PA_ADDR_1291:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_END121:%.*]] ], [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ]
9991
; CHECK-NEXT: [[PB_ADDR_1290:%.*]] = phi ptr [ [[INCDEC_PTR89:%.*]], [[WHILE_END121]] ], [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ]
100-
; CHECK-NEXT: [[_CTT_0289:%.*]] = phi i32 [ [[ADD99:%.*]], [[WHILE_END121]] ], [ [[CONV48]], [[WHILE_END]] ]
101-
; CHECK-NEXT: [[_CFF_0288:%.*]] = phi i32 [ [[ADD106:%.*]], [[WHILE_END121]] ], [ [[CONV60]], [[WHILE_END]] ]
102-
; CHECK-NEXT: [[_CTF_0287:%.*]] = phi i32 [ [[ADD113:%.*]], [[WHILE_END121]] ], [ [[CONV72]], [[WHILE_END]] ]
103-
; CHECK-NEXT: [[_CFT_0286:%.*]] = phi i32 [ [[ADD120:%.*]], [[WHILE_END121]] ], [ [[CONV84]], [[WHILE_END]] ]
10492
; CHECK-NEXT: [[NBBOOLBLOCK_1285:%.*]] = phi i32 [ [[SUB:%.*]], [[WHILE_END121]] ], [ [[AND]], [[WHILE_END]] ]
105-
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[PA_ADDR_1291]], align 4
106-
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[PB_ADDR_1290]], align 4
93+
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP34:%.*]], [[WHILE_END121]] ], [ [[TMP17]], [[WHILE_END]] ]
94+
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[PA_ADDR_1291]], align 4
95+
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[PB_ADDR_1290]], align 4
10796
; CHECK-NEXT: br label [[WHILE_BODY93:%.*]]
10897
; CHECK: while.body93:
109-
; CHECK-NEXT: [[_CTT_1283:%.*]] = phi i32 [ [[_CTT_0289]], [[WHILE_BODY88]] ], [ [[ADD99]], [[WHILE_BODY93]] ]
110-
; CHECK-NEXT: [[_CFF_1282:%.*]] = phi i32 [ [[_CFF_0288]], [[WHILE_BODY88]] ], [ [[ADD106]], [[WHILE_BODY93]] ]
111-
; CHECK-NEXT: [[_CTF_1281:%.*]] = phi i32 [ [[_CTF_0287]], [[WHILE_BODY88]] ], [ [[ADD113]], [[WHILE_BODY93]] ]
112-
; CHECK-NEXT: [[_CFT_1280:%.*]] = phi i32 [ [[_CFT_0286]], [[WHILE_BODY88]] ], [ [[ADD120]], [[WHILE_BODY93]] ]
113-
; CHECK-NEXT: [[A_0279:%.*]] = phi i32 [ [[TMP10]], [[WHILE_BODY88]] ], [ [[SHR96:%.*]], [[WHILE_BODY93]] ]
114-
; CHECK-NEXT: [[B_0278:%.*]] = phi i32 [ [[TMP11]], [[WHILE_BODY88]] ], [ [[SHR97:%.*]], [[WHILE_BODY93]] ]
98+
; CHECK-NEXT: [[A_0279:%.*]] = phi i32 [ [[TMP19]], [[WHILE_BODY88]] ], [ [[SHR96:%.*]], [[WHILE_BODY93]] ]
99+
; CHECK-NEXT: [[B_0278:%.*]] = phi i32 [ [[TMP20]], [[WHILE_BODY88]] ], [ [[SHR97:%.*]], [[WHILE_BODY93]] ]
115100
; CHECK-NEXT: [[SHIFT_0277:%.*]] = phi i32 [ 0, [[WHILE_BODY88]] ], [ [[INC:%.*]], [[WHILE_BODY93]] ]
101+
; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP18]], [[WHILE_BODY88]] ], [ [[TMP34]], [[WHILE_BODY93]] ]
116102
; CHECK-NEXT: [[AND94:%.*]] = and i32 [[A_0279]], 1
117103
; CHECK-NEXT: [[AND95:%.*]] = and i32 [[B_0278]], 1
118104
; CHECK-NEXT: [[SHR96]] = lshr i32 [[A_0279]], 1
119105
; CHECK-NEXT: [[SHR97]] = lshr i32 [[B_0278]], 1
120-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND94]], 0
121-
; CHECK-NEXT: [[TOBOOL98:%.*]] = icmp ne i32 [[AND95]], 0
122-
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL98]], i1 false
123-
; CHECK-NEXT: [[LAND_EXT:%.*]] = zext i1 [[TMP12]] to i32
124-
; CHECK-NEXT: [[ADD99]] = add i32 [[_CTT_1283]], [[LAND_EXT]]
125-
; CHECK-NEXT: [[TOBOOL100:%.*]] = icmp eq i32 [[AND94]], 0
126-
; CHECK-NEXT: [[TOBOOL103:%.*]] = icmp eq i32 [[AND95]], 0
127-
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TOBOOL100]], i1 [[TOBOOL103]], i1 false
128-
; CHECK-NEXT: [[LAND_EXT105:%.*]] = zext i1 [[TMP13]] to i32
129-
; CHECK-NEXT: [[ADD106]] = add i32 [[_CFF_1282]], [[LAND_EXT105]]
130-
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL103]], i1 false
131-
; CHECK-NEXT: [[LAND_EXT112:%.*]] = zext i1 [[TMP14]] to i32
132-
; CHECK-NEXT: [[ADD113]] = add i32 [[_CTF_1281]], [[LAND_EXT112]]
133-
; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TOBOOL100]], i1 [[TOBOOL98]], i1 false
134-
; CHECK-NEXT: [[LAND_EXT119:%.*]] = zext i1 [[TMP15]] to i32
135-
; CHECK-NEXT: [[ADD120]] = add i32 [[_CFT_1280]], [[LAND_EXT119]]
106+
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[AND94]], i32 0
107+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <2 x i32> zeroinitializer
108+
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq <2 x i32> [[TMP23]], zeroinitializer
109+
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <2 x i32> [[TMP23]], zeroinitializer
110+
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> [[TMP25]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
111+
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> poison, i32 [[AND95]], i32 0
112+
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP27]], <2 x i32> poison, <2 x i32> zeroinitializer
113+
; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <2 x i32> [[TMP28]], zeroinitializer
114+
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq <2 x i32> [[TMP28]], zeroinitializer
115+
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x i1> [[TMP29]], <2 x i1> [[TMP30]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
116+
; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP26]], <4 x i1> [[TMP31]], <4 x i1> zeroinitializer
117+
; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i1> [[TMP32]] to <4 x i32>
118+
; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP21]], [[TMP33]]
136119
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[SHIFT_0277]], 1
137120
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 32
138121
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[WHILE_END121]], label [[WHILE_BODY93]]
@@ -144,61 +127,53 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
144127
; CHECK-NEXT: br i1 [[CMP86]], label [[WHILE_BODY88]], label [[WHILE_END122]]
145128
; CHECK: while.end122:
146129
; CHECK-NEXT: [[NBBOOLBLOCK_1_LCSSA:%.*]] = phi i32 [ [[AND]], [[WHILE_END]] ], [ [[SUB]], [[WHILE_END121]] ]
147-
; CHECK-NEXT: [[_CFT_0_LCSSA:%.*]] = phi i32 [ [[CONV84]], [[WHILE_END]] ], [ [[ADD120]], [[WHILE_END121]] ]
148-
; CHECK-NEXT: [[_CTF_0_LCSSA:%.*]] = phi i32 [ [[CONV72]], [[WHILE_END]] ], [ [[ADD113]], [[WHILE_END121]] ]
149-
; CHECK-NEXT: [[_CFF_0_LCSSA:%.*]] = phi i32 [ [[CONV60]], [[WHILE_END]] ], [ [[ADD106]], [[WHILE_END121]] ]
150-
; CHECK-NEXT: [[_CTT_0_LCSSA:%.*]] = phi i32 [ [[CONV48]], [[WHILE_END]] ], [ [[ADD99]], [[WHILE_END121]] ]
151130
; CHECK-NEXT: [[PB_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR89]], [[WHILE_END121]] ]
152131
; CHECK-NEXT: [[PA_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR]], [[WHILE_END121]] ]
132+
; CHECK-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP17]], [[WHILE_END]] ], [ [[TMP34]], [[WHILE_END121]] ]
153133
; CHECK-NEXT: [[CMP130_NOT299:%.*]] = icmp eq i32 [[NBBOOLBLOCK_1_LCSSA]], 0
154134
; CHECK-NEXT: br i1 [[CMP130_NOT299]], label [[WHILE_END166:%.*]], label [[WHILE_BODY132_PREHEADER:%.*]]
155135
; CHECK: while.body132.preheader:
156-
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[PB_ADDR_1_LCSSA]], align 4
136+
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[PB_ADDR_1_LCSSA]], align 4
157137
; CHECK-NEXT: [[SUB125:%.*]] = sub nuw nsw i32 32, [[NBBOOLBLOCK_1_LCSSA]]
158-
; CHECK-NEXT: [[SHR128:%.*]] = lshr i32 [[TMP16]], [[SUB125]]
159-
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[PA_ADDR_1_LCSSA]], align 4
160-
; CHECK-NEXT: [[SHR126:%.*]] = lshr i32 [[TMP17]], [[SUB125]]
138+
; CHECK-NEXT: [[SHR128:%.*]] = lshr i32 [[TMP36]], [[SUB125]]
139+
; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[PA_ADDR_1_LCSSA]], align 4
140+
; CHECK-NEXT: [[SHR126:%.*]] = lshr i32 [[TMP37]], [[SUB125]]
161141
; CHECK-NEXT: br label [[WHILE_BODY132:%.*]]
162142
; CHECK: while.body132:
163-
; CHECK-NEXT: [[_CTT_2306:%.*]] = phi i32 [ [[ADD142:%.*]], [[WHILE_BODY132]] ], [ [[_CTT_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
164-
; CHECK-NEXT: [[_CFF_2305:%.*]] = phi i32 [ [[ADD150:%.*]], [[WHILE_BODY132]] ], [ [[_CFF_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
165-
; CHECK-NEXT: [[_CTF_2304:%.*]] = phi i32 [ [[ADD157:%.*]], [[WHILE_BODY132]] ], [ [[_CTF_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
166-
; CHECK-NEXT: [[_CFT_2303:%.*]] = phi i32 [ [[ADD164:%.*]], [[WHILE_BODY132]] ], [ [[_CFT_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
167143
; CHECK-NEXT: [[NBBOOLBLOCK_2302:%.*]] = phi i32 [ [[DEC165:%.*]], [[WHILE_BODY132]] ], [ [[NBBOOLBLOCK_1_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
168144
; CHECK-NEXT: [[A_1301:%.*]] = phi i32 [ [[SHR135:%.*]], [[WHILE_BODY132]] ], [ [[SHR126]], [[WHILE_BODY132_PREHEADER]] ]
169145
; CHECK-NEXT: [[B_1300:%.*]] = phi i32 [ [[SHR136:%.*]], [[WHILE_BODY132]] ], [ [[SHR128]], [[WHILE_BODY132_PREHEADER]] ]
146+
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP51:%.*]], [[WHILE_BODY132]] ], [ [[TMP35]], [[WHILE_BODY132_PREHEADER]] ]
170147
; CHECK-NEXT: [[AND133:%.*]] = and i32 [[A_1301]], 1
171148
; CHECK-NEXT: [[AND134:%.*]] = and i32 [[B_1300]], 1
172149
; CHECK-NEXT: [[SHR135]] = lshr i32 [[A_1301]], 1
173150
; CHECK-NEXT: [[SHR136]] = lshr i32 [[B_1300]], 1
174-
; CHECK-NEXT: [[TOBOOL137:%.*]] = icmp ne i32 [[AND133]], 0
175-
; CHECK-NEXT: [[TOBOOL139:%.*]] = icmp ne i32 [[AND134]], 0
176-
; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TOBOOL137]], i1 [[TOBOOL139]], i1 false
177-
; CHECK-NEXT: [[LAND_EXT141:%.*]] = zext i1 [[TMP18]] to i32
178-
; CHECK-NEXT: [[ADD142]] = add i32 [[_CTT_2306]], [[LAND_EXT141]]
179-
; CHECK-NEXT: [[TOBOOL144:%.*]] = icmp eq i32 [[AND133]], 0
180-
; CHECK-NEXT: [[TOBOOL147:%.*]] = icmp eq i32 [[AND134]], 0
181-
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TOBOOL144]], i1 [[TOBOOL147]], i1 false
182-
; CHECK-NEXT: [[LAND_EXT149:%.*]] = zext i1 [[TMP19]] to i32
183-
; CHECK-NEXT: [[ADD150]] = add i32 [[_CFF_2305]], [[LAND_EXT149]]
184-
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TOBOOL137]], i1 [[TOBOOL147]], i1 false
185-
; CHECK-NEXT: [[LAND_EXT156:%.*]] = zext i1 [[TMP20]] to i32
186-
; CHECK-NEXT: [[ADD157]] = add i32 [[_CTF_2304]], [[LAND_EXT156]]
187-
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TOBOOL144]], i1 [[TOBOOL139]], i1 false
188-
; CHECK-NEXT: [[LAND_EXT163:%.*]] = zext i1 [[TMP21]] to i32
189-
; CHECK-NEXT: [[ADD164]] = add i32 [[_CFT_2303]], [[LAND_EXT163]]
151+
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> poison, i32 [[AND133]], i32 0
152+
; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x i32> [[TMP39]], <2 x i32> poison, <2 x i32> zeroinitializer
153+
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <2 x i32> [[TMP40]], zeroinitializer
154+
; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <2 x i32> [[TMP40]], zeroinitializer
155+
; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <2 x i1> [[TMP41]], <2 x i1> [[TMP42]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
156+
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x i32> poison, i32 [[AND134]], i32 0
157+
; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> zeroinitializer
158+
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <2 x i32> [[TMP45]], zeroinitializer
159+
; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i32> [[TMP45]], zeroinitializer
160+
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i1> [[TMP46]], <2 x i1> [[TMP47]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
161+
; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP43]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
162+
; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i1> [[TMP49]] to <4 x i32>
163+
; CHECK-NEXT: [[TMP51]] = add <4 x i32> [[TMP38]], [[TMP50]]
190164
; CHECK-NEXT: [[DEC165]] = add nsw i32 [[NBBOOLBLOCK_2302]], -1
191165
; CHECK-NEXT: [[CMP130_NOT:%.*]] = icmp eq i32 [[DEC165]], 0
192166
; CHECK-NEXT: br i1 [[CMP130_NOT]], label [[WHILE_END166]], label [[WHILE_BODY132]]
193167
; CHECK: while.end166:
194-
; CHECK-NEXT: [[_CFT_2_LCSSA:%.*]] = phi i32 [ [[_CFT_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD164]], [[WHILE_BODY132]] ]
195-
; CHECK-NEXT: [[_CTF_2_LCSSA:%.*]] = phi i32 [ [[_CTF_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD157]], [[WHILE_BODY132]] ]
196-
; CHECK-NEXT: [[_CFF_2_LCSSA:%.*]] = phi i32 [ [[_CFF_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD150]], [[WHILE_BODY132]] ]
197-
; CHECK-NEXT: [[_CTT_2_LCSSA:%.*]] = phi i32 [ [[_CTT_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD142]], [[WHILE_BODY132]] ]
198-
; CHECK-NEXT: store i32 [[_CTT_2_LCSSA]], ptr [[CTT:%.*]], align 4
199-
; CHECK-NEXT: store i32 [[_CFF_2_LCSSA]], ptr [[CFF:%.*]], align 4
200-
; CHECK-NEXT: store i32 [[_CTF_2_LCSSA]], ptr [[CTF:%.*]], align 4
201-
; CHECK-NEXT: store i32 [[_CFT_2_LCSSA]], ptr [[CFT:%.*]], align 4
168+
; CHECK-NEXT: [[TMP52:%.*]] = phi <4 x i32> [ [[TMP35]], [[WHILE_END122]] ], [ [[TMP51]], [[WHILE_BODY132]] ]
169+
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[TMP52]], i32 2
170+
; CHECK-NEXT: store i32 [[TMP53]], ptr [[CTT:%.*]], align 4
171+
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[TMP52]], i32 3
172+
; CHECK-NEXT: store i32 [[TMP54]], ptr [[CFF:%.*]], align 4
173+
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <4 x i32> [[TMP52]], i32 1
174+
; CHECK-NEXT: store i32 [[TMP55]], ptr [[CTF:%.*]], align 4
175+
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i32> [[TMP52]], i32 0
176+
; CHECK-NEXT: store i32 [[TMP56]], ptr [[CFT:%.*]], align 4
202177
; CHECK-NEXT: ret void
203178
;
204179
entry:

llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
99
; YAML-NEXT: Function: foo
1010
; YAML-NEXT: Args:
1111
; YAML-NEXT: - String: 'SLP vectorized with cost '
12-
; YAML-NEXT: - Cost: '-3'
12+
; YAML-NEXT: - Cost: '-4'
1313
; YAML-NEXT: - String: ' and with tree size '
1414
; YAML-NEXT: - TreeSize: '10'
1515
; YAML-NEXT: ...

0 commit comments

Comments
 (0)