Skip to content

Commit 5739a22

Browse files
authored
[VPlan] Also duplicate scalar-steps when it enables sinking scalars. (#136021)
Extend the sinking logic to duplicate a scalar-steps recipe if doing so enables sinking, that is, if all users in a destination block require all lanes. This should be the last step before removing the legacy sinkScalarOperands. PR: #136021
1 parent f02b285 commit 5739a22

18 files changed

+295
-279
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,18 +164,27 @@ static bool sinkScalarOperands(VPlan &Plan) {
164164
if (UI->getParent() == SinkTo)
165165
return true;
166166
NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
167-
// We only know how to duplicate VPRecipeRecipes for now.
168-
return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
167+
// We only know how to duplicate VPReplicateRecipes and
168+
// VPScalarIVStepsRecipes for now.
169+
return NeedsDuplicating &&
170+
isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
169171
};
170172
if (!all_of(SinkCandidate->users(), CanSinkWithUser))
171173
continue;
172174

173175
if (NeedsDuplicating) {
174176
if (ScalarVFOnly)
175177
continue;
176-
Instruction *I = SinkCandidate->getUnderlyingInstr();
177-
auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
178-
// TODO: add ".cloned" suffix to name of Clone's VPValue.
178+
VPSingleDefRecipe *Clone;
179+
if (isa<VPReplicateRecipe>(SinkCandidate)) {
180+
// TODO: Handle converting to uniform recipes as separate transform,
181+
// then cloning should be sufficient here.
182+
Instruction *I = SinkCandidate->getUnderlyingInstr();
183+
Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
184+
// TODO: add ".cloned" suffix to name of Clone's VPValue.
185+
} else {
186+
Clone = SinkCandidate->clone();
187+
}
179188

180189
Clone->insertBefore(SinkCandidate);
181190
SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {

llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
211211
; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
212212
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
213213
; CHECK: [[VECTOR_BODY]]:
214-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
215-
; CHECK-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0
214+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
216215
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
217216
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0
218217
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
@@ -224,15 +223,16 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
224223
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
225224
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
226225
; CHECK: [[PRED_STORE_IF]]:
227-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
226+
; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[IV]], 0
227+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP72]]
228228
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0
229229
; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
230230
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
231231
; CHECK: [[PRED_STORE_CONTINUE]]:
232232
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP7]], i32 1
233233
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
234234
; CHECK: [[PRED_STORE_IF1]]:
235-
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1
235+
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[IV]], 1
236236
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP12]]
237237
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 1
238238
; CHECK-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
@@ -241,7 +241,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
241241
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP7]], i32 2
242242
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
243243
; CHECK: [[PRED_STORE_IF3]]:
244-
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
244+
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[IV]], 2
245245
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP16]]
246246
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 2
247247
; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
@@ -250,7 +250,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
250250
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP7]], i32 3
251251
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
252252
; CHECK: [[PRED_STORE_IF5]]:
253-
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3
253+
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[IV]], 3
254254
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP20]]
255255
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 3
256256
; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP21]], align 1
@@ -259,7 +259,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
259259
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP7]], i32 4
260260
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
261261
; CHECK: [[PRED_STORE_IF7]]:
262-
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4
262+
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[IV]], 4
263263
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP24]]
264264
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 4
265265
; CHECK-NEXT: store i8 [[TMP26]], ptr [[TMP25]], align 1
@@ -268,7 +268,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
268268
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP7]], i32 5
269269
; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
270270
; CHECK: [[PRED_STORE_IF9]]:
271-
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5
271+
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[IV]], 5
272272
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP28]]
273273
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 5
274274
; CHECK-NEXT: store i8 [[TMP30]], ptr [[TMP29]], align 1
@@ -277,7 +277,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
277277
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP7]], i32 6
278278
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
279279
; CHECK: [[PRED_STORE_IF11]]:
280-
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6
280+
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[IV]], 6
281281
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP32]]
282282
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 6
283283
; CHECK-NEXT: store i8 [[TMP34]], ptr [[TMP33]], align 1
@@ -286,7 +286,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
286286
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP7]], i32 7
287287
; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
288288
; CHECK: [[PRED_STORE_IF13]]:
289-
; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7
289+
; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[IV]], 7
290290
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP36]]
291291
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 7
292292
; CHECK-NEXT: store i8 [[TMP38]], ptr [[TMP37]], align 1
@@ -295,7 +295,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
295295
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP7]], i32 8
296296
; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
297297
; CHECK: [[PRED_STORE_IF15]]:
298-
; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8
298+
; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[IV]], 8
299299
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP40]]
300300
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 8
301301
; CHECK-NEXT: store i8 [[TMP42]], ptr [[TMP41]], align 1
@@ -304,7 +304,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
304304
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[TMP7]], i32 9
305305
; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
306306
; CHECK: [[PRED_STORE_IF17]]:
307-
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9
307+
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[IV]], 9
308308
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP44]]
309309
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 9
310310
; CHECK-NEXT: store i8 [[TMP46]], ptr [[TMP45]], align 1
@@ -313,7 +313,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
313313
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP7]], i32 10
314314
; CHECK-NEXT: br i1 [[TMP47]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
315315
; CHECK: [[PRED_STORE_IF19]]:
316-
; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10
316+
; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[IV]], 10
317317
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP48]]
318318
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 10
319319
; CHECK-NEXT: store i8 [[TMP50]], ptr [[TMP49]], align 1
@@ -322,7 +322,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
322322
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i1> [[TMP7]], i32 11
323323
; CHECK-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
324324
; CHECK: [[PRED_STORE_IF21]]:
325-
; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11
325+
; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[IV]], 11
326326
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP52]]
327327
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 11
328328
; CHECK-NEXT: store i8 [[TMP54]], ptr [[TMP53]], align 1
@@ -331,7 +331,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
331331
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[TMP7]], i32 12
332332
; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
333333
; CHECK: [[PRED_STORE_IF23]]:
334-
; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12
334+
; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[IV]], 12
335335
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP56]]
336336
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 12
337337
; CHECK-NEXT: store i8 [[TMP58]], ptr [[TMP57]], align 1
@@ -340,7 +340,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
340340
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP7]], i32 13
341341
; CHECK-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
342342
; CHECK: [[PRED_STORE_IF25]]:
343-
; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13
343+
; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[IV]], 13
344344
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP60]]
345345
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 13
346346
; CHECK-NEXT: store i8 [[TMP62]], ptr [[TMP61]], align 1
@@ -349,7 +349,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
349349
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP7]], i32 14
350350
; CHECK-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
351351
; CHECK: [[PRED_STORE_IF27]]:
352-
; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14
352+
; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[IV]], 14
353353
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP64]]
354354
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 14
355355
; CHECK-NEXT: store i8 [[TMP66]], ptr [[TMP65]], align 1
@@ -358,13 +358,13 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
358358
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15
359359
; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]]
360360
; CHECK: [[PRED_STORE_IF29]]:
361-
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15
361+
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[IV]], 15
362362
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP68]]
363363
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15
364364
; CHECK-NEXT: store i8 [[TMP70]], ptr [[TMP69]], align 1
365365
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
366366
; CHECK: [[PRED_STORE_CONTINUE30]]:
367-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
367+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[IV]], 16
368368
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
369369
; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
370370
; CHECK: [[MIDDLE_BLOCK]]:

llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,12 +1045,12 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
10451045
; TF-FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]]
10461046
; TF-FIXEDLEN: [[VECTOR_BODY]]:
10471047
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
1048-
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1049-
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
1050-
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
1051-
; TF-FIXEDLEN-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1048+
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 1025)
1049+
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
1050+
; TF-FIXEDLEN-NEXT: br i1 [[TMP0]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
10521051
; TF-FIXEDLEN: [[PRED_STORE_IF]]:
1053-
; TF-FIXEDLEN-NEXT: store i64 [[TMP0]], ptr [[B]], align 8
1052+
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
1053+
; TF-FIXEDLEN-NEXT: store i64 [[TMP1]], ptr [[B]], align 8
10541054
; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE]]
10551055
; TF-FIXEDLEN: [[PRED_STORE_CONTINUE]]:
10561056
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
@@ -1074,7 +1074,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
10741074
; TF-FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B]], align 8
10751075
; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE6]]
10761076
; TF-FIXEDLEN: [[PRED_STORE_CONTINUE6]]:
1077-
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
1077+
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
10781078
; TF-FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
10791079
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
10801080
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -603,11 +603,10 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
603603
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
604604
; CHECK: vector.body:
605605
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ]
606-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
607606
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
608607
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 8
609608
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 12
610-
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 2
609+
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[INDEX]], 2
611610
; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2
612611
; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2
613612
; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i64 [[TMP6]], 2
@@ -630,6 +629,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
630629
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
631630
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
632631
; CHECK: pred.store.if:
632+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
633633
; CHECK-NEXT: [[TMP24:%.*]] = shl nsw i64 [[TMP3]], 2
634634
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP24]]
635635
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP25]], align 8
@@ -665,7 +665,8 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
665665
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
666666
; CHECK-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
667667
; CHECK: pred.store.if14:
668-
; CHECK-NEXT: [[TMP39:%.*]] = shl nsw i64 [[TMP4]], 2
668+
; CHECK-NEXT: [[TMP88:%.*]] = add i64 [[INDEX]], 4
669+
; CHECK-NEXT: [[TMP39:%.*]] = shl nsw i64 [[TMP88]], 2
669670
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP39]]
670671
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP40]], align 8
671672
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]]
@@ -700,7 +701,8 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
700701
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP21]], i32 0
701702
; CHECK-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
702703
; CHECK: pred.store.if22:
703-
; CHECK-NEXT: [[TMP54:%.*]] = shl nsw i64 [[TMP5]], 2
704+
; CHECK-NEXT: [[TMP107:%.*]] = add i64 [[INDEX]], 8
705+
; CHECK-NEXT: [[TMP54:%.*]] = shl nsw i64 [[TMP107]], 2
704706
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP54]]
705707
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP55]], align 8
706708
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE23]]
@@ -735,7 +737,8 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
735737
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <4 x i1> [[TMP22]], i32 0
736738
; CHECK-NEXT: br i1 [[TMP68]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
737739
; CHECK: pred.store.if30:
738-
; CHECK-NEXT: [[TMP69:%.*]] = shl nsw i64 [[TMP6]], 2
740+
; CHECK-NEXT: [[TMP108:%.*]] = add i64 [[INDEX]], 12
741+
; CHECK-NEXT: [[TMP69:%.*]] = shl nsw i64 [[TMP108]], 2
739742
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP69]]
740743
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP70]], align 8
741744
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE31]]
@@ -785,15 +788,15 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
785788
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
786789
; CHECK: vec.epilog.vector.body:
787790
; CHECK-NEXT: [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ]
788-
; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0
789-
; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[TMP86]], 2
791+
; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[INDEX40]], 2
790792
; CHECK-NEXT: [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]]
791793
; CHECK-NEXT: [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8
792794
; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC41]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
793795
; CHECK-NEXT: [[TMP90:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC42]], zeroinitializer
794796
; CHECK-NEXT: [[TMP91:%.*]] = extractelement <4 x i1> [[TMP90]], i32 0
795797
; CHECK-NEXT: br i1 [[TMP91]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
796798
; CHECK: pred.store.if43:
799+
; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0
797800
; CHECK-NEXT: [[TMP92:%.*]] = shl nsw i64 [[TMP86]], 2
798801
; CHECK-NEXT: [[TMP93:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP92]]
799802
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP93]], align 8

0 commit comments

Comments
 (0)