Skip to content

Commit 89d6915

Browse files
committed
[VPlan] Remove ILV::sinkScalarOperands.
Remove legacy ILV sinkScalarOperands, which is superseded by the sinkScalarOperands VPlan transforms. There are a few cases that aren't handled by VPlan's sinkScalarOperands, because the recipes don't support replicating. Those are pointer inductions and blends. We could probably improve this further, by allowing replication for more recipes, but I don't think the extra complexity is warranted. Depends on #136021.
1 parent 68c1da5 commit 89d6915

File tree

5 files changed

+43
-131
lines changed

5 files changed

+43
-131
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -541,10 +541,6 @@ class InnerLoopVectorizer {
541541
protected:
542542
friend class LoopVectorizationPlanner;
543543

544-
/// Iteratively sink the scalarized operands of a predicated instruction into
545-
/// the block that was created for it.
546-
void sinkScalarOperands(Instruction *PredInst);
547-
548544
/// Returns (and creates if needed) the trip count of the widened loop.
549545
Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
550546

@@ -629,9 +625,6 @@ class InnerLoopVectorizer {
629625
/// A list of all bypass blocks. The first block is the entry of the loop.
630626
SmallVector<BasicBlock *, 4> LoopBypassBlocks;
631627

632-
/// Store instructions that were predicated.
633-
SmallVector<Instruction *, 4> PredicatedInstructions;
634-
635628
/// Trip count of the original loop.
636629
Value *TripCount = nullptr;
637630

@@ -2385,15 +2378,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23852378

23862379
// End if-block.
23872380
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
2388-
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
23892381
assert(
23902382
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
23912383
all_of(RepRecipe->operands(),
23922384
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
23932385
"Expected a recipe is either within a region or all of its operands "
23942386
"are defined outside the vectorized region.");
2395-
if (IfPredicateInstr)
2396-
PredicatedInstructions.push_back(Cloned);
23972387
}
23982388

23992389
Value *
@@ -2867,8 +2857,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28672857
if (!State.Plan->getVectorLoopRegion())
28682858
return;
28692859

2870-
for (Instruction *PI : PredicatedInstructions)
2871-
sinkScalarOperands(&*PI);
28722860

28732861
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
28742862
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
@@ -2895,82 +2883,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28952883
VF.getKnownMinValue() * UF);
28962884
}
28972885

2898-
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
2899-
// The basic block and loop containing the predicated instruction.
2900-
auto *PredBB = PredInst->getParent();
2901-
auto *VectorLoop = LI->getLoopFor(PredBB);
2902-
2903-
// Initialize a worklist with the operands of the predicated instruction.
2904-
SetVector<Value *> Worklist(PredInst->op_begin(), PredInst->op_end());
2905-
2906-
// Holds instructions that we need to analyze again. An instruction may be
2907-
// reanalyzed if we don't yet know if we can sink it or not.
2908-
SmallVector<Instruction *, 8> InstsToReanalyze;
2909-
2910-
// Returns true if a given use occurs in the predicated block. Phi nodes use
2911-
// their operands in their corresponding predecessor blocks.
2912-
auto IsBlockOfUsePredicated = [&](Use &U) -> bool {
2913-
auto *I = cast<Instruction>(U.getUser());
2914-
BasicBlock *BB = I->getParent();
2915-
if (auto *Phi = dyn_cast<PHINode>(I))
2916-
BB = Phi->getIncomingBlock(
2917-
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
2918-
return BB == PredBB;
2919-
};
2920-
2921-
// Iteratively sink the scalarized operands of the predicated instruction
2922-
// into the block we created for it. When an instruction is sunk, its
2923-
// operands are then added to the worklist. The algorithm ends after one pass
2924-
// through the worklist doesn't sink a single instruction.
2925-
bool Changed;
2926-
do {
2927-
// Add the instructions that need to be reanalyzed to the worklist, and
2928-
// reset the changed indicator.
2929-
Worklist.insert_range(InstsToReanalyze);
2930-
InstsToReanalyze.clear();
2931-
Changed = false;
2932-
2933-
while (!Worklist.empty()) {
2934-
auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
2935-
2936-
// We can't sink an instruction if it is a phi node, is not in the loop,
2937-
// may have side effects or may read from memory.
2938-
// TODO: Could do more granular checking to allow sinking
2939-
// a load past non-store instructions.
2940-
if (!I || isa<PHINode>(I) || !VectorLoop->contains(I) ||
2941-
I->mayHaveSideEffects() || I->mayReadFromMemory())
2942-
continue;
2943-
2944-
// If the instruction is already in PredBB, check if we can sink its
2945-
// operands. In that case, VPlan's sinkScalarOperands() succeeded in
2946-
// sinking the scalar instruction I, hence it appears in PredBB; but it
2947-
// may have failed to sink I's operands (recursively), which we try
2948-
// (again) here.
2949-
if (I->getParent() == PredBB) {
2950-
Worklist.insert_range(I->operands());
2951-
continue;
2952-
}
2953-
2954-
// It's legal to sink the instruction if all its uses occur in the
2955-
// predicated block. Otherwise, there's nothing to do yet, and we may
2956-
// need to reanalyze the instruction.
2957-
if (!llvm::all_of(I->uses(), IsBlockOfUsePredicated)) {
2958-
InstsToReanalyze.push_back(I);
2959-
continue;
2960-
}
2961-
2962-
// Move the instruction to the beginning of the predicated block, and add
2963-
// its operands to the worklist.
2964-
I->moveBefore(PredBB->getFirstInsertionPt());
2965-
Worklist.insert_range(I->operands());
2966-
2967-
// The sinking may have enabled other instructions to be sunk, so we will
2968-
// need to iterate.
2969-
Changed = true;
2970-
}
2971-
} while (Changed);
2972-
}
2973-
29742886
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
29752887
auto Iter = vp_depth_first_deep(Plan.getEntry());
29762888
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {

llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
88
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
99
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1010
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
11-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP1]]
11+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
12+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
1213
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
1314
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
1415
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
1516
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1617
; CHECK: pred.load.if:
17-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
1818
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
1919
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
2020
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]

llvm/test/Transforms/LoopVectorize/X86/small-size.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -275,49 +275,49 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q
275275
; CHECK: vector.body:
276276
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
277277
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
278+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]]
279+
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP11]], i64 4
280+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
281+
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP12]], i64 8
282+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
283+
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[TMP15]], i64 12
278284
; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = shl i64 [[INDEX]], 2
285+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]]
286+
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP16]], i64 4
287+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
288+
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 8
289+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
290+
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[TMP8]], i64 12
279291
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
280292
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
281293
; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
282294
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT12]]
283295
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
284296
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
285297
; CHECK: pred.store.if:
286-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]]
287-
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]]
298+
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
299+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
288300
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[NEXT_GEP7]], align 16
289301
; CHECK-NEXT: store i32 [[TMP5]], ptr [[NEXT_GEP]], align 16
290302
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
291303
; CHECK: pred.store.continue:
292304
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
293305
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
294306
; CHECK: pred.store.if11:
295-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
296-
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
297-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
298-
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4
299307
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16
300308
; CHECK-NEXT: store i32 [[TMP9]], ptr [[NEXT_GEP3]], align 16
301309
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
302310
; CHECK: pred.store.continue12:
303311
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
304312
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
305313
; CHECK: pred.store.if13:
306-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
307-
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP11]], i64 8
308-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
309-
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP12]], i64 8
310314
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16
311315
; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP4]], align 16
312316
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
313317
; CHECK: pred.store.continue14:
314318
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
315319
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
316320
; CHECK: pred.store.if15:
317-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
318-
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[TMP15]], i64 12
319-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
320-
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[TMP16]], i64 12
321321
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP10]], align 16
322322
; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP5]], align 16
323323
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
@@ -453,16 +453,28 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
453453
; CHECK: vector.body:
454454
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
455455
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
456+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]]
457+
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP7]], i64 2
458+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
459+
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4
460+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
461+
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP13]], i64 6
456462
; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = shl i64 [[INDEX]], 2
463+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]]
464+
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4
465+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
466+
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP19]], i64 8
467+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
468+
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP20]], i64 12
457469
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
458470
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
459471
; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
460472
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], splat (i64 257)
461473
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
462474
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
463475
; CHECK: pred.store.if:
464-
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]]
465-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]]
476+
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
477+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
466478
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[NEXT_GEP]], align 2
467479
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
468480
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7
@@ -472,10 +484,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
472484
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
473485
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
474486
; CHECK: pred.store.if9:
475-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
476-
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
477-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
478-
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 2
479487
; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[NEXT_GEP2]], align 2
480488
; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32
481489
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7
@@ -485,10 +493,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
485493
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
486494
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
487495
; CHECK: pred.store.if11:
488-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
489-
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8
490-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
491-
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4
492496
; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[NEXT_GEP3]], align 2
493497
; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32
494498
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7
@@ -498,10 +502,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
498502
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
499503
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE15]]
500504
; CHECK: pred.store.if13:
501-
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
502-
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12
503-
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
504-
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP20]], i64 6
505505
; CHECK-NEXT: [[TMP21:%.*]] = load i16, ptr [[NEXT_GEP4]], align 2
506506
; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32
507507
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7

llvm/test/Transforms/LoopVectorize/pointer-induction.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,13 @@ define void @a(ptr readnone %b) {
2525
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
2626
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
2727
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
28+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
29+
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
30+
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
2831
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
32+
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
33+
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
34+
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
2935
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
3036
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
3137
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
@@ -43,26 +49,20 @@ define void @a(ptr readnone %b) {
4349
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
4450
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
4551
; CHECK: pred.store.if5:
46-
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
47-
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
4852
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
4953
; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1
5054
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
5155
; CHECK: pred.store.continue6:
5256
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
5357
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
5458
; CHECK: pred.store.if7:
55-
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
56-
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
5759
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
5860
; CHECK-NEXT: store i8 95, ptr [[TMP15]], align 1
5961
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
6062
; CHECK: pred.store.continue8:
6163
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
6264
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
6365
; CHECK: pred.store.if9:
64-
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
65-
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
6666
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1
6767
; CHECK-NEXT: store i8 95, ptr [[TMP18]], align 1
6868
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]

0 commit comments

Comments
 (0)