Skip to content

Commit 2fd69ff

Browse files
committed
[VPlan] Explicitly handle scalar pointer inductions.
Add a new PtrAdd opcode to VPInstruction that corresponds to IRBuilder::CreatePtrAdd, which creates a GEP with source element type i8. This opcode is then used to model the scalarization of VPWidenPointerInductionRecipe: scalar-steps compute the index increment, followed by a PtrAdd that applies that index to the start pointer. Note that PtrAdd needs to be able to generate code either for only the first lane or for all lanes. This may warrant introducing a separate recipe for scalarizing that can be created without relying on the underlying IR.
1 parent 7cb2c1d commit 2fd69ff

17 files changed

+573
-578
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9233,42 +9233,11 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
92339233
"Not a pointer induction according to InductionDescriptor!");
92349234
assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
92359235
"Unexpected type.");
9236+
assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
9237+
"Recipe should have been replaced");
92369238

92379239
auto *IVR = getParent()->getPlan()->getCanonicalIV();
92389240
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
9239-
9240-
if (onlyScalarsGenerated(State.VF.isScalable())) {
9241-
// This is the normalized GEP that starts counting at zero.
9242-
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
9243-
CanonicalIV, IndDesc.getStep()->getType());
9244-
// Determine the number of scalars we need to generate for each unroll
9245-
// iteration. If the instruction is uniform, we only need to generate the
9246-
// first lane. Otherwise, we generate all VF values.
9247-
bool IsUniform = vputils::onlyFirstLaneUsed(this);
9248-
assert((IsUniform || !State.VF.isScalable()) &&
9249-
"Cannot scalarize a scalable VF");
9250-
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
9251-
9252-
for (unsigned Part = 0; Part < State.UF; ++Part) {
9253-
Value *PartStart =
9254-
createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);
9255-
9256-
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
9257-
Value *Idx = State.Builder.CreateAdd(
9258-
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
9259-
Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
9260-
9261-
Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
9262-
Value *SclrGep = emitTransformedIndex(
9263-
State.Builder, GlobalIdx, IndDesc.getStartValue(), Step,
9264-
IndDesc.getKind(), IndDesc.getInductionBinOp());
9265-
SclrGep->setName("next.gep");
9266-
State.set(this, SclrGep, VPIteration(Part, Lane));
9267-
}
9268-
}
9269-
return;
9270-
}
9271-
92729241
Type *PhiType = IndDesc.getStep()->getType();
92739242

92749243
// Build a pointer phi

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
11561156
BranchOnCount,
11571157
BranchOnCond,
11581158
ComputeReductionResult,
1159+
PtrAdd,
11591160
};
11601161

11611162
private:
@@ -2502,6 +2503,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
25022503
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
25032504
Start, CanonicalIV, Step) {}
25042505

2506+
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, VPValue *Start,
2507+
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
2508+
FPMathOperator *FPBinOp)
2509+
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
2510+
Kind(Kind), FPBinOp(FPBinOp) {}
2511+
25052512
~VPDerivedIVRecipe() override = default;
25062513

25072514
VPRecipeBase *clone() override {

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
4343
CachedTypes[OtherV] = ResTy;
4444
return ResTy;
4545
}
46+
case VPInstruction::PtrAdd:
47+
return inferScalarType(R->getOperand(0));
4648
default:
4749
break;
4850
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
127127
case VPInstruction::Not:
128128
case VPInstruction::CalculateTripCountMinusVF:
129129
case VPInstruction::CanonicalIVIncrementForPart:
130+
case VPInstruction::PtrAdd:
130131
return false;
131132
default:
132133
return true;
@@ -489,6 +490,23 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
489490

490491
return ReducedPartRdx;
491492
}
493+
case VPInstruction::PtrAdd: {
494+
if (vputils::onlyFirstLaneUsed(this)) {
495+
auto *P =
496+
Builder.CreatePtrAdd(State.get(getOperand(0), VPIteration(Part, 0)),
497+
State.get(getOperand(1), VPIteration(Part, 0)));
498+
State.set(this, P, VPIteration(Part, 0));
499+
} else {
500+
for (unsigned Lane = 0; Lane != State.VF.getKnownMinValue(); ++Lane) {
501+
Value *P = Builder.CreatePtrAdd(
502+
State.get(getOperand(0), VPIteration(Part, Lane)),
503+
State.get(getOperand(1), VPIteration(Part, Lane)));
504+
505+
State.set(this, P, VPIteration(Part, Lane));
506+
}
507+
}
508+
return nullptr;
509+
}
492510
default:
493511
llvm_unreachable("Unsupported opcode for instruction");
494512
}
@@ -515,6 +533,8 @@ void VPInstruction::execute(VPTransformState &State) {
515533
State.Builder.setFastMathFlags(getFastMathFlags());
516534
for (unsigned Part = 0; Part < State.UF; ++Part) {
517535
Value *GeneratedValue = generateInstruction(State, Part);
536+
if (!GeneratedValue)
537+
continue;
518538
if (!hasResult())
519539
continue;
520540
assert(GeneratedValue && "generateInstruction must produce a value");
@@ -598,6 +618,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
598618
case VPInstruction::ComputeReductionResult:
599619
O << "compute-reduction-result";
600620
break;
621+
case VPInstruction::PtrAdd:
622+
O << "ptradd";
623+
break;
601624
default:
602625
O << Instruction::getOpcodeName(getOpcode());
603626
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -489,15 +489,18 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
489489
}
490490
}
491491

492-
static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
492+
static VPValue *createScalarIVSteps(VPlan &Plan,
493+
InductionDescriptor::InductionKind Kind,
493494
ScalarEvolution &SE, Instruction *TruncI,
494495
VPValue *StartV, VPValue *Step,
495-
VPBasicBlock::iterator IP) {
496+
Instruction::BinaryOps InductionOpcode,
497+
VPBasicBlock::iterator IP,
498+
FPMathOperator *FPBinOp = nullptr) {
496499
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
497500
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
498501
VPSingleDefRecipe *BaseIV = CanonicalIV;
499-
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
500-
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
502+
if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
503+
BaseIV = new VPDerivedIVRecipe(Kind, StartV, CanonicalIV, Step, FPBinOp);
501504
HeaderVPBB->insert(BaseIV, IP);
502505
}
503506

@@ -526,7 +529,9 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
526529
VecPreheader->appendRecipe(Step->getDefiningRecipe());
527530
}
528531

529-
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
532+
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
533+
BaseIV, Step, InductionOpcode,
534+
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
530535
HeaderVPBB->insert(Steps, IP);
531536
return Steps;
532537
}
@@ -537,6 +542,30 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
537542
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
538543
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
539544
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
545+
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
546+
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
547+
continue;
548+
549+
const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
550+
VPValue *StartV = Plan.getVPValueOrAddLiveIn(
551+
ConstantInt::get(ID.getStep()->getType(), 0));
552+
VPValue *StepV = PtrIV->getOperand(1);
553+
VPRecipeBase *Steps =
554+
createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction, SE,
555+
nullptr, StartV, StepV, Instruction::Add,
556+
InsertPt)
557+
->getDefiningRecipe();
558+
559+
auto *Recipe =
560+
new VPInstruction(VPInstruction::PtrAdd,
561+
{PtrIV->getStartValue(), Steps->getVPSingleValue()},
562+
PtrIV->getDebugLoc());
563+
564+
Recipe->insertAfter(Steps);
565+
PtrIV->replaceAllUsesWith(Recipe);
566+
continue;
567+
}
568+
540569
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
541570
if (!WideIV)
542571
continue;
@@ -546,9 +575,10 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
546575
continue;
547576

548577
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
549-
VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
550-
WideIV->getStartValue(),
551-
WideIV->getStepValue(), InsertPt);
578+
VPValue *Steps = createScalarIVSteps(
579+
Plan, ID.getKind(), SE, WideIV->getTruncInst(), WideIV->getStartValue(),
580+
WideIV->getStepValue(), ID.getInductionOpcode(), InsertPt,
581+
dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()));
552582

553583
// Update scalar users of IV to use Step instead.
554584
if (!HasOnlyVectorVFs)

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 49 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -11,76 +11,74 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
1111
; CHECK: vector.main.loop.iter.check:
1212
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
1313
; CHECK: vector.ph:
14-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 10000
1514
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1615
; CHECK: vector.body:
1716
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1817
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
19-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP0]]
2018
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
21-
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
22-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
23-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1
24-
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
25-
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]]
26-
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 3
27-
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]]
28-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
29-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
30-
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer
31-
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
32-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
33-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
34-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
35-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
36-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
37-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
38-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
39-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
40-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
41-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
42-
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
43-
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
19+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
20+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
21+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
22+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
23+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP4]], i32 0
24+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i32 1
25+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]]
26+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]]
27+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0
28+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1
29+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
30+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP11]], zeroinitializer
31+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
32+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
33+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
34+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
35+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
36+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]])
37+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
38+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP17]])
39+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
40+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP4]], i32 2
41+
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP18]], align 1
42+
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP19]], align 1
4443
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
45-
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
46-
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
44+
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
45+
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
4746
; CHECK: middle.block:
4847
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
4948
; CHECK: vec.epilog.iter.check:
50-
; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
49+
; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
5150
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
5251
; CHECK: vec.epilog.ph:
53-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
5452
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
55-
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
53+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
5654
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
5755
; CHECK: vec.epilog.vector.body:
58-
; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
59-
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX8]], 0
60-
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
61-
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX8]], 1
62-
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
63-
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP9]], i32 0
64-
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP10]], i32 1
65-
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
66-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
67-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
68-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
69-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
70-
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i32 0
71-
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
72-
; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2
73-
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT11]], 10000
74-
; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
56+
; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
57+
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX3]], 0
58+
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX3]], 1
59+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP21]]
60+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP22]]
61+
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP23]], i32 0
62+
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x ptr> [[TMP25]], ptr [[TMP24]], i32 1
63+
; CHECK-NEXT: [[TMP27:%.*]] = icmp ne <2 x ptr> [[TMP26]], zeroinitializer
64+
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP27]], i32 0
65+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP28]])
66+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[TMP27]], i32 1
67+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP29]])
68+
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP23]], i32 0
69+
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP30]], align 1
70+
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 2
71+
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 10000
72+
; CHECK-NEXT: br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
7573
; CHECK: vec.epilog.middle.block:
7674
; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
7775
; CHECK: vec.epilog.scalar.ph:
78-
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
79-
; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
76+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
77+
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
8078
; CHECK-NEXT: br label [[LOOP:%.*]]
8179
; CHECK: loop:
82-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
83-
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
80+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
81+
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
8482
; CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ne ptr [[PTR_IV]], null
8583
; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I_I]])
8684
; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1

0 commit comments

Comments
 (0)