Skip to content

[VPlan] Explicitly handle scalar pointer inductions. #83068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f4dabdf
[VPlan] Update VPInst::onlyFirstLaneUsed to check users.
fhahn Jan 31, 2024
b08e892
[VPlan] Consistently use (Part, 0) for first lane scalar values
fhahn Jan 31, 2024
f56e217
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 3, 2024
172dbf6
!fixup fix merge
fhahn Feb 3, 2024
916a7d2
[VPlan] Explicitly handle scalar pointer inductions.
fhahn Jan 29, 2024
d2c51ec
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 6, 2024
82d74df
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 7, 2024
c6797e6
!fixup address latest comments, thanks!
fhahn Feb 7, 2024
53f2937
!fixup fix formatting
fhahn Feb 7, 2024
b0a78f6
Merge branch 'users/fhahn/vplan-uniform-scalar-lanes' into vplan-vect…
fhahn Feb 7, 2024
e6d2db8
!fixup Address latest comments, thanks!
fhahn Feb 7, 2024
5065331
!Fixup split generateInstruction into per-part and per lane.
fhahn Feb 7, 2024
f38d682
!fixup address comments in VPlanTransforms.cpp, thanks!
fhahn Feb 7, 2024
a166da5
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 8, 2024
df9cad0
Merge branch 'users/fhahn/vplan-uniform-scalar-lanes' into vplan-vect…
fhahn Feb 8, 2024
ab14184
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Feb 26, 2024
133776f
!fixup fix things after update to main.
fhahn Feb 26, 2024
d8173fb
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 1, 2024
0bb9f5c
!fixup fix formatting.
fhahn Mar 1, 2024
3a698c0
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 7, 2024
1e41111
!fixup address latest comments, thanks!
fhahn Mar 7, 2024
8d05e99
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 11, 2024
6f4516f
fixup address comments.
fhahn Mar 11, 2024
5f4e4aa
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 11, 2024
c936a4e
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 18, 2024
a9df1d9
!fixup address latest comments, thanks!
fhahn Mar 18, 2024
9f68460
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 25, 2024
74cb095
!fixup address comments, thanks!
fhahn Mar 25, 2024
4211565
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 26, 2024
643969c
!fixup address latest comments, thanks!
fhahn Mar 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 2 additions & 33 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9111,42 +9111,11 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
"Not a pointer induction according to InductionDescriptor!");
assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
"Unexpected type.");
assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
"Recipe should have been replaced");

auto *IVR = getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0, /*IsScalar*/ true));

if (onlyScalarsGenerated(State.VF.isScalable())) {
// This is the normalized GEP that starts counting at zero.
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
CanonicalIV, IndDesc.getStep()->getType());
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
bool IsUniform = vputils::onlyFirstLaneUsed(this);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();

for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *PartStart =
createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);

for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = State.Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);

Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
Value *SclrGep = emitTransformedIndex(
State.Builder, GlobalIdx, IndDesc.getStartValue(), Step,
IndDesc.getKind(), IndDesc.getInductionBinOp());
SclrGep->setName("next.gep");
State.set(this, SclrGep, VPIteration(Part, Lane));
}
}
return;
}

Type *PhiType = IndDesc.getStep()->getType();

// Build a pointer phi
Expand Down
7 changes: 2 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -860,11 +860,8 @@ void VPlan::execute(VPTransformState *State) {
Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
} else {
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
// TODO: Split off the case that all users of a pointer phi are scalar
// from the VPWidenPointerInductionRecipe.
if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
continue;

assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
"recipe generating only scalars should have been replaced");
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
Phi = cast<PHINode>(GEP->getPointerOperand());
}
Expand Down
41 changes: 31 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,10 @@ class VPInstruction : public VPRecipeWithIRFlags {
BranchOnCount,
BranchOnCond,
ComputeReductionResult,
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
PtrAdd,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth documenting somewhere what this VPInstruction/Opcode represents, including being scalar.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

};

private:
Expand All @@ -1164,11 +1168,28 @@ class VPInstruction : public VPRecipeWithIRFlags {
/// An optional name that can be used for the generated IR instruction.
const std::string Name;

/// Utility method serving execute(): generates a single instance of the
/// modeled instruction. \returns the generated value for \p Part.
/// In some cases an existing value is returned rather than a generated
/// Returns true if this VPInstruction generates scalar values for all lanes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding some context? E.g.,
// Most VPInstructions generate a single value per part, either vector or scalar. VPReplicateRecipe takes care of generating multiple (scalar) values per all lanes, stemming from an original ingredient. This method identifies the (rare) cases of VPInstructions that do so as well, w/o an underlying ingredient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

/// Most VPInstructions generate a single value per part, either vector or
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
/// values per all lanes, stemming from an original ingredient. This method
/// identifies the (rare) cases of VPInstructions that do so as well, w/o an
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;

/// Returns true if we can generate a scalar for the first lane only if
/// needed.
bool canGenerateScalarForFirstLane() const;

/// Utility methods serving execute(): generates a single instance of the
/// modeled instruction for a given part. \returns the generated value for \p
/// Part. In some cases an existing value is returned rather than a generated
/// one.
Value *generateInstruction(VPTransformState &State, unsigned Part);
Value *generatePerPart(VPTransformState &State, unsigned Part);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth updating documentation?
generateInstructionPerPart(), generateInstructionPerLane()??

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!


/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
/// value for lane \p Lane.
Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);

#if !defined(NDEBUG)
/// Return true if the VPInstruction is a floating point math operation, i.e.
Expand Down Expand Up @@ -2491,12 +2512,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// for floating point inductions.
const FPMathOperator *FPBinOp;

VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
const FPMathOperator *FPBinOp, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
Kind(Kind), FPBinOp(FPBinOp) {}

public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
Expand All @@ -2505,6 +2520,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
Start, CanonicalIV, Step) {}

VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
const FPMathOperator *FPBinOp, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
Kind(Kind), FPBinOp(FPBinOp) {}

~VPDerivedIVRecipe() override = default;

VPRecipeBase *clone() override {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
CachedTypes[OtherV] = ResTy;
return ResTy;
}
case VPInstruction::PtrAdd:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: worth asserting and caching the type of the other operand, i.e., join the above cases of ICmp and FOR Splice?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PtrAdd's operands have different types, with the first one being a pointer and the second one being an integer offset.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right, of course. Perhaps worth a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

// Return the type based on the pointer argument (i.e. first operand).
return inferScalarType(R->getOperand(0));
default:
break;
}
Expand Down
79 changes: 68 additions & 11 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::PtrAdd:
return false;
default:
return true;
Expand Down Expand Up @@ -270,10 +271,39 @@ VPInstruction::VPInstruction(unsigned Opcode,
assert(isFPMathOp() && "this op can't take fast-math flags");
}

Value *VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
bool VPInstruction::doesGeneratePerAllLanes() const {
return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
if (Instruction::isBinaryOp(getOpcode()))
return true;

switch (Opcode) {
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnCount:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ComputeReductionResult:
case VPInstruction::PtrAdd:
return true;
default:
return false;
}
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
const VPIteration &Lane) {
IRBuilderBase &Builder = State.Builder;

assert(getOpcode() == VPInstruction::PtrAdd &&
"only PtrAdd opcodes are supported for now");
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
IRBuilderBase &Builder = State.Builder;
Builder.SetCurrentDebugLocation(getDebugLoc());

if (Instruction::isBinaryOp(getOpcode())) {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
Expand Down Expand Up @@ -490,6 +520,13 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,

return ReducedPartRdx;
}
case VPInstruction::PtrAdd: {
assert(vputils::onlyFirstLaneUsed(this) &&
"can only generate first lane for PtrAdd");
Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
return Builder.CreatePtrAdd(Ptr, Addend, Name);
}
default:
llvm_unreachable("Unsupported opcode for instruction");
}
Expand All @@ -514,17 +551,33 @@ void VPInstruction::execute(VPTransformState &State) {
"Recipe not a FPMathOp but has fast-math flags?");
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
State.Builder.SetCurrentDebugLocation(getDebugLoc());
bool GeneratesPerFirstLaneOnly =
canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
getOpcode() == VPInstruction::ComputeReductionResult);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Unrelated to this patch, while we're here: why is ComputeReductionResult an exception?)

bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *GeneratedValue = generateInstruction(State, Part);
if (!hasResult())
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
Lane != NumLanes; ++Lane) {
Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
assert(GeneratedValue && "generatePerLane must produce a value");
State.set(this, GeneratedValue, VPIteration(Part, Lane));
}
continue;
assert(GeneratedValue && "generateInstruction must produce a value");
}

bool IsVector = GeneratedValue->getType()->isVectorTy();
State.set(this, GeneratedValue, Part, !IsVector);
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult ||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
"scalar value but not only first lane used");
Value *GeneratedValue = generatePerPart(State, Part);
if (!hasResult())
continue;
assert(GeneratedValue && "generatePerPart must produce a value");
assert((GeneratedValue->getType()->isVectorTy() ==
!GeneratesPerFirstLaneOnly ||
State.VF.isScalar()) &&
"scalar value but not only first lane defined");
State.set(this, GeneratedValue, Part,
/*IsScalar*/ GeneratesPerFirstLaneOnly);
}
}

Expand All @@ -537,6 +590,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
default:
return false;
case Instruction::ICmp:
case VPInstruction::PtrAdd:
// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
case VPInstruction::ActiveLaneMask:
Expand Down Expand Up @@ -594,6 +648,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
case VPInstruction::PtrAdd:
O << "ptradd";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
Expand Down
66 changes: 53 additions & 13 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,15 +498,18 @@ static void removeDeadRecipes(VPlan &Plan) {
}
}

static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
static VPValue *createScalarIVSteps(VPlan &Plan,
InductionDescriptor::InductionKind Kind,
Instruction::BinaryOps InductionOpcode,
FPMathOperator *FPBinOp,
ScalarEvolution &SE, Instruction *TruncI,
VPValue *StartV, VPValue *Step,
VPBasicBlock::iterator IP) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
VPSingleDefRecipe *BaseIV = CanonicalIV;
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step);
HeaderVPBB->insert(BaseIV, IP);
}

Expand Down Expand Up @@ -536,21 +539,56 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
VecPreheader->appendRecipe(Step->getDefiningRecipe());
}

VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
BaseIV, Step, InductionOpcode,
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
HeaderVPBB->insert(Steps, IP);
return Steps;
}

/// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values,
/// provide them by building scalar steps off of the canonical scalar IV and
/// update the original IV's users. This is an optional optimization to reduce
/// the needs of vector extracts.
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
/// require vectors while other require scalars, the scalar uses need to extract
/// the scalars from the generated vectors (Note that this is different to how
/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
/// if any of its users needs scalar values, by providing them scalar steps
/// built on the canonical scalar IV and update the original IV's users. This is
/// an optional optimization to reduce the needs of vector extracts.
static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
SmallVector<VPRecipeBase *> ToRemove;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
// Replace wide pointer inductions which have only their scalars used by
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
continue;

const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
VPValue *StartV = Plan.getVPValueOrAddLiveIn(
ConstantInt::get(ID.getStep()->getType(), 0));
VPValue *StepV = PtrIV->getOperand(1);
VPRecipeBase *Steps =
createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction,
Instruction::Add, nullptr, SE, nullptr, StartV,
StepV, InsertPt)
->getDefiningRecipe();

auto *Recipe =
new VPInstruction(VPInstruction::PtrAdd,
{PtrIV->getStartValue(), Steps->getVPSingleValue()},
PtrIV->getDebugLoc(), "next.gep");

Recipe->insertAfter(Steps);
PtrIV->replaceAllUsesWith(Recipe);
continue;
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps worth moving here some of the documentation above that described what happens next.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment, thanks!

// Replace widened induction with scalar steps for users that only use
// scalars.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good if these two cases, which both call createScalarIVSteps for scalar users only, could share something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think trying to share createScalarIVSteps for both would make things more complicated, as they have a lot of different arguments. Left as is for now.

auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (!WideIV)
continue;
Expand All @@ -560,9 +598,11 @@ static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
continue;

const InductionDescriptor &ID = WideIV->getInductionDescriptor();
VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
WideIV->getStartValue(),
WideIV->getStepValue(), InsertPt);
VPValue *Steps = createScalarIVSteps(
Plan, ID.getKind(), ID.getInductionOpcode(),
dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE,
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
InsertPt);

// Update scalar users of IV to use Step instead.
if (!HasOnlyVectorVFs)
Expand Down Expand Up @@ -1025,7 +1065,7 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
removeRedundantInductionCasts(Plan);

simplifyRecipes(Plan, SE.getContext());
optimizeInductions(Plan, SE);
legalizeAndOptimizeInductions(Plan, SE);
removeDeadRecipes(Plan);

createAndOptimizeReplicateRegions(Plan);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ define void @g(ptr %dst.1, ptr %start, i64 %N) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP5]], align 8
Expand Down
Loading