
[VPlan] Explicitly handle scalar pointer inductions. #83068


Status: Merged (30 commits, Mar 26, 2024)

Commits (30; the file changes shown below are from 26 of them)
f4dabdf - [VPlan] Update VPInst::onlyFirstLaneUsed to check users. (fhahn, Jan 31, 2024)
b08e892 - [VPlan] Consistently use (Part, 0) for first lane scalar values (fhahn, Jan 31, 2024)
f56e217 - Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes (fhahn, Feb 3, 2024)
172dbf6 - !fixup fix merge (fhahn, Feb 3, 2024)
916a7d2 - [VPlan] Explicitly handle scalar pointer inductions. (fhahn, Jan 29, 2024)
d2c51ec - Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes (fhahn, Feb 6, 2024)
82d74df - Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes (fhahn, Feb 7, 2024)
c6797e6 - !fixup address latest comments, thanks! (fhahn, Feb 7, 2024)
53f2937 - !fixup fix formatting (fhahn, Feb 7, 2024)
b0a78f6 - Merge branch 'users/fhahn/vplan-uniform-scalar-lanes' into vplan-vect… (fhahn, Feb 7, 2024)
e6d2db8 - !fixup Address latest comments, thanks! (fhahn, Feb 7, 2024)
5065331 - !Fixup split generateInstruction into per-part and per lane. (fhahn, Feb 7, 2024)
f38d682 - !fixup address comments in VPlanTransforms.cpp, thanks! (fhahn, Feb 7, 2024)
a166da5 - Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes (fhahn, Feb 8, 2024)
df9cad0 - Merge branch 'users/fhahn/vplan-uniform-scalar-lanes' into vplan-vect… (fhahn, Feb 8, 2024)
ab14184 - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Feb 26, 2024)
133776f - !fixup fix things after update to main. (fhahn, Feb 26, 2024)
d8173fb - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Mar 1, 2024)
0bb9f5c - !fixup fix formatting. (fhahn, Mar 1, 2024)
3a698c0 - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Mar 7, 2024)
1e41111 - !fixup address latest comments, thanks! (fhahn, Mar 7, 2024)
8d05e99 - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Mar 11, 2024)
6f4516f - fixup address comments. (fhahn, Mar 11, 2024)
5f4e4aa - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Mar 11, 2024)
c936a4e - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Mar 18, 2024)
a9df1d9 - !fixup address latest comments, thanks! (fhahn, Mar 18, 2024)
9f68460 - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Mar 25, 2024)
74cb095 - !fixup address comments, thansk! (fhahn, Mar 25, 2024)
4211565 - Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t… (fhahn, Mar 26, 2024)
643969c - !fixup address latest comments, thanks! (fhahn, Mar 26, 2024)
35 changes: 2 additions & 33 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9122,42 +9122,11 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
"Not a pointer induction according to InductionDescriptor!");
assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
"Unexpected type.");
assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
"Recipe should have been replaced");

auto *IVR = getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0, /*IsScalar*/ true));

if (onlyScalarsGenerated(State.VF.isScalable())) {
// This is the normalized GEP that starts counting at zero.
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
CanonicalIV, IndDesc.getStep()->getType());
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
bool IsUniform = vputils::onlyFirstLaneUsed(this);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();

for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *PartStart =
createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);

for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = State.Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);

Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
Value *SclrGep = emitTransformedIndex(
State.Builder, GlobalIdx, IndDesc.getStartValue(), Step,
IndDesc.getKind(), IndDesc.getInductionBinOp());
SclrGep->setName("next.gep");
State.set(this, SclrGep, VPIteration(Part, Lane));
}
}
return;
}

Type *PhiType = IndDesc.getStep()->getType();

// Build a pointer phi
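For context: the deleted block above was the inline lowering that emitted one "next.gep" scalar per (Part, Lane) while the recipe executed; after this patch that scalar-only case is rewritten in VPlan itself (see the VPlanTransforms.cpp changes below), and the recipe asserts it no longer occurs. A minimal, hypothetical loop that hits the scalar-only case (illustrative only, not taken from the PR or its tests):

// The pointer induction 'p' is used only as the address of a consecutive
// store, so the vectorizer needs scalar pointer values rather than a vector
// of pointers (in practice only the first lane of each unrolled part), i.e.
// the onlyScalarsGenerated() case.
void fill(int *p, int n) {
  for (int i = 0; i < n; ++i)
    *p++ = i; // p advances by sizeof(int) every iteration
}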
7 changes: 2 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -860,11 +860,8 @@ void VPlan::execute(VPTransformState *State) {
Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
} else {
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
// TODO: Split off the case that all users of a pointer phi are scalar
// from the VPWidenPointerInductionRecipe.
if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
continue;

assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
"recipe generating only scalars should have been replaced");
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
Phi = cast<PHINode>(GEP->getPointerOperand());
}
36 changes: 26 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1155,6 +1155,10 @@ class VPInstruction : public VPRecipeWithIRFlags {
BranchOnCount,
BranchOnCond,
ComputeReductionResult,
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
PtrAdd,
Collaborator: Worth documenting somewhere what this VPInstruction/Opcode represents, including being scalar.
Contributor Author (fhahn): Added, thanks!

};

private:
@@ -1164,11 +1168,23 @@ class VPInstruction : public VPRecipeWithIRFlags {
/// An optional name that can be used for the generated IR instruction.
const std::string Name;

/// Utility method serving execute(): generates a single instance of the
/// modeled instruction. \returns the generated value for \p Part.
/// In some cases an existing value is returned rather than a generated
/// Returns true if this VPInstruction generates scalar values for all lanes.
Collaborator: Adding some context? E.g., "// Most VPInstructions generate a single value per part, either vector or scalar. VPReplicateRecipe takes care of generating multiple (scalar) values per all lanes, stemming from an original ingredient. This method identifies the (rare) cases of VPInstructions that do so as well, w/o an underlying ingredient."
Contributor Author (fhahn): Added, thanks!

bool doesGeneratePerAllLanes() const;

/// Returns true if we can generate a scalar for the first lane only if
/// needed.
bool canGenerateScalarForFirstLane() const;

/// Utility methods serving execute(): generates a single instance of the
/// modeled instruction for a given part. \returns the generated value for \p
/// Part. In some cases an existing value is returned rather than a generated
/// one.
Value *generateInstruction(VPTransformState &State, unsigned Part);
Value *generatePerPart(VPTransformState &State, unsigned Part);
Collaborator: Worth updating documentation? generateInstructionPerPart(), generateInstructionPerLane()??
Contributor Author (fhahn): Updated, thanks!


/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
/// value for lane \p Lane.
Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);

#if !defined(NDEBUG)
/// Return true if the VPInstruction is a floating point math operation, i.e.
@@ -2490,12 +2506,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// for floating point inductions.
const FPMathOperator *FPBinOp;

VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
const FPMathOperator *FPBinOp, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
Kind(Kind), FPBinOp(FPBinOp) {}

public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
@@ -2504,6 +2514,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
Start, CanonicalIV, Step) {}

VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
const FPMathOperator *FPBinOp, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
Kind(Kind), FPBinOp(FPBinOp) {}

~VPDerivedIVRecipe() override = default;

VPRecipeBase *clone() override {
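The new PtrAdd opcode models a base pointer plus a byte offset and only ever produces scalar values: either the first lane only or one value per lane, depending on its users. A minimal sketch of the address arithmetic it stands for, assuming plain byte-wise pointer math (illustrative only, not the VPlan API); the CreatePtrAdd call in VPlanRecipes.cpp below emits this as a getelementptr over i8:

#include <cstdint>

// A PtrAdd with operands (Base, Off) yields the address Off bytes past Base;
// the offset is in bytes and is independent of any pointee type.
char *ptrAdd(char *Base, int64_t OffBytes) {
  return Base + OffBytes;
}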
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -44,6 +44,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
CachedTypes[OtherV] = ResTy;
return ResTy;
}
case VPInstruction::PtrAdd:
Collaborator: nit: worth asserting and caching the type of the other operand, i.e., join the above cases of ICmp and FOR Splice?
Contributor Author (fhahn): PtrAdd's operands have different types, with the first one being a pointer and the second one being an integer offset.
Collaborator: Ah, right, of course. Perhaps worth a comment.
Contributor Author (fhahn): Added, thanks!

return inferScalarType(R->getOperand(0));
default:
break;
}
79 changes: 69 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::PtrAdd:
return false;
default:
return true;
@@ -270,10 +271,47 @@ VPInstruction::VPInstruction(unsigned Opcode,
assert(isFPMathOp() && "this op can't take fast-math flags");
}

Value *VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
bool VPInstruction::doesGeneratePerAllLanes() const {
return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
if (Instruction::isBinaryOp(getOpcode()))
return true;

switch (Opcode) {
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnCount:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ComputeReductionResult:
case VPInstruction::PtrAdd:
return true;
default:
return false;
}
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
const VPIteration &Lane) {
IRBuilderBase &Builder = State.Builder;

switch (getOpcode()) {
case VPInstruction::PtrAdd: {
auto *P = Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
State.get(getOperand(1), Lane), Name);
return P;
}
default: {
Value *Res = generatePerPart(State, Lane.Part);
Collaborator (ayalz, Mar 21, 2024): Ah, this raises an eyebrow, or two. When is it hard for the caller to accurately call generatePerLane rather than generatePerPart? If the former is called for all lanes, this may end up generating VF copies for the same part. Could some assert guard against this happening, e.g., Lane must be zero?
Contributor Author (fhahn): This is a consequence of generatePerLane being responsible for generating all lanes and/or just the first lane, while generatePerPart can generate either the vector or the scalar per part, but would also need to support PtrAdd. It seems clearer to flip the delegation around and have generatePerPart call generatePerLane for PtrAdd. Updated.

assert(!hasResult() || !Res->getType()->isVectorTy());
Collaborator: missing error message. This assert is taken care of by the caller of generatePerPart(); could this be done consistently?
Contributor Author (fhahn): Assert is gone now.

return Res;
}
}
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
IRBuilderBase &Builder = State.Builder;
Builder.SetCurrentDebugLocation(getDebugLoc());

if (Instruction::isBinaryOp(getOpcode())) {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
@@ -349,7 +387,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Value *Step =
createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
Value *Sub = Builder.CreateSub(ScalarTC, Step);
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
Value *Cmp =
Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
Collaborator: nit: unrelated clang-format fix?
Contributor Author (fhahn): undone, thanks!
Collaborator: still visible, missing a commit?
Contributor Author (fhahn): Should be undone now; need to be careful to keep clang-format from undoing the change.

Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
return Builder.CreateSelect(Cmp, Sub, Zero);
}
@@ -511,17 +550,33 @@ void VPInstruction::execute(VPTransformState &State) {
"Recipe not a FPMathOp but has fast-math flags?");
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
State.Builder.SetCurrentDebugLocation(getDebugLoc());
bool OnlyGenerateFirstLane =
Collaborator: Suggested change: "bool OnlyGenerateFirstLane =" -> "bool GeneratesPerFirstLaneOnly =", to be more consistent?
Contributor Author (fhahn): Done, thanks!

canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
getOpcode() == VPInstruction::ComputeReductionResult);
Collaborator: (Unrelated to this patch, while we're here: why is ComputeReductionResult an exception?)

bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *GeneratedValue = generateInstruction(State, Part);
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
Lane != NumLanes; ++Lane) {
Value *P = generatePerLane(State, VPIteration(Part, Lane));
Collaborator: Can assert P here, consistent with asserts of GeneratedValue below.
Contributor Author (fhahn): Added, thanks!

State.set(this, P, VPIteration(Part, Lane));
}
continue;
}

Value *GeneratedValue = OnlyGenerateFirstLane
? generatePerLane(State, VPIteration(Part, 0))
: generatePerPart(State, Part);
if (!hasResult())
continue;
assert(GeneratedValue && "generateInstruction must produce a value");
Collaborator: generatePerPart rather than generateInstruction.
Contributor Author (fhahn): Fixed, thanks!


bool IsVector = GeneratedValue->getType()->isVectorTy();
State.set(this, GeneratedValue, Part, !IsVector);
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult ||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
"scalar value but not only first lane used");
State.set(this, GeneratedValue, Part, /*IsScalar*/ OnlyGenerateFirstLane);
Collaborator: nit: better to set State after the following assert.
Contributor Author (fhahn): Order flipped, thanks!

assert((GeneratedValue->getType()->isVectorTy() == !OnlyGenerateFirstLane ||
State.VF.isScalar()) &&
"scalar value but not only first lane defined");
}
}

@@ -534,6 +589,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
default:
return false;
case Instruction::ICmp:
case VPInstruction::PtrAdd:
// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
case VPInstruction::ActiveLaneMask:
@@ -591,6 +647,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
case VPInstruction::PtrAdd:
O << "ptradd";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
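Taken together, the execute() changes above dispatch each VPInstruction into one of three shapes per unrolled part: scalars for all lanes, a scalar for the first lane only, or a single per-part value. A self-contained sketch of that dispatch, where UF, VF and the emit* helpers are illustrative stand-ins rather than LLVM API:

#include <cstdio>

// Stand-ins for generatePerLane / generatePerPart; purely illustrative.
static void emitPerLane(unsigned Part, unsigned Lane) {
  std::printf("scalar value for (part %u, lane %u)\n", Part, Lane);
}
static void emitPerPart(unsigned Part) {
  std::printf("one (vector or scalar) value for part %u\n", Part);
}

// Sketch of the dispatch VPInstruction::execute() performs after this patch.
void executeSketch(unsigned UF, unsigned VF, bool PerAllLanes,
                   bool FirstLaneOnly) {
  for (unsigned Part = 0; Part != UF; ++Part) {
    if (PerAllLanes) {                 // e.g. PtrAdd with per-lane users
      for (unsigned Lane = 0; Lane != VF; ++Lane)
        emitPerLane(Part, Lane);       // fixed VF only
    } else if (FirstLaneOnly) {
      emitPerLane(Part, /*Lane=*/0);   // single scalar per part
    } else {
      emitPerPart(Part);               // one value per part
    }
  }
}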
60 changes: 49 additions & 11 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -499,15 +499,18 @@ static void removeDeadRecipes(VPlan &Plan) {
}
}

static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
static VPValue *createScalarIVSteps(VPlan &Plan,
InductionDescriptor::InductionKind Kind,
Instruction::BinaryOps InductionOpcode,
FPMathOperator *FPBinOp,
ScalarEvolution &SE, Instruction *TruncI,
VPValue *StartV, VPValue *Step,
VPBasicBlock::iterator IP) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
VPSingleDefRecipe *BaseIV = CanonicalIV;
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step);
HeaderVPBB->insert(BaseIV, IP);
}

@@ -537,21 +540,54 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
VecPreheader->appendRecipe(Step->getDefiningRecipe());
}

VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
BaseIV, Step, InductionOpcode,
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
HeaderVPBB->insert(Steps, IP);
return Steps;
}

/// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values,
/// provide them by building scalar steps off of the canonical scalar IV and
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe cannot generate scalars. Also optimize
Collaborator (ayalz, Mar 21, 2024): Suggested change: "/// VPWidenPointerInductionRecipe cannot generate scalars. Also optimize" -> "/// VPWidenPointerInductionRecipe will generate vectors only. Also optimize". So if scalars are used along with vectors of a widened pointer induction, the former will be extracted from the latter; as opposed to widened int or fp inductions, where scalar users are fed scalar steps even if there are also vector users. Right? This preserves current behavior, but worth noting the discrepancy, potentially removing it later.
Contributor Author (fhahn): Tried to clarify, thanks!

/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
/// providing them by building scalar steps off of the canonical scalar IV and
Collaborator: Suggested change: "/// providing them by building scalar steps off of the canonical scalar IV and" -> "/// providing them scalar steps built on the canonical scalar IV and".
Contributor Author (fhahn): Adjusted, thanks!

/// update the original IV's users. This is an optional optimization to reduce
/// the needs of vector extracts.
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
SmallVector<VPRecipeBase *> ToRemove;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
// Replace wide pointer inductions which have only their scalars used by
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
continue;

const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
VPValue *StartV = Plan.getVPValueOrAddLiveIn(
ConstantInt::get(ID.getStep()->getType(), 0));
VPValue *StepV = PtrIV->getOperand(1);
VPRecipeBase *Steps =
createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction,
Instruction::Add, nullptr, SE, nullptr, StartV,
StepV, InsertPt)
->getDefiningRecipe();

auto *Recipe =
new VPInstruction(VPInstruction::PtrAdd,
{PtrIV->getStartValue(), Steps->getVPSingleValue()},
PtrIV->getDebugLoc(), "next.gep");

Recipe->insertAfter(Steps);
PtrIV->replaceAllUsesWith(Recipe);
continue;
}

Collaborator: Perhaps worth moving here some of the documentation above that described what happens next.
Contributor Author (fhahn): Added a comment, thanks!

// Replace widened induction with scalar steps for users that only use
// scalars.
Collaborator: Would be good if these two cases, which both call createScalarIVSteps for scalar users only, would share something.
Contributor Author (fhahn): I think trying to share createScalarIVSteps for both would make things more complicated, as they have a lot of different arguments. Left as is for now.

auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (!WideIV)
continue;
@@ -561,9 +597,11 @@ static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
continue;

const InductionDescriptor &ID = WideIV->getInductionDescriptor();
VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
WideIV->getStartValue(),
WideIV->getStepValue(), InsertPt);
VPValue *Steps = createScalarIVSteps(
Plan, ID.getKind(), ID.getInductionOpcode(),
dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE,
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
InsertPt);

// Update scalar users of IV to use Step instead.
if (!HasOnlyVectorVFs)
@@ -1026,7 +1064,7 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
removeRedundantInductionCasts(Plan);

simplifyRecipes(Plan, SE.getContext());
optimizeInductions(Plan, SE);
legalizeAndOptimizeInductions(Plan, SE);
removeDeadRecipes(Plan);

createAndOptimizeReplicateRegions(Plan);
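With the legalization above, the recipe chain PtrAdd(Start, ScalarIVSteps(0, Step)) computes, for unroll part Part and lane Lane, the same scalar address the removed LoopVectorize.cpp block used to produce via emitTransformedIndex. A small sketch of that address math under the stated assumptions (integer byte step, fixed VF; names are illustrative, not LLVM API):

#include <cstdint>

// Scalar "next.gep" address for element (Part, Lane), given the canonical
// induction value CanIV at the start of the vector iteration, start pointer
// P0, byte step S and fixed vectorization factor VF. When only the first
// lane is used, Lane is always 0.
char *nextGep(char *P0, int64_t CanIV, unsigned Part, unsigned VF,
              unsigned Lane, int64_t S) {
  int64_t Idx = CanIV + int64_t(Part) * VF + Lane; // scalar element index
  return P0 + Idx * S;                             // byte offset from P0
}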