@@ -1015,10 +1015,10 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
1015
1015
1016
1016
// Calculate the widest type required for known TC, VF and UF.
1017
1017
auto ComputeBitWidth = [](APInt TC, uint64_t Align) {
1018
- auto AlignedTC =
1018
+ APInt AlignedTC =
1019
1019
Align * APIntOps::RoundingUDiv (TC, APInt (TC.getBitWidth (), Align),
1020
1020
APInt::Rounding::UP);
1021
- auto MaxVal = AlignedTC - 1 ;
1021
+ APInt MaxVal = AlignedTC - 1 ;
1022
1022
return std::max<unsigned >(PowerOf2Ceil (MaxVal.getActiveBits ()), 8 );
1023
1023
};
1024
1024
unsigned NewBitWidth =
@@ -1032,6 +1032,10 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
1032
1032
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1033
1033
for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
1034
1034
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1035
+
1036
+ // Currently only handle canonical IVs as it is trivial to replace the start
1037
+ // and stop values, and we only perform the optimisation when the IV is only
1038
+ // used by the comparison controlling loop control-flow.
1035
1039
if (!WideIV || !WideIV->isCanonical () ||
1036
1040
WideIV->hasMoreThanOneUniqueUser () ||
1037
1041
NewIVTy == WideIV->getScalarType ())
@@ -1055,7 +1059,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
1055
1059
auto *NewBTC = new VPWidenCastRecipe (
1056
1060
Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount (), NewIVTy);
1057
1061
Plan.getVectorPreheader ()->appendRecipe (NewBTC);
1058
- auto *Cmp = dyn_cast <VPInstruction>(*WideIV->user_begin ());
1062
+ auto *Cmp = cast <VPInstruction>(*WideIV->user_begin ());
1059
1063
Cmp->setOperand (1 , NewBTC);
1060
1064
1061
1065
MadeChange = true ;
0 commit comments