@@ -1105,10 +1105,10 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
1105
1105
1106
1106
// Calculate the widest type required for known TC, VF and UF.
1107
1107
auto ComputeBitWidth = [](APInt TC, uint64_t Align) {
1108
- auto AlignedTC =
1108
+ APInt AlignedTC =
1109
1109
Align * APIntOps::RoundingUDiv (TC, APInt (TC.getBitWidth (), Align),
1110
1110
APInt::Rounding::UP);
1111
- auto MaxVal = AlignedTC - 1 ;
1111
+ APInt MaxVal = AlignedTC - 1 ;
1112
1112
return std::max<unsigned >(PowerOf2Ceil (MaxVal.getActiveBits ()), 8 );
1113
1113
};
1114
1114
unsigned NewBitWidth =
@@ -1122,6 +1122,10 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
1122
1122
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1123
1123
for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
1124
1124
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1125
+
1126
+ // Currently only handle canonical IVs as it is trivial to replace the start
1127
+ // and stop values, and we only perform the optimisation when the IV is only
1128
+ // used by the comparison controlling loop control-flow.
1125
1129
if (!WideIV || !WideIV->isCanonical () ||
1126
1130
WideIV->hasMoreThanOneUniqueUser () ||
1127
1131
NewIVTy == WideIV->getScalarType ())
@@ -1145,7 +1149,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
1145
1149
auto *NewBTC = new VPWidenCastRecipe (
1146
1150
Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount (), NewIVTy);
1147
1151
Plan.getVectorPreheader ()->appendRecipe (NewBTC);
1148
- auto *Cmp = dyn_cast <VPInstruction>(*WideIV->user_begin ());
1152
+ auto *Cmp = cast <VPInstruction>(*WideIV->user_begin ());
1149
1153
Cmp->setOperand (1 , NewBTC);
1150
1154
1151
1155
MadeChange = true ;
0 commit comments