@@ -13000,179 +13000,227 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
13000
13000
return RHS;
13001
13001
}
13002
13002
13003
- // When the RHS is not invariant, we do not know the end bound of the loop and
13004
- // cannot calculate the ExactBECount needed by ExitLimit. However, we can
13005
- // calculate the MaxBECount, given the start, stride and max value for the end
13006
- // bound of the loop (RHS), and the fact that IV does not overflow (which is
13007
- // checked above).
13003
+ const SCEV *End = nullptr, *BECount = nullptr,
13004
+ *BECountIfBackedgeTaken = nullptr;
13008
13005
if (!isLoopInvariant(RHS, L)) {
13009
- const SCEV *MaxBECount = computeMaxBECountForLT(
13010
- Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
13011
- return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
13012
- MaxBECount, false /*MaxOrZero*/, Predicates);
13013
- }
13014
-
13015
- // We use the expression (max(End,Start)-Start)/Stride to describe the
13016
- // backedge count, as if the backedge is taken at least once max(End,Start)
13017
- // is End and so the result is as above, and if not max(End,Start) is Start
13018
- // so we get a backedge count of zero.
13019
- const SCEV *BECount = nullptr;
13020
- auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride);
13021
- assert(isAvailableAtLoopEntry(OrigStartMinusStride, L) && "Must be!");
13022
- assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!");
13023
- assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!");
13024
- // Can we prove (max(RHS,Start) > Start - Stride?
13025
- if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) &&
13026
- isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) {
13027
- // In this case, we can use a refined formula for computing backedge taken
13028
- // count. The general formula remains:
13029
- // "End-Start /uceiling Stride" where "End = max(RHS,Start)"
13030
- // We want to use the alternate formula:
13031
- // "((End - 1) - (Start - Stride)) /u Stride"
13032
- // Let's do a quick case analysis to show these are equivalent under
13033
- // our precondition that max(RHS,Start) > Start - Stride.
13034
- // * For RHS <= Start, the backedge-taken count must be zero.
13035
- // "((End - 1) - (Start - Stride)) /u Stride" reduces to
13036
- // "((Start - 1) - (Start - Stride)) /u Stride" which simplies to
13037
- // "Stride - 1 /u Stride" which is indeed zero for all non-zero values
13038
- // of Stride. For 0 stride, we've use umin(1,Stride) above, reducing
13039
- // this to the stride of 1 case.
13040
- // * For RHS >= Start, the backedge count must be "RHS-Start /uceil Stride".
13041
- // "((End - 1) - (Start - Stride)) /u Stride" reduces to
13042
- // "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to
13043
- // "((RHS - (Start - Stride) - 1) /u Stride".
13044
- // Our preconditions trivially imply no overflow in that form.
13045
- const SCEV *MinusOne = getMinusOne(Stride->getType());
13046
- const SCEV *Numerator =
13047
- getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride));
13048
- BECount = getUDivExpr(Numerator, Stride);
13049
- }
13050
-
13051
- const SCEV *BECountIfBackedgeTaken = nullptr;
13052
- if (!BECount) {
13053
- auto canProveRHSGreaterThanEqualStart = [&]() {
13054
- auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13055
- const SCEV *GuardedRHS = applyLoopGuards(OrigRHS, L);
13056
- const SCEV *GuardedStart = applyLoopGuards(OrigStart, L);
13057
-
13058
- if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart) ||
13059
- isKnownPredicate(CondGE, GuardedRHS, GuardedStart))
13060
- return true;
13061
-
13062
- // (RHS > Start - 1) implies RHS >= Start.
13063
- // * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if
13064
- // "Start - 1" doesn't overflow.
13065
- // * For signed comparison, if Start - 1 does overflow, it's equal
13066
- // to INT_MAX, and "RHS >s INT_MAX" is trivially false.
13067
- // * For unsigned comparison, if Start - 1 does overflow, it's equal
13068
- // to UINT_MAX, and "RHS >u UINT_MAX" is trivially false.
13069
- //
13070
- // FIXME: Should isLoopEntryGuardedByCond do this for us?
13071
- auto CondGT = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13072
- auto *StartMinusOne = getAddExpr(OrigStart,
13073
- getMinusOne(OrigStart->getType()));
13074
- return isLoopEntryGuardedByCond(L, CondGT, OrigRHS, StartMinusOne);
13075
- };
13076
-
13077
- // If we know that RHS >= Start in the context of loop, then we know that
13078
- // max(RHS, Start) = RHS at this point.
13079
- const SCEV *End;
13080
- if (canProveRHSGreaterThanEqualStart()) {
13081
- End = RHS;
13082
- } else {
13083
- // If RHS < Start, the backedge will be taken zero times. So in
13084
- // general, we can write the backedge-taken count as:
13006
+ const auto *RHSAddRec = dyn_cast<SCEVAddRecExpr>(RHS);
13007
+ if (PositiveStride && RHSAddRec != nullptr && RHSAddRec->getLoop() == L &&
13008
+ RHSAddRec->getNoWrapFlags()) {
13009
+ // The structure of loop we are trying to calculate backedge count of:
13085
13010
//
13086
- // RHS >= Start ? ceil(RHS - Start) / Stride : 0
13011
+ // left = left_start
13012
+ // right = right_start
13087
13013
//
13088
- // We convert it to the following to make it more convenient for SCEV:
13014
+ // while(left < right){
13015
+ // ... do something here ...
13016
+ // left += s1; // stride of left is s1 (s1 > 0)
13017
+ // right += s2; // stride of right is s2 (s2 < 0)
13018
+ // }
13089
13019
//
13090
- // ceil(max(RHS, Start) - Start) / Stride
13091
- End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
13092
13020
13093
- // See what would happen if we assume the backedge is taken. This is
13094
- // used to compute MaxBECount.
13095
- BECountIfBackedgeTaken = getUDivCeilSCEV(getMinusSCEV(RHS, Start), Stride);
13096
- }
13021
+ const SCEV *RHSStart = RHSAddRec->getStart();
13022
+ const SCEV *RHSStride = RHSAddRec->getStepRecurrence(*this);
13097
13023
13098
- // At this point, we know:
13099
- //
13100
- // 1. If IsSigned, Start <=s End; otherwise, Start <=u End
13101
- // 2. The index variable doesn't overflow.
13102
- //
13103
- // Therefore, we know N exists such that
13104
- // (Start + Stride * N) >= End, and computing "(Start + Stride * N)"
13105
- // doesn't overflow.
13106
- //
13107
- // Using this information, try to prove whether the addition in
13108
- // "(Start - End) + (Stride - 1)" has unsigned overflow.
13109
- const SCEV *One = getOne(Stride->getType());
13110
- bool MayAddOverflow = [&] {
13111
- if (auto *StrideC = dyn_cast<SCEVConstant>(Stride)) {
13112
- if (StrideC->getAPInt().isPowerOf2()) {
13113
- // Suppose Stride is a power of two, and Start/End are unsigned
13114
- // integers. Let UMAX be the largest representable unsigned
13115
- // integer.
13116
- //
13117
- // By the preconditions of this function, we know
13118
- // "(Start + Stride * N) >= End", and this doesn't overflow.
13119
- // As a formula:
13120
- //
13121
- // End <= (Start + Stride * N) <= UMAX
13122
- //
13123
- // Subtracting Start from all the terms:
13124
- //
13125
- // End - Start <= Stride * N <= UMAX - Start
13126
- //
13127
- // Since Start is unsigned, UMAX - Start <= UMAX. Therefore:
13128
- //
13129
- // End - Start <= Stride * N <= UMAX
13130
- //
13131
- // Stride * N is a multiple of Stride. Therefore,
13132
- //
13133
- // End - Start <= Stride * N <= UMAX - (UMAX mod Stride)
13134
- //
13135
- // Since Stride is a power of two, UMAX + 1 is divisible by Stride.
13136
- // Therefore, UMAX mod Stride == Stride - 1. So we can write:
13137
- //
13138
- // End - Start <= Stride * N <= UMAX - Stride - 1
13139
- //
13140
- // Dropping the middle term:
13141
- //
13142
- // End - Start <= UMAX - Stride - 1
13143
- //
13144
- // Adding Stride - 1 to both sides:
13145
- //
13146
- // (End - Start) + (Stride - 1) <= UMAX
13147
- //
13148
- // In other words, the addition doesn't have unsigned overflow.
13149
- //
13150
- // A similar proof works if we treat Start/End as signed values.
13151
- // Just rewrite steps before "End - Start <= Stride * N <= UMAX" to
13152
- // use signed max instead of unsigned max. Note that we're trying
13153
- // to prove a lack of unsigned overflow in either case.
13154
- return false;
13024
+ // If Stride - RHSStride is positive and does not overflow, we can write
13025
+ // backedge count as ->
13026
+ // ceil((End - Start) /u (Stride - RHSStride))
13027
+ // Where, End = max(RHSStart, Start)
13028
+
13029
+ // Check if RHSStride < 0 and Stride - RHSStride will not overflow.
13030
+ if (isKnownNegative(RHSStride) &&
13031
+ willNotOverflow(Instruction::Sub, /*Signed=*/true, Stride,
13032
+ RHSStride)) {
13033
+
13034
+ const SCEV *Denominator = getMinusSCEV(Stride, RHSStride);
13035
+ if (isKnownPositive(Denominator)) {
13036
+ End = IsSigned ? getSMaxExpr(RHSStart, Start)
13037
+ : getUMaxExpr(RHSStart, Start);
13038
+
13039
+ // We can do this because End >= Start, as End = max(RHSStart, Start)
13040
+ const SCEV *Delta = getMinusSCEV(End, Start);
13041
+
13042
+ BECount = getUDivCeilSCEV(Delta, Denominator);
13043
+ BECountIfBackedgeTaken =
13044
+ getUDivCeilSCEV(getMinusSCEV(RHSStart, Start), Denominator);
13155
13045
}
13156
13046
}
13157
- if (Start == Stride || Start == getMinusSCEV(Stride, One)) {
13158
- // If Start is equal to Stride, (End - Start) + (Stride - 1) == End - 1.
13159
- // If !IsSigned, 0 <u Stride == Start <=u End; so 0 <u End - 1 <u End.
13160
- // If IsSigned, 0 <s Stride == Start <=s End; so 0 <s End - 1 <s End.
13047
+ }
13048
+ if (BECount == nullptr) {
13049
+ // If we cannot calculate ExactBECount, we can calculate the MaxBECount,
13050
+ // given the start, stride and max value for the end bound of the
13051
+ // loop (RHS), and the fact that IV does not overflow (which is
13052
+ // checked above).
13053
+ const SCEV *MaxBECount = computeMaxBECountForLT(
13054
+ Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
13055
+ return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
13056
+ MaxBECount, false /*MaxOrZero*/, Predicates);
13057
+ }
13058
+ } else {
13059
+ // We use the expression (max(End,Start)-Start)/Stride to describe the
13060
+ // backedge count, as if the backedge is taken at least once
13061
+ // max(End,Start) is End and so the result is as above, and if not
13062
+ // max(End,Start) is Start so we get a backedge count of zero.
13063
+ auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride);
13064
+ assert(isAvailableAtLoopEntry(OrigStartMinusStride, L) && "Must be!");
13065
+ assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!");
13066
+ assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!");
13067
+ // Can we prove (max(RHS,Start) > Start - Stride?
13068
+ if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) &&
13069
+ isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) {
13070
+ // In this case, we can use a refined formula for computing backedge
13071
+ // taken count. The general formula remains:
13072
+ // "End-Start /uceiling Stride" where "End = max(RHS,Start)"
13073
+ // We want to use the alternate formula:
13074
+ // "((End - 1) - (Start - Stride)) /u Stride"
13075
+ // Let's do a quick case analysis to show these are equivalent under
13076
+ // our precondition that max(RHS,Start) > Start - Stride.
13077
+ // * For RHS <= Start, the backedge-taken count must be zero.
13078
+ // "((End - 1) - (Start - Stride)) /u Stride" reduces to
13079
+ // "((Start - 1) - (Start - Stride)) /u Stride" which simplies to
13080
+ // "Stride - 1 /u Stride" which is indeed zero for all non-zero values
13081
+ // of Stride. For 0 stride, we've use umin(1,Stride) above,
13082
+ // reducing this to the stride of 1 case.
13083
+ // * For RHS >= Start, the backedge count must be "RHS-Start /uceil
13084
+ // Stride".
13085
+ // "((End - 1) - (Start - Stride)) /u Stride" reduces to
13086
+ // "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to
13087
+ // "((RHS - (Start - Stride) - 1) /u Stride".
13088
+ // Our preconditions trivially imply no overflow in that form.
13089
+ const SCEV *MinusOne = getMinusOne(Stride->getType());
13090
+ const SCEV *Numerator =
13091
+ getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride));
13092
+ BECount = getUDivExpr(Numerator, Stride);
13093
+ }
13094
+
13095
+ if (!BECount) {
13096
+ auto canProveRHSGreaterThanEqualStart = [&]() {
13097
+ auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13098
+ const SCEV *GuardedRHS = applyLoopGuards(OrigRHS, L);
13099
+ const SCEV *GuardedStart = applyLoopGuards(OrigStart, L);
13100
+
13101
+ if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart) ||
13102
+ isKnownPredicate(CondGE, GuardedRHS, GuardedStart))
13103
+ return true;
13104
+
13105
+ // (RHS > Start - 1) implies RHS >= Start.
13106
+ // * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if
13107
+ // "Start - 1" doesn't overflow.
13108
+ // * For signed comparison, if Start - 1 does overflow, it's equal
13109
+ // to INT_MAX, and "RHS >s INT_MAX" is trivially false.
13110
+ // * For unsigned comparison, if Start - 1 does overflow, it's equal
13111
+ // to UINT_MAX, and "RHS >u UINT_MAX" is trivially false.
13161
13112
//
13162
- // If Start is equal to Stride - 1, (End - Start) + Stride - 1 == End.
13163
- return false;
13113
+ // FIXME: Should isLoopEntryGuardedByCond do this for us?
13114
+ auto CondGT = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13115
+ auto *StartMinusOne =
13116
+ getAddExpr(OrigStart, getMinusOne(OrigStart->getType()));
13117
+ return isLoopEntryGuardedByCond(L, CondGT, OrigRHS, StartMinusOne);
13118
+ };
13119
+
13120
+ // If we know that RHS >= Start in the context of loop, then we know
13121
+ // that max(RHS, Start) = RHS at this point.
13122
+ if (canProveRHSGreaterThanEqualStart()) {
13123
+ End = RHS;
13124
+ } else {
13125
+ // If RHS < Start, the backedge will be taken zero times. So in
13126
+ // general, we can write the backedge-taken count as:
13127
+ //
13128
+ // RHS >= Start ? ceil(RHS - Start) / Stride : 0
13129
+ //
13130
+ // We convert it to the following to make it more convenient for SCEV:
13131
+ //
13132
+ // ceil(max(RHS, Start) - Start) / Stride
13133
+ End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
13134
+
13135
+ // See what would happen if we assume the backedge is taken. This is
13136
+ // used to compute MaxBECount.
13137
+ BECountIfBackedgeTaken =
13138
+ getUDivCeilSCEV(getMinusSCEV(RHS, Start), Stride);
13164
13139
}
13165
- return true;
13166
- }();
13167
13140
13168
- const SCEV *Delta = getMinusSCEV(End, Start);
13169
- if (!MayAddOverflow) {
13170
- // floor((D + (S - 1)) / S)
13171
- // We prefer this formulation if it's legal because it's fewer operations.
13172
- BECount =
13173
- getUDivExpr(getAddExpr(Delta, getMinusSCEV(Stride, One)), Stride);
13174
- } else {
13175
- BECount = getUDivCeilSCEV(Delta, Stride);
13141
+ // At this point, we know:
13142
+ //
13143
+ // 1. If IsSigned, Start <=s End; otherwise, Start <=u End
13144
+ // 2. The index variable doesn't overflow.
13145
+ //
13146
+ // Therefore, we know N exists such that
13147
+ // (Start + Stride * N) >= End, and computing "(Start + Stride * N)"
13148
+ // doesn't overflow.
13149
+ //
13150
+ // Using this information, try to prove whether the addition in
13151
+ // "(Start - End) + (Stride - 1)" has unsigned overflow.
13152
+ const SCEV *One = getOne(Stride->getType());
13153
+ bool MayAddOverflow = [&] {
13154
+ if (auto *StrideC = dyn_cast<SCEVConstant>(Stride)) {
13155
+ if (StrideC->getAPInt().isPowerOf2()) {
13156
+ // Suppose Stride is a power of two, and Start/End are unsigned
13157
+ // integers. Let UMAX be the largest representable unsigned
13158
+ // integer.
13159
+ //
13160
+ // By the preconditions of this function, we know
13161
+ // "(Start + Stride * N) >= End", and this doesn't overflow.
13162
+ // As a formula:
13163
+ //
13164
+ // End <= (Start + Stride * N) <= UMAX
13165
+ //
13166
+ // Subtracting Start from all the terms:
13167
+ //
13168
+ // End - Start <= Stride * N <= UMAX - Start
13169
+ //
13170
+ // Since Start is unsigned, UMAX - Start <= UMAX. Therefore:
13171
+ //
13172
+ // End - Start <= Stride * N <= UMAX
13173
+ //
13174
+ // Stride * N is a multiple of Stride. Therefore,
13175
+ //
13176
+ // End - Start <= Stride * N <= UMAX - (UMAX mod Stride)
13177
+ //
13178
+ // Since Stride is a power of two, UMAX + 1 is divisible by
13179
+ // Stride. Therefore, UMAX mod Stride == Stride - 1. So we can
13180
+ // write:
13181
+ //
13182
+ // End - Start <= Stride * N <= UMAX - Stride - 1
13183
+ //
13184
+ // Dropping the middle term:
13185
+ //
13186
+ // End - Start <= UMAX - Stride - 1
13187
+ //
13188
+ // Adding Stride - 1 to both sides:
13189
+ //
13190
+ // (End - Start) + (Stride - 1) <= UMAX
13191
+ //
13192
+ // In other words, the addition doesn't have unsigned overflow.
13193
+ //
13194
+ // A similar proof works if we treat Start/End as signed values.
13195
+ // Just rewrite steps before "End - Start <= Stride * N <= UMAX"
13196
+ // to use signed max instead of unsigned max. Note that we're
13197
+ // trying to prove a lack of unsigned overflow in either case.
13198
+ return false;
13199
+ }
13200
+ }
13201
+ if (Start == Stride || Start == getMinusSCEV(Stride, One)) {
13202
+ // If Start is equal to Stride, (End - Start) + (Stride - 1) == End
13203
+ // - 1. If !IsSigned, 0 <u Stride == Start <=u End; so 0 <u End - 1
13204
+ // <u End. If IsSigned, 0 <s Stride == Start <=s End; so 0 <s End -
13205
+ // 1 <s End.
13206
+ //
13207
+ // If Start is equal to Stride - 1, (End - Start) + Stride - 1 ==
13208
+ // End.
13209
+ return false;
13210
+ }
13211
+ return true;
13212
+ }();
13213
+
13214
+ const SCEV *Delta = getMinusSCEV(End, Start);
13215
+ if (!MayAddOverflow) {
13216
+ // floor((D + (S - 1)) / S)
13217
+ // We prefer this formulation if it's legal because it's fewer
13218
+ // operations.
13219
+ BECount =
13220
+ getUDivExpr(getAddExpr(Delta, getMinusSCEV(Stride, One)), Stride);
13221
+ } else {
13222
+ BECount = getUDivCeilSCEV(Delta, Stride);
13223
+ }
13176
13224
}
13177
13225
}
13178
13226
0 commit comments