-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LoopVectorize] Vectorize select-cmp reduction pattern for increasing integer induction variable #67812
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoopVectorize] Vectorize select-cmp reduction pattern for increasing integer induction variable #67812
Changes from all commits
6bdc81f
bcab2a6
dfa355b
734da15
2c896c4
1946c8c
e8d5b1d
d2bfe2f
4a88b44
d21b127
2fe0a94
7ebc7d8
3e2e9f1
11900cd
2924bf9
dd21cd3
76e91cc
4f743ab
556743a
a852eb6
4947e0f
6e739d8
7d2a8ec
e530a3a
e50439e
ab0b4d3
97e0433
69cd172
5ebec5d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) { | |
case RecurKind::UMin: | ||
case RecurKind::IAnyOf: | ||
case RecurKind::FAnyOf: | ||
case RecurKind::IFindLastIV: | ||
case RecurKind::FFindLastIV: | ||
return true; | ||
} | ||
return false; | ||
|
@@ -372,7 +374,7 @@ bool RecurrenceDescriptor::AddReductionVar( | |
// type-promoted). | ||
if (Cur != Start) { | ||
ReduxDesc = | ||
isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF); | ||
isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE); | ||
ExactFPMathInst = ExactFPMathInst == nullptr | ||
? ReduxDesc.getExactFPMathInst() | ||
: ExactFPMathInst; | ||
|
@@ -658,6 +660,95 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, | |
: RecurKind::FAnyOf); | ||
} | ||
|
||
// We are looking for loops that do something like this: | ||
// int r = 0; | ||
// for (int i = 0; i < n; i++) { | ||
// if (src[i] > 3) | ||
// r = i; | ||
// } | ||
// The reduction value (r) is derived from either the values of an increasing | ||
// induction variable (i) sequence, or from the start value (0). | ||
// The LLVM IR generated for such loops would be as follows: | ||
// for.body: | ||
// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ] | ||
// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ] | ||
// ... | ||
// %cmp = icmp sgt i32 %5, 3 | ||
// %spec.select = select i1 %cmp, i32 %i, i32 %r | ||
// %inc = add nsw i32 %i, 1 | ||
// ... | ||
// Since 'i' is an increasing induction variable, the reduction value after the | ||
// loop will be the maximum value of 'i' that the condition (src[i] > 3) is | ||
// satisfied, or the start value (0 in the example above). When the start value | ||
// of the increasing induction variable 'i' is greater than the minimum value of | ||
// the data type, we can use the minimum value of the data type as a sentinel | ||
// value to replace the start value. This allows us to perform a single | ||
// reduction max operation to obtain the final reduction result. | ||
// TODO: It is possible to solve the case where the start value is the minimum | ||
// value of the data type or a non-constant value by using mask and multiple | ||
// reduction operations. | ||
RecurrenceDescriptor::InstDesc | ||
RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, | ||
Instruction *I, ScalarEvolution &SE) { | ||
// TODO: Support the vectorization of FindLastIV when the reduction phi is | ||
// used by more than one select instruction. This vectorization is only | ||
// performed when the SCEV of each increasing induction variable used by the | ||
// select instructions is identical. | ||
if (!OrigPhi->hasOneUse()) | ||
return InstDesc(false, I); | ||
|
||
// TODO: Match selects with multi-use cmp conditions. | ||
Value *NonRdxPhi = nullptr; | ||
if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi), | ||
m_Specific(OrigPhi)), | ||
m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi), | ||
m_Value(NonRdxPhi))))) | ||
return InstDesc(false, I); | ||
|
||
auto IsIncreasingLoopInduction = [&](Value *V) { | ||
Type *Ty = V->getType(); | ||
if (!SE.isSCEVable(Ty)) | ||
return false; | ||
|
||
auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V)); | ||
if (!AR || AR->getLoop() != TheLoop) | ||
return false; | ||
|
||
const SCEV *Step = AR->getStepRecurrence(SE); | ||
if (!SE.isKnownPositive(Step)) | ||
return false; | ||
|
||
const ConstantRange IVRange = SE.getSignedRange(AR); | ||
unsigned NumBits = Ty->getIntegerBitWidth(); | ||
// Keep the minimum value of the recurrence type as the sentinel value. | ||
// The maximum acceptable range for the increasing induction variable, | ||
// called the valid range, will be defined as | ||
// [<sentinel value> + 1, <sentinel value>) | ||
// where <sentinel value> is SignedMin(<recurrence type>) | ||
// TODO: This range restriction can be lifted by adding an additional | ||
// virtual OR reduction. | ||
const APInt Sentinel = APInt::getSignedMinValue(NumBits); | ||
Mel-Chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const ConstantRange ValidRange = | ||
Mel-Chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); | ||
LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange | ||
<< ", and the signed range of " << *AR << " is " | ||
<< IVRange << "\n"); | ||
// Ensure the induction variable does not wrap around by verifying that its | ||
// range is fully contained within the valid range. | ||
return ValidRange.contains(IVRange); | ||
Mel-Chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
|
||
// We are looking for selects of the form: | ||
Mel-Chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// select(cmp(), phi, increasing_loop_induction) or | ||
// select(cmp(), increasing_loop_induction, phi) | ||
// TODO: Support for monotonically decreasing induction variable | ||
if (!IsIncreasingLoopInduction(NonRdxPhi)) | ||
return InstDesc(false, I); | ||
|
||
return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IFindLastIV | ||
: RecurKind::FFindLastIV); | ||
} | ||
|
||
RecurrenceDescriptor::InstDesc | ||
RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, | ||
const InstDesc &Prev) { | ||
|
@@ -756,10 +847,9 @@ RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) { | |
return InstDesc(true, SI); | ||
} | ||
|
||
RecurrenceDescriptor::InstDesc | ||
RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi, | ||
Instruction *I, RecurKind Kind, | ||
InstDesc &Prev, FastMathFlags FuncFMF) { | ||
RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( | ||
Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev, | ||
FastMathFlags FuncFMF, ScalarEvolution *SE) { | ||
assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind); | ||
switch (I->getOpcode()) { | ||
default: | ||
|
@@ -789,6 +879,8 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi, | |
if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul || | ||
Kind == RecurKind::Add || Kind == RecurKind::Mul) | ||
return isConditionalRdxPattern(Kind, I); | ||
if (isFindLastIVRecurrenceKind(Kind) && SE) | ||
return isFindLastIVPattern(L, OrigPhi, I, *SE); | ||
[[fallthrough]]; | ||
case Instruction::FCmp: | ||
case Instruction::ICmp: | ||
|
@@ -893,6 +985,15 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, | |
<< *Phi << "\n"); | ||
return true; | ||
} | ||
if (AddReductionVar(Phi, RecurKind::IFindLastIV, TheLoop, FMF, RedDes, DB, AC, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we not also need one for FFindLastIV? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question. We don't need to call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The AnyOf recurrence kinds do the same thing, although there is a separate check for We would only see that when looking at iv-descriptors debug output, and there's no tests for that. This code at least reports the correct type for an fcmp. I figure it's fine with just the single case here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like #118393 is attempting to improve this issue. |
||
DT, SE)) { | ||
LLVM_DEBUG(dbgs() << "Found a " | ||
<< (RedDes.getRecurrenceKind() == RecurKind::FFindLastIV | ||
? "F" | ||
: "I") | ||
<< "FindLastIV reduction PHI." << *Phi << "\n"); | ||
return true; | ||
} | ||
if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT, | ||
SE)) { | ||
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); | ||
|
@@ -1048,12 +1149,14 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { | |
case RecurKind::UMax: | ||
case RecurKind::UMin: | ||
case RecurKind::IAnyOf: | ||
case RecurKind::IFindLastIV: | ||
return Instruction::ICmp; | ||
case RecurKind::FMax: | ||
case RecurKind::FMin: | ||
case RecurKind::FMaximum: | ||
case RecurKind::FMinimum: | ||
case RecurKind::FAnyOf: | ||
case RecurKind::FFindLastIV: | ||
return Instruction::FCmp; | ||
default: | ||
llvm_unreachable("Unknown recurrence operation"); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5185,8 +5185,9 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, | |
HasReductions && | ||
any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool { | ||
const RecurrenceDescriptor &RdxDesc = Reduction.second; | ||
return RecurrenceDescriptor::isAnyOfRecurrenceKind( | ||
RdxDesc.getRecurrenceKind()); | ||
RecurKind RK = RdxDesc.getRecurrenceKind(); | ||
return RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || | ||
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. needs test without forced interleave count ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want to confirm whether you're trying to test this message?
or if you simply want to see test results without |
||
}); | ||
if (HasSelectCmpReductions) { | ||
LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n"); | ||
|
@@ -9449,8 +9450,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
|
||
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); | ||
RecurKind Kind = RdxDesc.getRecurrenceKind(); | ||
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && | ||
"AnyOf reductions are not allowed for in-loop reductions"); | ||
assert( | ||
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && | ||
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) && | ||
"AnyOf and FindLast reductions are not allowed for in-loop reductions"); | ||
|
||
// Collect the chain of "link" recipes for the reduction starting at PhiR. | ||
SetVector<VPSingleDefRecipe *> Worklist; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we get rid of the closure in favor of direct returns? I see you mention matching multiple selects above, but those would be evaluated independently later, since
AddReductionVar
will continue evaluating users.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Your comment suggests removing the lambda function
IsIncreasingLoopInduction
and replacing it with a direct return statements, if I understand correctly.This is indeed possible, but I worry that it could reduce the readability, making it difficult for developers to understand that this code section is confirming whether an induction variable is increasing.
Could you suggest a better approach that balances readability?