@@ -14078,16 +14078,43 @@ static Instruction *tryGetSecondaryReductionRoot(PHINode *Phi,
14078
14078
return nullptr;
14079
14079
}
14080
14080
14081
+ /// \returns the first operand of \p I that does not match \p Phi. If
14082
+ /// that operand is not an instruction, it returns nullptr.
14083
+ static Instruction *getNonPhiOperand(Instruction *I, PHINode *Phi) {
14084
+ Value *Op0 = nullptr;
14085
+ Value *Op1 = nullptr;
14086
+ if (!matchRdxBop(I, Op0, Op1))
14087
+ return nullptr;
14088
+ return dyn_cast<Instruction>(Op0 == Phi ? Op1 : Op0);
14089
+ }
14090
+
14091
+ /// \returns true if \p I is a candidate instruction for reduction vectorization.
14092
+ static bool isReductionCandidate(Instruction *I) {
14093
+ bool IsSelect = match(I, m_Select(m_Value(), m_Value(), m_Value()));
14094
+ Value *B0 = nullptr, *B1 = nullptr;
14095
+ bool IsBinop = matchRdxBop(I, B0, B1);
14096
+ return IsBinop || IsSelect;
14097
+ }
14098
+
14081
14099
bool SLPVectorizerPass::vectorizeHorReduction(
14082
14100
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI,
14083
14101
SmallVectorImpl<WeakTrackingVH> &PostponedInsts) {
14084
14102
if (!ShouldVectorizeHor)
14085
14103
return false;
14086
- if (!isa<BinaryOperator>(Root))
14087
- P = nullptr;
14104
+ bool TryOperandsAsNewSeeds = P && isa<BinaryOperator>(Root);
14088
14105
14089
14106
if (Root->getParent() != BB || isa<PHINode>(Root))
14090
14107
return false;
14108
+
14109
+ // If we can find a secondary reduction root, use that instead.
14110
+ auto SelectRoot = [&]() {
14111
+ if (TryOperandsAsNewSeeds && isReductionCandidate(Root) &&
14112
+ HorizontalReduction::getRdxKind(Root) != RecurKind::None)
14113
+ if (Instruction *NewRoot = tryGetSecondaryReductionRoot(P, Root))
14114
+ return NewRoot;
14115
+ return Root;
14116
+ };
14117
+
14091
14118
// Start analysis starting from Root instruction. If horizontal reduction is
14092
14119
// found, try to vectorize it. If it is not a horizontal reduction or
14093
14120
// vectorization is not possible or not effective, and currently analyzed
@@ -14100,28 +14127,32 @@ bool SLPVectorizerPass::vectorizeHorReduction(
14100
14127
// If a horizontal reduction was not matched or vectorized we collect
14101
14128
// instructions for possible later attempts for vectorization.
14102
14129
std::queue<std::pair<Instruction *, unsigned>> Stack;
14103
- Stack.emplace(Root , 0);
14130
+ Stack.emplace(SelectRoot() , 0);
14104
14131
SmallPtrSet<Value *, 8> VisitedInstrs;
14105
14132
bool Res = false;
14106
- auto &&TryToReduce = [this, TTI, &P, &R](Instruction *Inst, Value *&B0,
14107
- Value *&B1) -> Value * {
14133
+ auto &&TryToReduce = [this, TTI, &R](Instruction *Inst) -> Value * {
14108
14134
if (R.isAnalyzedReductionRoot(Inst))
14109
14135
return nullptr;
14110
- bool IsBinop = matchRdxBop( Inst, B0, B1);
14111
- bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value())) ;
14112
- if (IsBinop || IsSelect) {
14113
- assert((!P || is_contained(P->operands(), Inst)) &&
14114
- "Phi needs to use the binary operator") ;
14115
- if (P && HorizontalReduction::getRdxKind(Inst) != RecurKind::None)
14116
- if (Instruction *NewRoot = tryGetSecondaryReductionRoot(P, Inst))
14117
- Inst = NewRoot;
14118
-
14119
- HorizontalReduction HorRdx ;
14120
- if (HorRdx.matchAssociativeReduction(Inst, *SE, *DL, *TLI) )
14121
- return HorRdx.tryToReduce(R, TTI, *TLI) ;
14136
+ if (!isReductionCandidate( Inst))
14137
+ return nullptr ;
14138
+ HorizontalReduction HorRdx;
14139
+ if (!HorRdx.matchAssociativeReduction(Inst, *SE, *DL, *TLI))
14140
+ return nullptr ;
14141
+ return HorRdx.tryToReduce(R, TTI, *TLI);
14142
+ };
14143
+ auto TryAppendToPostponedInsts = [&](Instruction *FutureSeed) {
14144
+ if (TryOperandsAsNewSeeds && FutureSeed == Root) {
14145
+ FutureSeed = getNonPhiOperand(Root, P) ;
14146
+ if (!FutureSeed )
14147
+ return false ;
14122
14148
}
14123
- return nullptr;
14149
+ // Do not collect CmpInst or InsertElementInst/InsertValueInst as their
14150
+ // analysis is done separately.
14151
+ if (!isa<CmpInst, InsertElementInst, InsertValueInst>(FutureSeed))
14152
+ PostponedInsts.push_back(FutureSeed);
14153
+ return true;
14124
14154
};
14155
+
14125
14156
while (!Stack.empty()) {
14126
14157
Instruction *Inst;
14127
14158
unsigned Level;
@@ -14132,37 +14163,19 @@ bool SLPVectorizerPass::vectorizeHorReduction(
14132
14163
// iteration while stack was populated before that happened.
14133
14164
if (R.isDeleted(Inst))
14134
14165
continue;
14135
- Value *B0 = nullptr, *B1 = nullptr;
14136
- if (Value *V = TryToReduce(Inst, B0, B1)) {
14166
+ if (Value *VectorizedV = TryToReduce(Inst)) {
14137
14167
Res = true;
14138
- // Set P to nullptr to avoid re-analysis of phi node in
14139
- // matchAssociativeReduction function unless this is the root node.
14140
- P = nullptr;
14141
- if (auto *I = dyn_cast<Instruction>(V)) {
14168
+ if (auto *I = dyn_cast<Instruction>(VectorizedV)) {
14142
14169
// Try to find another reduction.
14143
14170
Stack.emplace(I, Level);
14144
14171
continue;
14145
14172
}
14146
14173
} else {
14147
- bool IsBinop = B0 && B1;
14148
- if (P && IsBinop) {
14149
- Inst = dyn_cast<Instruction>(B0);
14150
- if (Inst == P)
14151
- Inst = dyn_cast<Instruction>(B1);
14152
- if (!Inst) {
14153
- // Set P to nullptr to avoid re-analysis of phi node in
14154
- // matchAssociativeReduction function unless this is the root node.
14155
- P = nullptr;
14156
- continue;
14157
- }
14174
+ // We could not vectorize `Inst` so try to use it as a future seed.
14175
+ if (!TryAppendToPostponedInsts(Inst)) {
14176
+ assert(Stack.empty() && "Expected empty stack");
14177
+ break;
14158
14178
}
14159
- // Set P to nullptr to avoid re-analysis of phi node in
14160
- // matchAssociativeReduction function unless this is the root node.
14161
- P = nullptr;
14162
- // Do not collect CmpInst or InsertElementInst/InsertValueInst as their
14163
- // analysis is done separately.
14164
- if (!isa<CmpInst, InsertElementInst, InsertValueInst>(Inst))
14165
- PostponedInsts.push_back(Inst);
14166
14179
}
14167
14180
14168
14181
// Try to vectorize operands.
0 commit comments