@@ -6653,19 +6653,22 @@ class HorizontalReduction {
6653
6653
6654
6654
/// Attempt to vectorize the tree found by
6655
6655
/// matchAssociativeReduction.
6656
- bool tryToReduce (BoUpSLP &V, TargetTransformInfo *TTI) {
6656
+ bool tryToReduce (BoUpSLP &V, TargetTransformInfo *TTI, bool Try2WayRdx ) {
6657
6657
if (ReducedVals.empty ())
6658
6658
return false ;
6659
6659
6660
6660
// If there is a sufficient number of reduction values, reduce
6661
6661
// to a nearby power-of-2. Can safely generate oversized
6662
6662
// vectors and rely on the backend to split them to legal sizes.
6663
6663
unsigned NumReducedVals = ReducedVals.size ();
6664
- if (NumReducedVals < 4 )
6664
+ if (Try2WayRdx && NumReducedVals != 2 )
6665
+ return false ;
6666
+ unsigned MinRdxVals = Try2WayRdx ? 2 : 4 ;
6667
+ if (NumReducedVals < MinRdxVals)
6665
6668
return false ;
6666
6669
6667
6670
unsigned ReduxWidth = PowerOf2Floor (NumReducedVals);
6668
-
6671
+ unsigned MinRdxWidth = Log2_32 (MinRdxVals);
6669
6672
Value *VectorizedTree = nullptr ;
6670
6673
6671
6674
// FIXME: Fast-math-flags should be set based on the instructions in the
@@ -6701,7 +6704,7 @@ class HorizontalReduction {
6701
6704
SmallVector<Value *, 16 > IgnoreList;
6702
6705
for (auto &V : ReductionOps)
6703
6706
IgnoreList.append (V.begin (), V.end ());
6704
- while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2 ) {
6707
+ while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > MinRdxWidth ) {
6705
6708
auto VL = makeArrayRef (&ReducedVals[i], ReduxWidth);
6706
6709
V.buildTree (VL, ExternallyUsedValues, IgnoreList);
6707
6710
Optional<ArrayRef<unsigned >> Order = V.bestOrder ();
@@ -7045,7 +7048,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
7045
7048
/// performed.
7046
7049
static bool tryToVectorizeHorReductionOrInstOperands (
7047
7050
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
7048
- TargetTransformInfo *TTI,
7051
+ TargetTransformInfo *TTI, bool Try2WayRdx,
7049
7052
const function_ref<bool (Instruction *, BoUpSLP &)> Vectorize) {
7050
7053
if (!ShouldVectorizeHor)
7051
7054
return false ;
@@ -7076,7 +7079,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
7076
7079
if (BI || SI) {
7077
7080
HorizontalReduction HorRdx;
7078
7081
if (HorRdx.matchAssociativeReduction (P, Inst)) {
7079
- if (HorRdx.tryToReduce (R, TTI)) {
7082
+ if (HorRdx.tryToReduce (R, TTI, Try2WayRdx )) {
7080
7083
Res = true ;
7081
7084
// Set P to nullptr to avoid re-analysis of phi node in
7082
7085
// matchAssociativeReduction function unless this is the root node.
@@ -7119,7 +7122,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
7119
7122
7120
7123
bool SLPVectorizerPass::vectorizeRootInstruction (PHINode *P, Value *V,
7121
7124
BasicBlock *BB, BoUpSLP &R,
7122
- TargetTransformInfo *TTI) {
7125
+ TargetTransformInfo *TTI,
7126
+ bool Try2WayRdx) {
7123
7127
if (!V)
7124
7128
return false ;
7125
7129
auto *I = dyn_cast<Instruction>(V);
@@ -7132,7 +7136,7 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
7132
7136
auto &&ExtraVectorization = [this ](Instruction *I, BoUpSLP &R) -> bool {
7133
7137
return tryToVectorize (I, R);
7134
7138
};
7135
- return tryToVectorizeHorReductionOrInstOperands (P, I, BB, R, TTI,
7139
+ return tryToVectorizeHorReductionOrInstOperands (P, I, BB, R, TTI, Try2WayRdx,
7136
7140
ExtraVectorization);
7137
7141
}
7138
7142
@@ -7328,6 +7332,23 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
7328
7332
PostProcessInstructions.push_back (&*it);
7329
7333
}
7330
7334
7335
+ // Make a final attempt to match a 2-way reduction if nothing else worked.
7336
+ // We do not try this above because it may interfere with other vectorization
7337
+ // attempts.
7338
+ // TODO: The constraints are copied from the above call to
7339
+ // vectorizeRootInstruction(), but that might be too restrictive?
7340
+ BasicBlock::iterator LastInst = --BB->end ();
7341
+ if (!Changed && LastInst->use_empty () &&
7342
+ (LastInst->getType ()->isVoidTy () || isa<CallInst>(LastInst) ||
7343
+ isa<InvokeInst>(LastInst))) {
7344
+ if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(LastInst)) {
7345
+ for (auto *V : LastInst->operand_values ()) {
7346
+ Changed |= vectorizeRootInstruction (nullptr , V, BB, R, TTI,
7347
+ /* Try2WayRdx */ true );
7348
+ }
7349
+ }
7350
+ }
7351
+
7331
7352
return Changed;
7332
7353
}
7333
7354
0 commit comments