@@ -4457,6 +4457,14 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
4457
4457
return false ;
4458
4458
}
4459
4459
4460
+ // TODO: support epilogue vectorization for min/max with index.
4461
+ if (any_of (Legal->getReductionVars (), [](const auto &Reduction) {
4462
+ const RecurrenceDescriptor &RdxDesc = Reduction.second ;
4463
+ return RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (
4464
+ RdxDesc.getRecurrenceKind ());
4465
+ }))
4466
+ return false ;
4467
+
4460
4468
// Epilogue vectorization code has not been audited to ensure it handles
4461
4469
// non-latch exits properly. It may be fine, but it needs to be audited and
4462
4470
// tested.
@@ -4901,7 +4909,8 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
4901
4909
const RecurrenceDescriptor &RdxDesc = Reduction.second ;
4902
4910
RecurKind RK = RdxDesc.getRecurrenceKind ();
4903
4911
return RecurrenceDescriptor::isAnyOfRecurrenceKind (RK) ||
4904
- RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK);
4912
+ RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK) ||
4913
+ RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (RK);
4905
4914
});
4906
4915
if (HasSelectCmpReductions) {
4907
4916
LLVM_DEBUG (dbgs () << " LV: Not interleaving select-cmp reductions.\n " );
@@ -6618,6 +6627,10 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
6618
6627
6619
6628
for (const auto &Reduction : Legal->getReductionVars ()) {
6620
6629
PHINode *Phi = Reduction.first ;
6630
+ // TODO: support in-loop min/max with index.
6631
+ if (Legal->isMinMaxRecurrence (Phi))
6632
+ continue ;
6633
+
6621
6634
const RecurrenceDescriptor &RdxDesc = Reduction.second ;
6622
6635
6623
6636
// We don't collect reductions that are type promoted (yet).
@@ -7231,6 +7244,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
7231
7244
EpiRedResult->getOpcode () != VPInstruction::ComputeFindLastIVResult))
7232
7245
return ;
7233
7246
7247
+ assert (EpiRedResult->getOpcode () != VPInstruction::ComputeMinMaxIdxResult);
7248
+
7234
7249
auto *EpiRedHeaderPhi =
7235
7250
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand (0 ));
7236
7251
const RecurrenceDescriptor &RdxDesc =
@@ -8143,10 +8158,9 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
8143
8158
// Find all possible partial reductions.
8144
8159
SmallVector<std::pair<PartialReductionChain, unsigned >>
8145
8160
PartialReductionChains;
8146
- for (const auto &[Phi, RdxDesc] : Legal->getReductionVars ()) {
8147
- getScaledReductions (Phi, RdxDesc.getLoopExitInstr (), Range,
8148
- PartialReductionChains);
8149
- }
8161
+ for (const auto &[Phi, RdxDesc] : Legal->getReductionVars ())
8162
+ if (auto *ExitInstr = RdxDesc.getLoopExitInstr ())
8163
+ getScaledReductions (Phi, ExitInstr, Range, PartialReductionChains);
8150
8164
8151
8165
// A partial reduction is invalid if any of its extends are used by
8152
8166
// something that isn't another partial reduction. This is because the
@@ -9040,6 +9054,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9040
9054
assert (
9041
9055
!RecurrenceDescriptor::isAnyOfRecurrenceKind (Kind) &&
9042
9056
!RecurrenceDescriptor::isFindLastIVRecurrenceKind (Kind) &&
9057
+ !RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (Kind) &&
9043
9058
" AnyOf and FindLast reductions are not allowed for in-loop reductions" );
9044
9059
9045
9060
// Collect the chain of "link" recipes for the reduction starting at PhiR.
@@ -9163,15 +9178,32 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9163
9178
PreviousLink = RedRecipe;
9164
9179
}
9165
9180
}
9181
+
9182
+ // Collect all VPReductionPHIRecipes in the header block, and sort them based
9183
+ // on the dependency order of the reductions. This ensures that results of
9184
+ // min/max reductions are computed before their corresponding index
9185
+ // reductions, since the index reduction relies on the result of the min/max
9186
+ // reduction to determine which lane produced the min/max.
9187
+ SmallVector<VPReductionPHIRecipe *> VPReductionPHIs;
9188
+ for (VPRecipeBase &R : Header->phis ())
9189
+ if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R))
9190
+ VPReductionPHIs.push_back (PhiR);
9191
+
9192
+ stable_sort (VPReductionPHIs, [this ](const VPReductionPHIRecipe *R1,
9193
+ const VPReductionPHIRecipe *R2) {
9194
+ auto *Phi1 = cast<PHINode>(R1->getUnderlyingInstr ());
9195
+ if (!Legal->isMinMaxRecurrence (Phi1))
9196
+ return false ;
9197
+
9198
+ auto *Phi2 = cast<PHINode>(R2->getUnderlyingInstr ());
9199
+ return Legal->getMinMaxRecurrences ().find (Phi1)->second == Phi2;
9200
+ });
9201
+
9166
9202
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
9167
9203
Builder.setInsertPoint (&*std::prev (std::prev (LatchVPBB->end ())));
9168
9204
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
9169
- for (VPRecipeBase &R :
9170
- Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
9171
- VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9172
- if (!PhiR)
9173
- continue ;
9174
-
9205
+ SmallDenseMap<VPReductionPHIRecipe *, VPValue *> IdxReductionMasks;
9206
+ for (auto *PhiR : VPReductionPHIs) {
9175
9207
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9176
9208
Type *PhiTy = PhiR->getOperand (0 )->getLiveInIRValue ()->getType ();
9177
9209
// If tail is folded by masking, introduce selects between the phi
@@ -9198,7 +9230,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9198
9230
cast<VPInstruction>(&U)->getOpcode () ==
9199
9231
VPInstruction::ComputeReductionResult ||
9200
9232
cast<VPInstruction>(&U)->getOpcode () ==
9201
- VPInstruction::ComputeFindLastIVResult);
9233
+ VPInstruction::ComputeFindLastIVResult ||
9234
+ cast<VPInstruction>(&U)->getOpcode () ==
9235
+ VPInstruction::ComputeMinMaxIdxResult);
9202
9236
});
9203
9237
if (CM.usePredicatedReductionSelect ())
9204
9238
PhiR->setOperand (1 , NewExitingVPV);
@@ -9242,8 +9276,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9242
9276
VPInstruction *FinalReductionResult;
9243
9277
VPBuilder::InsertPointGuard Guard (Builder);
9244
9278
Builder.setInsertPoint (MiddleVPBB, IP);
9245
- if ( RecurrenceDescriptor::isFindLastIVRecurrenceKind (
9246
- RdxDesc. getRecurrenceKind () )) {
9279
+ RecurKind Kind = RdxDesc. getRecurrenceKind ();
9280
+ if ( RecurrenceDescriptor::isFindLastIVRecurrenceKind (Kind )) {
9247
9281
VPValue *Start = PhiR->getStartValue ();
9248
9282
FinalReductionResult =
9249
9283
Builder.createNaryOp (VPInstruction::ComputeFindLastIVResult,
@@ -9254,6 +9288,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9254
9288
FinalReductionResult =
9255
9289
Builder.createNaryOp (VPInstruction::ComputeAnyOfResult,
9256
9290
{PhiR, Start, NewExitingVPV}, ExitDL);
9291
+ } else if (RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (Kind)) {
9292
+ // Mask out lanes that cannot be the index of the min/max value.
9293
+ VPValue *Mask = IdxReductionMasks.at (PhiR);
9294
+ Value *Iden = llvm::getRecurrenceIdentity (
9295
+ Kind == RecurKind::MinMaxFirstIdx ? RecurKind::SMin : RecurKind::SMax,
9296
+ PhiTy, RdxDesc.getFastMathFlags ());
9297
+ NewExitingVPV = Builder.createSelect (Mask, NewExitingVPV,
9298
+ Plan->getOrAddLiveIn (Iden), ExitDL);
9299
+
9300
+ VPValue *Start = PhiR->getStartValue ();
9301
+ FinalReductionResult =
9302
+ Builder.createNaryOp (VPInstruction::ComputeMinMaxIdxResult,
9303
+ {PhiR, Start, NewExitingVPV}, ExitDL);
9257
9304
} else {
9258
9305
VPIRFlags Flags = RecurrenceDescriptor::isFloatingPointRecurrenceKind (
9259
9306
RdxDesc.getRecurrenceKind ())
@@ -9265,11 +9312,25 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9265
9312
}
9266
9313
// Update all users outside the vector region.
9267
9314
OrigExitingVPV->replaceUsesWithIf (
9268
- FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned ) {
9315
+ FinalReductionResult,
9316
+ [FinalReductionResult, NewExitingVPV](VPUser &User, unsigned ) {
9269
9317
auto *Parent = cast<VPRecipeBase>(&User)->getParent ();
9270
- return FinalReductionResult != &User && !Parent->getParent ();
9318
+ return FinalReductionResult != &User &&
9319
+ NewExitingVPV->getDefiningRecipe () != &User &&
9320
+ !Parent->getParent ();
9271
9321
});
9272
9322
9323
+ // Generate a mask for the index reduction.
9324
+ auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9325
+ if (Legal->isMinMaxRecurrence (Phi)) {
9326
+ VPValue *IdxRdxMask = Builder.createICmp (CmpInst::ICMP_EQ, NewExitingVPV,
9327
+ FinalReductionResult, ExitDL);
9328
+ PHINode *IdxPhi = Legal->getMinMaxRecurrences ().find (Phi)->second ;
9329
+ IdxReductionMasks.try_emplace (
9330
+ cast<VPReductionPHIRecipe>(RecipeBuilder.getRecipe (IdxPhi)),
9331
+ IdxRdxMask);
9332
+ }
9333
+
9273
9334
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9274
9335
// with a boolean reduction phi node to check if the condition is true in
9275
9336
// any iteration. The final value is selected by the final
@@ -9304,11 +9365,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9304
9365
continue ;
9305
9366
}
9306
9367
9307
- if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (
9308
- RdxDesc. getRecurrenceKind () )) {
9309
- // Adjust the start value for FindLastIV recurrences to use the sentinel
9310
- // value after generating the ResumePhi recipe, which uses the original
9311
- // start value.
9368
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (Kind) ||
9369
+ RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (Kind )) {
9370
+ // Adjust the start value for FindLastIV/MinMaxIdx recurrences to use the
9371
+ // sentinel value after generating the ResumePhi recipe, which uses the
9372
+ // original start value.
9312
9373
PhiR->setOperand (0 , Plan->getOrAddLiveIn (RdxDesc.getSentinelValue ()));
9313
9374
}
9314
9375
}
0 commit comments