@@ -4457,6 +4457,14 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
4457
4457
return false ;
4458
4458
}
4459
4459
4460
+ // TODO: support epilogue vectorization for min/max with index.
4461
+ if (any_of (Legal->getReductionVars (), [](const auto &Reduction) {
4462
+ const RecurrenceDescriptor &RdxDesc = Reduction.second ;
4463
+ return RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (
4464
+ RdxDesc.getRecurrenceKind ());
4465
+ }))
4466
+ return false ;
4467
+
4460
4468
// Epilogue vectorization code has not been audited to ensure it handles
4461
4469
// non-latch exits properly. It may be fine, but it needs to be audited and
4462
4470
// tested.
@@ -4901,7 +4909,8 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
4901
4909
const RecurrenceDescriptor &RdxDesc = Reduction.second ;
4902
4910
RecurKind RK = RdxDesc.getRecurrenceKind ();
4903
4911
return RecurrenceDescriptor::isAnyOfRecurrenceKind (RK) ||
4904
- RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK);
4912
+ RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK) ||
4913
+ RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (RK);
4905
4914
});
4906
4915
if (HasSelectCmpReductions) {
4907
4916
LLVM_DEBUG (dbgs () << " LV: Not interleaving select-cmp reductions.\n " );
@@ -6618,6 +6627,10 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
6618
6627
6619
6628
for (const auto &Reduction : Legal->getReductionVars ()) {
6620
6629
PHINode *Phi = Reduction.first ;
6630
+ // TODO: support in-loop min/max with index.
6631
+ if (Legal->isMinMaxRecurrence (Phi))
6632
+ continue ;
6633
+
6621
6634
const RecurrenceDescriptor &RdxDesc = Reduction.second ;
6622
6635
6623
6636
// We don't collect reductions that are type promoted (yet).
@@ -7233,6 +7246,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
7233
7246
EpiRedResult->getOpcode () != VPInstruction::ComputeFindLastIVResult))
7234
7247
return ;
7235
7248
7249
+ assert (EpiRedResult->getOpcode () != VPInstruction::ComputeMinMaxIdxResult);
7250
+
7236
7251
auto *EpiRedHeaderPhi =
7237
7252
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand (0 ));
7238
7253
const RecurrenceDescriptor &RdxDesc =
@@ -8140,10 +8155,9 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
8140
8155
// Find all possible partial reductions.
8141
8156
SmallVector<std::pair<PartialReductionChain, unsigned >>
8142
8157
PartialReductionChains;
8143
- for (const auto &[Phi, RdxDesc] : Legal->getReductionVars ()) {
8144
- getScaledReductions (Phi, RdxDesc.getLoopExitInstr (), Range,
8145
- PartialReductionChains);
8146
- }
8158
+ for (const auto &[Phi, RdxDesc] : Legal->getReductionVars ())
8159
+ if (auto *ExitInstr = RdxDesc.getLoopExitInstr ())
8160
+ getScaledReductions (Phi, ExitInstr, Range, PartialReductionChains);
8147
8161
8148
8162
// A partial reduction is invalid if any of its extends are used by
8149
8163
// something that isn't another partial reduction. This is because the
@@ -9037,6 +9051,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9037
9051
assert (
9038
9052
!RecurrenceDescriptor::isAnyOfRecurrenceKind (Kind) &&
9039
9053
!RecurrenceDescriptor::isFindLastIVRecurrenceKind (Kind) &&
9054
+ !RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (Kind) &&
9040
9055
" AnyOf and FindLast reductions are not allowed for in-loop reductions" );
9041
9056
9042
9057
// Collect the chain of "link" recipes for the reduction starting at PhiR.
@@ -9160,15 +9175,32 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9160
9175
PreviousLink = RedRecipe;
9161
9176
}
9162
9177
}
9178
+
9179
+ // Collect all VPReductionPHIRecipes in the header block, and sort them based
9180
+ // on the dependency order of the reductions. This ensures that results of
9181
+ // min/max reductions are computed before their corresponding index
9182
+ // reductions, since the index reduction relies on the result of the min/max
9183
+ // reduction to determine which lane produced the min/max.
9184
+ SmallVector<VPReductionPHIRecipe *> VPReductionPHIs;
9185
+ for (VPRecipeBase &R : Header->phis ())
9186
+ if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R))
9187
+ VPReductionPHIs.push_back (PhiR);
9188
+
9189
+ stable_sort (VPReductionPHIs, [this ](const VPReductionPHIRecipe *R1,
9190
+ const VPReductionPHIRecipe *R2) {
9191
+ auto *Phi1 = cast<PHINode>(R1->getUnderlyingInstr ());
9192
+ if (!Legal->isMinMaxRecurrence (Phi1))
9193
+ return false ;
9194
+
9195
+ auto *Phi2 = cast<PHINode>(R2->getUnderlyingInstr ());
9196
+ return Legal->getMinMaxRecurrences ().find (Phi1)->second == Phi2;
9197
+ });
9198
+
9163
9199
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
9164
9200
Builder.setInsertPoint (&*std::prev (std::prev (LatchVPBB->end ())));
9165
9201
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
9166
- for (VPRecipeBase &R :
9167
- Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
9168
- VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9169
- if (!PhiR)
9170
- continue ;
9171
-
9202
+ SmallDenseMap<VPReductionPHIRecipe *, VPValue *> IdxReductionMasks;
9203
+ for (auto *PhiR : VPReductionPHIs) {
9172
9204
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9173
9205
Type *PhiTy = PhiR->getUnderlyingValue ()->getType ();
9174
9206
// If tail is folded by masking, introduce selects between the phi
@@ -9195,7 +9227,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9195
9227
cast<VPInstruction>(&U)->getOpcode () ==
9196
9228
VPInstruction::ComputeReductionResult ||
9197
9229
cast<VPInstruction>(&U)->getOpcode () ==
9198
- VPInstruction::ComputeFindLastIVResult);
9230
+ VPInstruction::ComputeFindLastIVResult ||
9231
+ cast<VPInstruction>(&U)->getOpcode () ==
9232
+ VPInstruction::ComputeMinMaxIdxResult);
9199
9233
});
9200
9234
if (CM.usePredicatedReductionSelect ())
9201
9235
PhiR->setOperand (1 , NewExitingVPV);
@@ -9239,6 +9273,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9239
9273
VPInstruction *FinalReductionResult;
9240
9274
VPBuilder::InsertPointGuard Guard (Builder);
9241
9275
Builder.setInsertPoint (MiddleVPBB, IP);
9276
+ RecurKind RK = RdxDesc.getRecurrenceKind ();
9242
9277
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (
9243
9278
RdxDesc.getRecurrenceKind ())) {
9244
9279
VPValue *Start = PhiR->getStartValue ();
@@ -9251,6 +9286,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9251
9286
FinalReductionResult =
9252
9287
Builder.createNaryOp (VPInstruction::ComputeAnyOfResult,
9253
9288
{PhiR, Start, NewExitingVPV}, ExitDL);
9289
+ } else if (RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (RK)) {
9290
+ // Mask out lanes that cannot be the index of the min/max value.
9291
+ VPValue *Mask = IdxReductionMasks.at (PhiR);
9292
+ Value *Iden = llvm::getRecurrenceIdentity (
9293
+ RK == RecurKind::MinMaxFirstIdx ? RecurKind::SMin : RecurKind::SMax,
9294
+ PhiTy, RdxDesc.getFastMathFlags ());
9295
+ NewExitingVPV = Builder.createSelect (Mask, NewExitingVPV,
9296
+ Plan->getOrAddLiveIn (Iden), ExitDL);
9297
+
9298
+ VPValue *Start = PhiR->getStartValue ();
9299
+ FinalReductionResult =
9300
+ Builder.createNaryOp (VPInstruction::ComputeMinMaxIdxResult,
9301
+ {PhiR, Start, NewExitingVPV}, ExitDL);
9254
9302
} else {
9255
9303
VPIRFlags Flags = RecurrenceDescriptor::isFloatingPointRecurrenceKind (
9256
9304
RdxDesc.getRecurrenceKind ())
@@ -9262,11 +9310,25 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9262
9310
}
9263
9311
// Update all users outside the vector region.
9264
9312
OrigExitingVPV->replaceUsesWithIf (
9265
- FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned ) {
9313
+ FinalReductionResult,
9314
+ [FinalReductionResult, NewExitingVPV](VPUser &User, unsigned ) {
9266
9315
auto *Parent = cast<VPRecipeBase>(&User)->getParent ();
9267
- return FinalReductionResult != &User && !Parent->getParent ();
9316
+ return FinalReductionResult != &User &&
9317
+ NewExitingVPV->getDefiningRecipe () != &User &&
9318
+ !Parent->getParent ();
9268
9319
});
9269
9320
9321
+ // Generate a mask for the index reduction.
9322
+ auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9323
+ if (Legal->isMinMaxRecurrence (Phi)) {
9324
+ VPValue *IdxRdxMask = Builder.createICmp (CmpInst::ICMP_EQ, NewExitingVPV,
9325
+ FinalReductionResult, ExitDL);
9326
+ PHINode *IdxPhi = Legal->getMinMaxRecurrences ().find (Phi)->second ;
9327
+ IdxReductionMasks.try_emplace (
9328
+ cast<VPReductionPHIRecipe>(RecipeBuilder.getRecipe (IdxPhi)),
9329
+ IdxRdxMask);
9330
+ }
9331
+
9270
9332
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9271
9333
// with a boolean reduction phi node to check if the condition is true in
9272
9334
// any iteration. The final value is selected by the final
@@ -9301,16 +9363,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9301
9363
continue ;
9302
9364
}
9303
9365
9304
- if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (
9305
- RdxDesc. getRecurrenceKind () )) {
9306
- // Adjust the start value for FindLastIV recurrences to use the sentinel
9307
- // value after generating the ResumePhi recipe, which uses the original
9308
- // start value.
9366
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK) ||
9367
+ RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (RK )) {
9368
+ // Adjust the start value for FindLastIV/MinMaxIdx recurrences to use the
9369
+ // sentinel value after generating the ResumePhi recipe, which uses the
9370
+ // original start value.
9309
9371
PhiR->setOperand (0 , Plan->getOrAddLiveIn (RdxDesc.getSentinelValue ()));
9310
9372
}
9311
- RecurKind RK = RdxDesc. getRecurrenceKind ();
9373
+
9312
9374
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind (RK) &&
9313
9375
!RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK) &&
9376
+ !RecurrenceDescriptor::isMinMaxIdxRecurrenceKind (RK) &&
9314
9377
!RecurrenceDescriptor::isMinMaxRecurrenceKind (RK))) {
9315
9378
VPBuilder PHBuilder (Plan->getVectorPreheader ());
9316
9379
VPValue *Iden = Plan->getOrAddLiveIn (
0 commit comments