@@ -8112,10 +8112,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
8112
8112
BlockMaskCache[BB] = BlockMask;
8113
8113
}
8114
8114
8115
- VPRecipeBase *VPRecipeBuilder::tryToWidenMemory (Instruction *I,
8116
- ArrayRef<VPValue *> Operands,
8117
- VFRange &Range,
8118
- VPlanPtr &Plan) {
8115
+ VPWidenMemoryInstructionRecipe *
8116
+ VPRecipeBuilder::tryToWidenMemory (Instruction *I, ArrayRef<VPValue *> Operands,
8117
+ VFRange &Range, VPlanPtr &Plan) {
8119
8118
assert ((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
8120
8119
" Must be called with either a load or store" );
8121
8120
@@ -8187,7 +8186,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
8187
8186
return new VPWidenIntOrFpInductionRecipe (Phi, Start, Step, IndDesc);
8188
8187
}
8189
8188
8190
- VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI (
8189
+ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI (
8191
8190
PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
8192
8191
8193
8192
// Check if this is an integer or fp induction. If so, build the recipe that
@@ -8239,31 +8238,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
8239
8238
return nullptr ;
8240
8239
}
8241
8240
8242
- VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend (PHINode *Phi,
8243
- ArrayRef<VPValue *> Operands,
8244
- VPlanPtr &Plan) {
8245
- // If all incoming values are equal, the incoming VPValue can be used directly
8246
- // instead of creating a new VPBlendRecipe.
8247
- if (llvm::all_equal (Operands))
8248
- return Operands[0 ];
8249
-
8241
+ VPBlendRecipe *VPRecipeBuilder::tryToBlend (PHINode *Phi,
8242
+ ArrayRef<VPValue *> Operands,
8243
+ VPlanPtr &Plan) {
8250
8244
unsigned NumIncoming = Phi->getNumIncomingValues ();
8251
- // For in-loop reductions, we do not need to create an additional select.
8252
- VPValue *InLoopVal = nullptr ;
8253
- for (unsigned In = 0 ; In < NumIncoming; In++) {
8254
- PHINode *PhiOp =
8255
- dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue ());
8256
- if (PhiOp && CM.isInLoopReduction (PhiOp)) {
8257
- assert (!InLoopVal && " Found more than one in-loop reduction!" );
8258
- InLoopVal = Operands[In];
8259
- }
8260
- }
8261
-
8262
- assert ((!InLoopVal || NumIncoming == 2 ) &&
8263
- " Found an in-loop reduction for PHI with unexpected number of "
8264
- " incoming values" );
8265
- if (InLoopVal)
8266
- return Operands[Operands[0 ] == InLoopVal ? 1 : 0 ];
8267
8245
8268
8246
// We know that all PHIs in non-header blocks are converted into selects, so
8269
8247
// we don't have to worry about the insertion order and we can just use the
@@ -8273,15 +8251,18 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
8273
8251
SmallVector<VPValue *, 2 > OperandsWithMask;
8274
8252
8275
8253
for (unsigned In = 0 ; In < NumIncoming; In++) {
8254
+ OperandsWithMask.push_back (Operands[In]);
8276
8255
VPValue *EdgeMask =
8277
8256
createEdgeMask (Phi->getIncomingBlock (In), Phi->getParent (), *Plan);
8278
- assert ((EdgeMask || NumIncoming == 1 ) &&
8279
- " Multiple predecessors with one having a full mask" );
8280
- OperandsWithMask.push_back (Operands[In]);
8281
- if (EdgeMask)
8282
- OperandsWithMask.push_back (EdgeMask);
8257
+ if (!EdgeMask) {
8258
+ assert (In == 0 && " Both null and non-null edge masks found" );
8259
+ assert (all_equal (Operands) &&
8260
+ " Distinct incoming values with one having a full mask" );
8261
+ break ;
8262
+ }
8263
+ OperandsWithMask.push_back (EdgeMask);
8283
8264
}
8284
- return toVPRecipeResult ( new VPBlendRecipe (Phi, OperandsWithMask) );
8265
+ return new VPBlendRecipe (Phi, OperandsWithMask);
8285
8266
}
8286
8267
8287
8268
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall (CallInst *CI,
@@ -8390,9 +8371,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
8390
8371
Range);
8391
8372
}
8392
8373
8393
- VPRecipeBase *VPRecipeBuilder::tryToWiden (Instruction *I,
8394
- ArrayRef<VPValue *> Operands,
8395
- VPBasicBlock *VPBB, VPlanPtr &Plan) {
8374
+ VPWidenRecipe *VPRecipeBuilder::tryToWiden (Instruction *I,
8375
+ ArrayRef<VPValue *> Operands,
8376
+ VPBasicBlock *VPBB, VPlanPtr &Plan) {
8396
8377
switch (I->getOpcode ()) {
8397
8378
default :
8398
8379
return nullptr ;
@@ -8449,9 +8430,9 @@ void VPRecipeBuilder::fixHeaderPhis() {
8449
8430
}
8450
8431
}
8451
8432
8452
- VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication (Instruction *I,
8453
- VFRange &Range,
8454
- VPlan &Plan) {
8433
+ VPReplicateRecipe * VPRecipeBuilder::handleReplication (Instruction *I,
8434
+ VFRange &Range,
8435
+ VPlan &Plan) {
8455
8436
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange (
8456
8437
[&](ElementCount VF) { return CM.isUniformAfterVectorization (I, VF); },
8457
8438
Range);
@@ -8503,14 +8484,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
8503
8484
8504
8485
auto *Recipe = new VPReplicateRecipe (I, Plan.mapToVPValues (I->operands ()),
8505
8486
IsUniform, BlockInMask);
8506
- return toVPRecipeResult ( Recipe) ;
8487
+ return Recipe;
8507
8488
}
8508
8489
8509
- VPRecipeOrVPValueTy
8510
- VPRecipeBuilder::tryToCreateWidenRecipe (Instruction *Instr,
8511
- ArrayRef<VPValue *> Operands,
8512
- VFRange &Range, VPBasicBlock *VPBB,
8513
- VPlanPtr &Plan) {
8490
+ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe (
8491
+ Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
8492
+ VPBasicBlock *VPBB, VPlanPtr &Plan) {
8514
8493
// First, check for specific widening recipes that deal with inductions, Phi
8515
8494
// nodes, calls and memory operations.
8516
8495
VPRecipeBase *Recipe;
@@ -8523,7 +8502,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8523
8502
recordRecipeOf (Phi);
8524
8503
8525
8504
if ((Recipe = tryToOptimizeInductionPHI (Phi, Operands, *Plan, Range)))
8526
- return toVPRecipeResult ( Recipe) ;
8505
+ return Recipe;
8527
8506
8528
8507
VPHeaderPHIRecipe *PhiRecipe = nullptr ;
8529
8508
assert ((Legal->isReductionVariable (Phi) ||
@@ -8555,43 +8534,43 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8555
8534
recordRecipeOf (Inc);
8556
8535
8557
8536
PhisToFix.push_back (PhiRecipe);
8558
- return toVPRecipeResult ( PhiRecipe) ;
8537
+ return PhiRecipe;
8559
8538
}
8560
8539
8561
8540
if (isa<TruncInst>(Instr) &&
8562
8541
(Recipe = tryToOptimizeInductionTruncate (cast<TruncInst>(Instr), Operands,
8563
8542
Range, *Plan)))
8564
- return toVPRecipeResult ( Recipe) ;
8543
+ return Recipe;
8565
8544
8566
8545
// All widen recipes below deal only with VF > 1.
8567
8546
if (LoopVectorizationPlanner::getDecisionAndClampRange (
8568
8547
[&](ElementCount VF) { return VF.isScalar (); }, Range))
8569
8548
return nullptr ;
8570
8549
8571
8550
if (auto *CI = dyn_cast<CallInst>(Instr))
8572
- return toVPRecipeResult ( tryToWidenCall (CI, Operands, Range, Plan) );
8551
+ return tryToWidenCall (CI, Operands, Range, Plan);
8573
8552
8574
8553
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
8575
- return toVPRecipeResult ( tryToWidenMemory (Instr, Operands, Range, Plan) );
8554
+ return tryToWidenMemory (Instr, Operands, Range, Plan);
8576
8555
8577
8556
if (!shouldWiden (Instr, Range))
8578
8557
return nullptr ;
8579
8558
8580
8559
if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
8581
- return toVPRecipeResult ( new VPWidenGEPRecipe (
8582
- GEP, make_range (Operands.begin (), Operands.end () )));
8560
+ return new VPWidenGEPRecipe (GEP,
8561
+ make_range (Operands.begin (), Operands.end ()));
8583
8562
8584
8563
if (auto *SI = dyn_cast<SelectInst>(Instr)) {
8585
- return toVPRecipeResult ( new VPWidenSelectRecipe (
8586
- *SI, make_range (Operands.begin (), Operands.end ()))) ;
8564
+ return new VPWidenSelectRecipe (
8565
+ *SI, make_range (Operands.begin (), Operands.end ()));
8587
8566
}
8588
8567
8589
8568
if (auto *CI = dyn_cast<CastInst>(Instr)) {
8590
- return toVPRecipeResult ( new VPWidenCastRecipe (CI->getOpcode (), Operands[0 ],
8591
- CI-> getType (), *CI) );
8569
+ return new VPWidenCastRecipe (CI->getOpcode (), Operands[0 ], CI-> getType () ,
8570
+ *CI);
8592
8571
}
8593
8572
8594
- return toVPRecipeResult ( tryToWiden (Instr, Operands, VPBB, Plan) );
8573
+ return tryToWiden (Instr, Operands, VPBB, Plan);
8595
8574
}
8596
8575
8597
8576
void LoopVectorizationPlanner::buildVPlansWithVPRecipes (ElementCount MinVF,
@@ -8779,22 +8758,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8779
8758
Legal->isInvariantAddressOfReduction (SI->getPointerOperand ()))
8780
8759
continue ;
8781
8760
8782
- auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe (
8761
+ VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe (
8783
8762
Instr, Operands, Range, VPBB, Plan);
8784
- if (!RecipeOrValue)
8785
- RecipeOrValue = RecipeBuilder.handleReplication (Instr, Range, *Plan);
8786
- // If Instr can be simplified to an existing VPValue, use it.
8787
- if (isa<VPValue *>(RecipeOrValue)) {
8788
- auto *VPV = cast<VPValue *>(RecipeOrValue);
8789
- Plan->addVPValue (Instr, VPV);
8790
- // If the re-used value is a recipe, register the recipe for the
8791
- // instruction, in case the recipe for Instr needs to be recorded.
8792
- if (VPRecipeBase *R = VPV->getDefiningRecipe ())
8793
- RecipeBuilder.setRecipe (Instr, R);
8794
- continue ;
8795
- }
8796
- // Otherwise, add the new recipe.
8797
- VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue);
8763
+ if (!Recipe)
8764
+ Recipe = RecipeBuilder.handleReplication (Instr, Range, *Plan);
8798
8765
for (auto *Def : Recipe->definedValues ()) {
8799
8766
auto *UV = Def->getUnderlyingValue ();
8800
8767
Plan->addVPValue (UV, Def);
@@ -9041,7 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9041
9008
// the phi until LoopExitValue. We keep track of the previous item
9042
9009
// (PreviousLink) to tell which of the two operands of a Link will remain
9043
9010
// scalar and which will be reduced. For minmax by select(cmp), Link will be
9044
- // the select instructions.
9011
+ // the select instructions. Blend recipes of in-loop reduction phis will
9012
+ // get folded to their non-phi operand, as the reduction recipe handles the
9013
+ // condition directly.
9045
9014
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
9046
9015
for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef ().drop_front ()) {
9047
9016
Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr ();
@@ -9072,6 +9041,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9072
9041
LinkVPBB->insert (FMulRecipe, CurrentLink->getIterator ());
9073
9042
VecOp = FMulRecipe;
9074
9043
} else {
9044
+ auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink);
9045
+ if (PhiR->isInLoop () && Blend) {
9046
+ assert (Blend->getNumIncomingValues () == 2 &&
9047
+ " Blend must have 2 incoming values" );
9048
+ if (Blend->getIncomingValue (0 ) == PhiR)
9049
+ Blend->replaceAllUsesWith (Blend->getIncomingValue (1 ));
9050
+ else {
9051
+ assert (Blend->getIncomingValue (1 ) == PhiR &&
9052
+ " PhiR must be an operand of the blend" );
9053
+ Blend->replaceAllUsesWith (Blend->getIncomingValue (0 ));
9054
+ }
9055
+ continue ;
9056
+ }
9057
+
9075
9058
if (RecurrenceDescriptor::isMinMaxRecurrenceKind (Kind)) {
9076
9059
if (isa<VPWidenRecipe>(CurrentLink)) {
9077
9060
assert (isa<CmpInst>(CurrentLinkI) &&
0 commit comments