@@ -7898,8 +7898,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
7898
7898
}
7899
7899
}
7900
7900
7901
- VPValue *VPRecipeBuilder::createEdgeMask (BasicBlock *Src, BasicBlock *Dst,
7902
- VPlan &Plan) {
7901
+ VPValue *VPRecipeBuilder::createEdgeMask (BasicBlock *Src, BasicBlock *Dst) {
7903
7902
assert (is_contained (predecessors (Dst), Src) && " Invalid edge" );
7904
7903
7905
7904
// Look for cached value.
@@ -7954,7 +7953,7 @@ VPValue *VPRecipeBuilder::getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const {
7954
7953
return ECEntryIt->second ;
7955
7954
}
7956
7955
7957
- void VPRecipeBuilder::createHeaderMask (VPlan &Plan ) {
7956
+ void VPRecipeBuilder::createHeaderMask () {
7958
7957
BasicBlock *Header = OrigLoop->getHeader ();
7959
7958
7960
7959
// When not folding the tail, use nullptr to model all-true mask.
@@ -7989,7 +7988,7 @@ VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const {
7989
7988
return BCEntryIt->second ;
7990
7989
}
7991
7990
7992
- void VPRecipeBuilder::createBlockInMask (BasicBlock *BB, VPlan &Plan ) {
7991
+ void VPRecipeBuilder::createBlockInMask (BasicBlock *BB) {
7993
7992
assert (OrigLoop->contains (BB) && " Block is not a part of a loop" );
7994
7993
assert (BlockMaskCache.count (BB) == 0 && " Mask for block already computed" );
7995
7994
assert (OrigLoop->getHeader () != BB &&
@@ -8000,7 +7999,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
8000
7999
VPValue *BlockMask = nullptr ;
8001
8000
// This is the block mask. We OR all incoming edges.
8002
8001
for (auto *Predecessor : predecessors (BB)) {
8003
- VPValue *EdgeMask = createEdgeMask (Predecessor, BB, Plan );
8002
+ VPValue *EdgeMask = createEdgeMask (Predecessor, BB);
8004
8003
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too.
8005
8004
BlockMaskCache[BB] = EdgeMask;
8006
8005
return ;
@@ -8019,7 +8018,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
8019
8018
8020
8019
VPWidenMemoryInstructionRecipe *
8021
8020
VPRecipeBuilder::tryToWidenMemory (Instruction *I, ArrayRef<VPValue *> Operands,
8022
- VFRange &Range, VPlanPtr &Plan ) {
8021
+ VFRange &Range) {
8023
8022
assert ((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
8024
8023
" Must be called with either a load or store" );
8025
8024
@@ -8092,7 +8091,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
8092
8091
}
8093
8092
8094
8093
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI (
8095
- PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
8094
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) {
8096
8095
8097
8096
// Check if this is an integer or fp induction. If so, build the recipe that
8098
8097
// produces its scalar and vector values.
@@ -8116,7 +8115,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
8116
8115
}
8117
8116
8118
8117
VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate (
8119
- TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan ) {
8118
+ TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range) {
8120
8119
// Optimize the special case where the source is a constant integer
8121
8120
// induction variable. Notice that we can only optimize the 'trunc' case
8122
8121
// because (a) FP conversions lose precision, (b) sext/zext may wrap, and
@@ -8144,8 +8143,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
8144
8143
}
8145
8144
8146
8145
VPBlendRecipe *VPRecipeBuilder::tryToBlend (PHINode *Phi,
8147
- ArrayRef<VPValue *> Operands,
8148
- VPlanPtr &Plan) {
8146
+ ArrayRef<VPValue *> Operands) {
8149
8147
unsigned NumIncoming = Phi->getNumIncomingValues ();
8150
8148
8151
8149
// We know that all PHIs in non-header blocks are converted into selects, so
@@ -8158,7 +8156,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
8158
8156
for (unsigned In = 0 ; In < NumIncoming; In++) {
8159
8157
OperandsWithMask.push_back (Operands[In]);
8160
8158
VPValue *EdgeMask =
8161
- createEdgeMask (Phi->getIncomingBlock (In), Phi->getParent (), *Plan );
8159
+ createEdgeMask (Phi->getIncomingBlock (In), Phi->getParent ());
8162
8160
if (!EdgeMask) {
8163
8161
assert (In == 0 && " Both null and non-null edge masks found" );
8164
8162
assert (all_equal (Operands) &&
@@ -8172,8 +8170,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
8172
8170
8173
8171
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall (CallInst *CI,
8174
8172
ArrayRef<VPValue *> Operands,
8175
- VFRange &Range,
8176
- VPlanPtr &Plan) {
8173
+ VFRange &Range) {
8177
8174
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange (
8178
8175
[this , CI](ElementCount VF) {
8179
8176
return CM.isScalarWithPredication (CI, VF);
@@ -8248,7 +8245,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
8248
8245
if (Legal->isMaskRequired (CI))
8249
8246
Mask = getBlockInMask (CI->getParent ());
8250
8247
else
8251
- Mask = Plan-> getVPValueOrAddLiveIn (ConstantInt::getTrue (
8248
+ Mask = Plan. getVPValueOrAddLiveIn (ConstantInt::getTrue (
8252
8249
IntegerType::getInt1Ty (Variant->getFunctionType ()->getContext ())));
8253
8250
8254
8251
Ops.insert (Ops.begin () + *MaskPos, Mask);
@@ -8278,7 +8275,7 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
8278
8275
8279
8276
VPWidenRecipe *VPRecipeBuilder::tryToWiden (Instruction *I,
8280
8277
ArrayRef<VPValue *> Operands,
8281
- VPBasicBlock *VPBB, VPlanPtr &Plan ) {
8278
+ VPBasicBlock *VPBB) {
8282
8279
switch (I->getOpcode ()) {
8283
8280
default :
8284
8281
return nullptr ;
@@ -8291,8 +8288,8 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
8291
8288
if (CM.isPredicatedInst (I)) {
8292
8289
SmallVector<VPValue *> Ops (Operands.begin (), Operands.end ());
8293
8290
VPValue *Mask = getBlockInMask (I->getParent ());
8294
- VPValue *One = Plan-> getVPValueOrAddLiveIn (
8295
- ConstantInt::get (I->getType (), 1u , false ));
8291
+ VPValue *One =
8292
+ Plan. getVPValueOrAddLiveIn ( ConstantInt::get (I->getType (), 1u , false ));
8296
8293
auto *SafeRHS =
8297
8294
new VPInstruction (Instruction::Select, {Mask, Ops[1 ], One},
8298
8295
I->getDebugLoc ());
@@ -8336,8 +8333,7 @@ void VPRecipeBuilder::fixHeaderPhis() {
8336
8333
}
8337
8334
8338
8335
VPReplicateRecipe *VPRecipeBuilder::handleReplication (Instruction *I,
8339
- VFRange &Range,
8340
- VPlan &Plan) {
8336
+ VFRange &Range) {
8341
8337
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange (
8342
8338
[&](ElementCount VF) { return CM.isUniformAfterVectorization (I, VF); },
8343
8339
Range);
@@ -8392,21 +8388,22 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
8392
8388
return Recipe;
8393
8389
}
8394
8390
8395
- VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe (
8396
- Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
8397
- VPBasicBlock *VPBB, VPlanPtr &Plan) {
8391
+ VPRecipeBase *
8392
+ VPRecipeBuilder::tryToCreateWidenRecipe (Instruction *Instr,
8393
+ ArrayRef<VPValue *> Operands,
8394
+ VFRange &Range, VPBasicBlock *VPBB) {
8398
8395
// First, check for specific widening recipes that deal with inductions, Phi
8399
8396
// nodes, calls and memory operations.
8400
8397
VPRecipeBase *Recipe;
8401
8398
if (auto Phi = dyn_cast<PHINode>(Instr)) {
8402
8399
if (Phi->getParent () != OrigLoop->getHeader ())
8403
- return tryToBlend (Phi, Operands, Plan );
8400
+ return tryToBlend (Phi, Operands);
8404
8401
8405
8402
// Always record recipes for header phis. Later first-order recurrence phis
8406
8403
// can have earlier phis as incoming values.
8407
8404
recordRecipeOf (Phi);
8408
8405
8409
- if ((Recipe = tryToOptimizeInductionPHI (Phi, Operands, *Plan, Range)))
8406
+ if ((Recipe = tryToOptimizeInductionPHI (Phi, Operands, Range)))
8410
8407
return Recipe;
8411
8408
8412
8409
VPHeaderPHIRecipe *PhiRecipe = nullptr ;
@@ -8442,9 +8439,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
8442
8439
return PhiRecipe;
8443
8440
}
8444
8441
8445
- if (isa<TruncInst>(Instr) &&
8446
- (Recipe = tryToOptimizeInductionTruncate (cast<TruncInst>(Instr), Operands,
8447
- Range, *Plan)))
8442
+ if (isa<TruncInst>(Instr) && (Recipe = tryToOptimizeInductionTruncate (
8443
+ cast<TruncInst>(Instr), Operands, Range)))
8448
8444
return Recipe;
8449
8445
8450
8446
// All widen recipes below deal only with VF > 1.
@@ -8453,10 +8449,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
8453
8449
return nullptr ;
8454
8450
8455
8451
if (auto *CI = dyn_cast<CallInst>(Instr))
8456
- return tryToWidenCall (CI, Operands, Range, Plan );
8452
+ return tryToWidenCall (CI, Operands, Range);
8457
8453
8458
8454
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
8459
- return tryToWidenMemory (Instr, Operands, Range, Plan );
8455
+ return tryToWidenMemory (Instr, Operands, Range);
8460
8456
8461
8457
if (!shouldWiden (Instr, Range))
8462
8458
return nullptr ;
@@ -8475,7 +8471,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
8475
8471
*CI);
8476
8472
}
8477
8473
8478
- return tryToWiden (Instr, Operands, VPBB, Plan );
8474
+ return tryToWiden (Instr, Operands, VPBB);
8479
8475
}
8480
8476
8481
8477
void LoopVectorizationPlanner::buildVPlansWithVPRecipes (ElementCount MinVF,
@@ -8547,37 +8543,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8547
8543
8548
8544
SmallPtrSet<const InterleaveGroup<Instruction> *, 1 > InterleaveGroups;
8549
8545
8550
- VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, PSE, Builder);
8551
-
8552
- // ---------------------------------------------------------------------------
8553
- // Pre-construction: record ingredients whose recipes we'll need to further
8554
- // process after constructing the initial VPlan.
8555
- // ---------------------------------------------------------------------------
8556
-
8557
- // For each interleave group which is relevant for this (possibly trimmed)
8558
- // Range, add it to the set of groups to be later applied to the VPlan and add
8559
- // placeholders for its members' Recipes which we'll be replacing with a
8560
- // single VPInterleaveRecipe.
8561
- for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups ()) {
8562
- auto applyIG = [IG, this ](ElementCount VF) -> bool {
8563
- bool Result = (VF.isVector () && // Query is illegal for VF == 1
8564
- CM.getWideningDecision (IG->getInsertPos (), VF) ==
8565
- LoopVectorizationCostModel::CM_Interleave);
8566
- // For scalable vectors, the only interleave factor currently supported
8567
- // is 2 since we require the (de)interleave2 intrinsics instead of
8568
- // shufflevectors.
8569
- assert ((!Result || !VF.isScalable () || IG->getFactor () == 2 ) &&
8570
- " Unsupported interleave factor for scalable vectors" );
8571
- return Result;
8572
- };
8573
- if (!getDecisionAndClampRange (applyIG, Range))
8574
- continue ;
8575
- InterleaveGroups.insert (IG);
8576
- for (unsigned i = 0 ; i < IG->getFactor (); i++)
8577
- if (Instruction *Member = IG->getMember (i))
8578
- RecipeBuilder.recordRecipeOf (Member);
8579
- };
8580
-
8581
8546
// ---------------------------------------------------------------------------
8582
8547
// Build initial VPlan: Scan the body of the loop in a topological order to
8583
8548
// visit each basic block after having visited its predecessor basic blocks.
@@ -8612,6 +8577,41 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8612
8577
bool HasNUW = Style == TailFoldingStyle::None;
8613
8578
addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW, DL);
8614
8579
8580
+ VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
8581
+
8582
+ // ---------------------------------------------------------------------------
8583
+ // Pre-construction: record ingredients whose recipes we'll need to further
8584
+ // process after constructing the initial VPlan.
8585
+ // ---------------------------------------------------------------------------
8586
+
8587
+ // For each interleave group which is relevant for this (possibly trimmed)
8588
+ // Range, add it to the set of groups to be later applied to the VPlan and add
8589
+ // placeholders for its members' Recipes which we'll be replacing with a
8590
+ // single VPInterleaveRecipe.
8591
+ for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups ()) {
8592
+ auto applyIG = [IG, this ](ElementCount VF) -> bool {
8593
+ bool Result = (VF.isVector () && // Query is illegal for VF == 1
8594
+ CM.getWideningDecision (IG->getInsertPos (), VF) ==
8595
+ LoopVectorizationCostModel::CM_Interleave);
8596
+ // For scalable vectors, the only interleave factor currently supported
8597
+ // is 2 since we require the (de)interleave2 intrinsics instead of
8598
+ // shufflevectors.
8599
+ assert ((!Result || !VF.isScalable () || IG->getFactor () == 2 ) &&
8600
+ " Unsupported interleave factor for scalable vectors" );
8601
+ return Result;
8602
+ };
8603
+ if (!getDecisionAndClampRange (applyIG, Range))
8604
+ continue ;
8605
+ InterleaveGroups.insert (IG);
8606
+ for (unsigned i = 0 ; i < IG->getFactor (); i++)
8607
+ if (Instruction *Member = IG->getMember (i))
8608
+ RecipeBuilder.recordRecipeOf (Member);
8609
+ };
8610
+
8611
+ // ---------------------------------------------------------------------------
8612
+ // Construct recipes for the instructions in the loop
8613
+ // ---------------------------------------------------------------------------
8614
+
8615
8615
// Scan the body of the loop in a topological order to visit each basic block
8616
8616
// after having visited its predecessor basic blocks.
8617
8617
LoopBlocksDFS DFS (OrigLoop);
@@ -8633,9 +8633,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8633
8633
Builder.setInsertPoint (VPBB);
8634
8634
8635
8635
if (VPBB == HeaderVPBB)
8636
- RecipeBuilder.createHeaderMask (*Plan );
8636
+ RecipeBuilder.createHeaderMask ();
8637
8637
else if (NeedsMasks)
8638
- RecipeBuilder.createBlockInMask (BB, *Plan );
8638
+ RecipeBuilder.createBlockInMask (BB);
8639
8639
8640
8640
// Introduce each ingredient into VPlan.
8641
8641
// TODO: Model and preserve debug intrinsics in VPlan.
@@ -8658,10 +8658,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8658
8658
Legal->isInvariantAddressOfReduction (SI->getPointerOperand ()))
8659
8659
continue ;
8660
8660
8661
- VPRecipeBase *Recipe = RecipeBuilder. tryToCreateWidenRecipe (
8662
- Instr, Operands, Range, VPBB, Plan );
8661
+ VPRecipeBase *Recipe =
8662
+ RecipeBuilder. tryToCreateWidenRecipe ( Instr, Operands, Range, VPBB);
8663
8663
if (!Recipe)
8664
- Recipe = RecipeBuilder.handleReplication (Instr, Range, *Plan );
8664
+ Recipe = RecipeBuilder.handleReplication (Instr, Range);
8665
8665
for (auto *Def : Recipe->definedValues ()) {
8666
8666
auto *UV = Def->getUnderlyingValue ();
8667
8667
Plan->addVPValue (UV, Def);
0 commit comments