@@ -3542,10 +3542,7 @@ static Value *addFastMathFlag(Value *V) {
3542
3542
/// \brief Estimate the overhead of scalarizing a value based on its type.
3543
3543
/// Insert and Extract are set if the result needs to be inserted and/or
3544
3544
/// extracted from vectors.
3545
- /// If the instruction is also to be predicated, add the cost of a PHI
3546
- /// node to the insertion cost.
3547
3545
static unsigned getScalarizationOverhead (Type *Ty, bool Insert, bool Extract,
3548
- bool Predicated,
3549
3546
const TargetTransformInfo &TTI) {
3550
3547
if (Ty->isVoidTy ())
3551
3548
return 0 ;
@@ -3556,41 +3553,30 @@ static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
3556
3553
for (unsigned I = 0 , E = Ty->getVectorNumElements (); I < E; ++I) {
3557
3554
if (Extract)
3558
3555
Cost += TTI.getVectorInstrCost (Instruction::ExtractElement, Ty, I);
3559
- if (Insert) {
3556
+ if (Insert)
3560
3557
Cost += TTI.getVectorInstrCost (Instruction::InsertElement, Ty, I);
3561
- if (Predicated)
3562
- Cost += TTI.getCFInstrCost (Instruction::PHI);
3563
- }
3564
3558
}
3565
3559
3566
- // If we have a predicated instruction, it may not be executed for each
3567
- // vector lane. Scale the cost by the probability of executing the
3568
- // predicated block.
3569
- if (Predicated)
3570
- Cost /= getReciprocalPredBlockProb ();
3571
-
3572
3560
return Cost;
3573
3561
}
3574
3562
3575
3563
/// \brief Estimate the overhead of scalarizing an Instruction based on the
3576
3564
/// types of its operands and return value.
///
/// The result sums the insertion overhead for \p RetTy (Insert = true,
/// Extract = false) and the extraction overhead for every type in \p OpTys
/// (Insert = false, Extract = true), as reported by the type-based overload.
3577
3565
static unsigned getScalarizationOverhead (SmallVectorImpl<Type *> &OpTys,
3578
- Type *RetTy, bool Predicated,
3566
+ Type *RetTy,
3579
3567
const TargetTransformInfo &TTI) {
3580
3568
unsigned ScalarizationCost =
3581
- getScalarizationOverhead (RetTy, true , false , Predicated, TTI);
3569
+ getScalarizationOverhead (RetTy, true , false , TTI);
3582
3570
3583
3571
for (Type *Ty : OpTys)
3584
- ScalarizationCost +=
3585
- getScalarizationOverhead (Ty, false , true , Predicated, TTI);
3572
+ ScalarizationCost += getScalarizationOverhead (Ty, false , true , TTI);
3586
3573
3587
3574
return ScalarizationCost;
3588
3575
}
3589
3576
3590
3577
/// \brief Estimate the overhead of scalarizing an instruction. This is a
3591
3578
/// convenience wrapper for the type-based getScalarizationOverhead API.
3592
3579
static unsigned getScalarizationOverhead (Instruction *I, unsigned VF,
3593
- bool Predicated,
3594
3580
const TargetTransformInfo &TTI) {
3595
3581
if (VF == 1 )
3596
3582
return 0 ;
@@ -3602,7 +3588,7 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
3602
3588
for (unsigned OpInd = 0 ; OpInd < OperandsNum; ++OpInd)
3603
3589
OpTys.push_back (ToVectorTy (I->getOperand (OpInd)->getType (), VF));
3604
3590
3605
- return getScalarizationOverhead (OpTys, RetTy, Predicated, TTI);
3591
+ return getScalarizationOverhead (OpTys, RetTy, TTI);
3606
3592
}
3607
3593
3608
3594
// Estimate cost of a call instruction CI if it were vectorized with factor VF.
@@ -3635,7 +3621,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
3635
3621
3636
3622
// Compute costs of unpacking argument values for the scalar calls and
3637
3623
// packing the return values to a vector.
3638
- unsigned ScalarizationCost = getScalarizationOverhead (Tys, RetTy, false , TTI);
3624
+ unsigned ScalarizationCost = getScalarizationOverhead (Tys, RetTy, TTI);
3639
3625
3640
3626
unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
3641
3627
@@ -6536,10 +6522,27 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6536
6522
// vector lane. Get the scalarization cost and scale this amount by the
6537
6523
// probability of executing the predicated block. If the instruction is not
6538
6524
// predicated, we fall through to the next case.
6539
- if (VF > 1 && Legal->isScalarWithPredication (I))
6540
- return VF * TTI.getArithmeticInstrCost (I->getOpcode (), RetTy) /
6541
- getReciprocalPredBlockProb () +
6542
- getScalarizationOverhead (I, VF, true , TTI);
6525
+ if (VF > 1 && Legal->isScalarWithPredication (I)) {
6526
+ unsigned Cost = 0 ;
6527
+
6528
+ // These instructions have a non-void type, so account for the phi nodes
6529
+ // that we will create. This cost is likely to be zero. The phi node
6530
+ // cost, if any, should be scaled by the block probability because it
6531
+ // models a copy at the end of each predicated block.
6532
+ Cost += VF * TTI.getCFInstrCost (Instruction::PHI);
6533
+
6534
+ // The cost of the non-predicated instruction.
6535
+ Cost += VF * TTI.getArithmeticInstrCost (I->getOpcode (), RetTy);
6536
+
6537
+ // The cost of insertelement and extractelement instructions needed for
6538
+ // scalarization.
6539
+ Cost += getScalarizationOverhead (I, VF, TTI);
6540
+
6541
+ // Scale the cost by the probability of executing the predicated blocks.
6542
+ // This assumes the predicated block for each vector lane is equally
6543
+ // likely.
6544
+ return Cost / getReciprocalPredBlockProb ();
6545
+ }
6543
6546
case Instruction::Add:
6544
6547
case Instruction::FAdd:
6545
6548
case Instruction::Sub:
@@ -6695,7 +6698,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6695
6698
6696
6699
// Get the overhead of the extractelement and insertelement instructions
6697
6700
// we might create due to scalarization.
6698
- Cost += getScalarizationOverhead (I, VF, false , TTI);
6701
+ Cost += getScalarizationOverhead (I, VF, TTI);
6699
6702
6700
6703
return Cost;
6701
6704
}
@@ -6782,7 +6785,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6782
6785
// The cost of executing VF copies of the scalar instruction. This opcode
6783
6786
// is unknown. Assume that it is the same as 'mul'.
6784
6787
return VF * TTI.getArithmeticInstrCost (Instruction::Mul, VectorTy) +
6785
- getScalarizationOverhead (I, VF, false , TTI);
6788
+ getScalarizationOverhead (I, VF, TTI);
6786
6789
} // end of switch.
6787
6790
}
6788
6791
0 commit comments