@@ -743,17 +743,39 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
743
743
Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind );
744
744
}
745
745
case VPInstruction::BranchOnCount: {
746
- // BranchOnCount will genearte icmp_eq + br instructions and the
747
- // cost of branch will be calculated in VPRegionBlock.
748
- // If the vector loop only executed once, ignore the cost of the cmp.
749
746
Type *ValTy = Ctx.Types .inferScalarType (getOperand (0 ));
747
+
748
+ // If the vector loop only executed once, ignore the cost.
749
+ // TODO: We can remove this after hoisting `unrollByUF` and
750
+ // `optimizeForVFandUF`, which should optimize BranchOnCount out.
750
751
auto TC = dyn_cast_if_present<ConstantInt>(
751
752
getParent ()->getPlan ()->getTripCount ()->getUnderlyingValue ());
752
753
if (TC && VF.isFixed () && TC->getSExtValue () == VF.getFixedValue ())
753
754
return 0 ;
755
+
756
+ // BranchOnCount will generate icmp_eq + br instructions and the
757
+ // cost of branch will be calculated in VPRegionBlock.
754
758
return Ctx.TTI .getCmpSelInstrCost (Instruction::ICmp, ValTy, nullptr ,
755
759
CmpInst::ICMP_EQ, Ctx.CostKind );
756
760
}
761
+ case VPInstruction::BranchOnCond: {
762
+ // BranchOnCond will generate `extractelement` when the condition is vector
763
+ // type.
764
+ VPValue *Op = getOperand (0 );
765
+ VPRecipeBase *R = Op->getDefiningRecipe ();
766
+ if (R &&
767
+ any_of (R->operands (), [&](VPValue *V) { return !R->usesScalars (V); }) &&
768
+ VF.isVector ())
769
+ return Ctx.TTI .getVectorInstrCost (
770
+ Instruction::ExtractElement,
771
+ cast<VectorType>(
772
+ toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF)),
773
+ Ctx.CostKind , 0 , nullptr , nullptr );
774
+
775
+ // Otherwise, BranchOnCond is free since the branch cost is already
776
+ // calculated by VPBB.
777
+ return 0 ;
778
+ }
757
779
default :
758
780
// TODO: Compute cost for other VPInstructions once the legacy cost model has
759
781
// been retired.
0 commit comments