@@ -744,17 +744,39 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
744
744
Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind );
745
745
}
746
746
case VPInstruction::BranchOnCount: {
747
- // BranchOnCount will genearte icmp_eq + br instructions and the
748
- // cost of branch will be calculated in VPRegionBlock.
749
- // If the vector loop only executed once, ignore the cost of the cmp.
750
747
Type *ValTy = Ctx.Types .inferScalarType (getOperand (0 ));
748
+
749
+ // If the vector loop is executed only once, ignore the cost.
750
+ // TODO: We can remove this after hoisting `unrollByUF` and
751
+ // `optimizeForVFandUF`, which should optimize BranchOnCount out.
751
752
auto TC = dyn_cast_if_present<ConstantInt>(
752
753
getParent ()->getPlan ()->getTripCount ()->getUnderlyingValue ());
753
754
if (TC && VF.isFixed () && TC->getSExtValue () == VF.getFixedValue ())
754
755
return 0 ;
756
+
757
+ // BranchOnCount will generate icmp_eq + br instructions and the
758
+ // cost of branch will be calculated in VPRegionBlock.
755
759
return Ctx.TTI .getCmpSelInstrCost (Instruction::ICmp, ValTy, nullptr ,
756
760
CmpInst::ICMP_EQ, Ctx.CostKind );
757
761
}
762
+ case VPInstruction::BranchOnCond: {
763
+ // BranchOnCond will generate `extractelement` when the condition is vector
764
+ // type.
765
+ VPValue *Op = getOperand (0 );
766
+ VPRecipeBase *R = Op->getDefiningRecipe ();
767
+ if (R &&
768
+ any_of (R->operands (), [&](VPValue *V) { return !R->usesScalars (V); }) &&
769
+ VF.isVector ())
770
+ return Ctx.TTI .getVectorInstrCost (
771
+ Instruction::ExtractElement,
772
+ cast<VectorType>(
773
+ toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF)),
774
+ Ctx.CostKind , 0 , nullptr , nullptr );
775
+
776
+ // Otherwise, BranchOnCond is free since the branch cost is already
777
+ // calculated by VPBB.
778
+ return 0 ;
779
+ }
758
780
default :
759
781
// TODO: Compute the cost of other VPInstructions once the legacy cost model has
760
782
// been retired.
0 commit comments