-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV][CostModel] Add getRISCVInstructionCost() to TTI for Cost… #73651
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e3965f7
9ff94a2
45ac18e
d136582
3d6c042
3cfbca0
0921879
fce7beb
5edd165
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,65 @@ static cl::opt<unsigned> SLPMaxVF( | |
"exclusively by SLP vectorizer."), | ||
cl::Hidden); | ||
|
||
/// Estimate the cost of a sequence of RISC-V vector instructions, given by | ||
/// opcode, operating on vectors of type \p VT. | ||
/// | ||
/// \param OpCodes  opcodes of the instructions in the sequence; each entry | ||
///                 contributes to the returned cost. | ||
/// \param VT       machine value type used for the LMUL-based and | ||
///                 per-instruction cost queries on TLI. | ||
/// \param CostKind cost kind requested by the caller. | ||
/// \returns the number of opcodes for TCK_CodeSize; the LMUL cost times the | ||
///          number of opcodes for any kind other than TCK_RecipThroughput | ||
///          and TCK_Latency; otherwise the sum of the per-opcode costs | ||
///          computed in the switch below. | ||
InstructionCost | ||
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT, | ||
TTI::TargetCostKind CostKind) { | ||
size_t NumInstr = OpCodes.size(); | ||
// Code size is modelled as one unit per instruction, independent of VT. | ||
if (CostKind == TTI::TCK_CodeSize) | ||
return NumInstr; | ||
InstructionCost LMULCost = TLI->getLMULCost(VT); | ||
// Any remaining kind other than throughput/latency gets a flat LMUL cost | ||
// per instruction, without looking at the individual opcodes. | ||
if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency)) | ||
return LMULCost * NumInstr; | ||
// Throughput/latency: accumulate an opcode-specific cost per instruction. | ||
InstructionCost Cost = 0; | ||
for (auto Op : OpCodes) { | ||
switch (Op) { | ||
case RISCV::VRGATHER_VI: | ||
Cost += TLI->getVRGatherVICost(VT); | ||
break; | ||
case RISCV::VRGATHER_VV: | ||
Cost += TLI->getVRGatherVVCost(VT); | ||
break; | ||
// Slides are costed separately for the .vi and .vx forms. | ||
case RISCV::VSLIDEUP_VI: | ||
case RISCV::VSLIDEDOWN_VI: | ||
Cost += TLI->getVSlideVICost(VT); | ||
break; | ||
case RISCV::VSLIDEUP_VX: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. .vx and .vi slides are somewhat different. The .vi instructions know at decode time how far to slide so hardware can know early which source DLEN pieces are needed for each DLEN piece of the result. .vx requires the sources to determined after looking at the scalar register. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think it would be good to expand getVSlideCost to getVSlideVXCost and getVSlideVICost. Fixed. |
||
case RISCV::VSLIDEDOWN_VX: | ||
Cost += TLI->getVSlideVXCost(VT); | ||
break; | ||
// All reductions listed here except VFREDOSUM_VS are charged | ||
// ceil(log2(VL)). | ||
case RISCV::VREDMAX_VS: | ||
case RISCV::VREDMIN_VS: | ||
case RISCV::VREDMAXU_VS: | ||
case RISCV::VREDMINU_VS: | ||
case RISCV::VREDSUM_VS: | ||
case RISCV::VREDAND_VS: | ||
case RISCV::VREDOR_VS: | ||
case RISCV::VREDXOR_VS: | ||
case RISCV::VFREDMAX_VS: | ||
case RISCV::VFREDMIN_VS: | ||
case RISCV::VFREDUSUM_VS: { | ||
// VL is the minimum element count of VT, multiplied by the tuning vscale | ||
// when VT is not a fixed-length vector. | ||
unsigned VL = VT.getVectorMinNumElements(); | ||
if (!VT.isFixedLengthVector()) | ||
// NOTE(review): getVScaleForTuning() is dereferenced without a check; | ||
// confirm it always holds a value when this path is reached. | ||
VL *= *getVScaleForTuning(); | ||
Cost += Log2_32_Ceil(VL); | ||
break; | ||
} | ||
// VFREDOSUM_VS is charged linearly in VL rather than logarithmically. | ||
case RISCV::VFREDOSUM_VS: { | ||
unsigned VL = VT.getVectorMinNumElements(); | ||
if (!VT.isFixedLengthVector()) | ||
VL *= *getVScaleForTuning(); | ||
Cost += VL; | ||
break; | ||
} | ||
// VMV_S_X deliberately falls through to the default LMUL cost for now. | ||
case RISCV::VMV_S_X: | ||
// FIXME: VMV_S_X doesn't use LMUL, the cost should be 1 | ||
// Every opcode without a dedicated case is charged the LMUL cost of VT. | ||
default: | ||
Cost += LMULCost; | ||
} | ||
} | ||
return Cost; | ||
} | ||
|
||
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, | ||
TTI::TargetCostKind CostKind) { | ||
assert(Ty->isIntegerTy() && | ||
|
@@ -279,7 +338,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |
// Example sequence: | ||
// vnsrl.wi v10, v8, 0 | ||
if (equal(DeinterleaveMask, Mask)) | ||
return LT.first * TLI->getLMULCost(LT.second); | ||
return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI, | ||
LT.second, CostKind); | ||
} | ||
} | ||
} | ||
|
@@ -290,7 +350,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |
LT.second.getVectorNumElements() <= 256)) { | ||
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext()); | ||
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); | ||
return IndexCost + TLI->getVRGatherVVCost(LT.second); | ||
return IndexCost + | ||
getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind); | ||
} | ||
[[fallthrough]]; | ||
} | ||
|
@@ -308,7 +369,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC); | ||
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); | ||
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); | ||
return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost; | ||
return 2 * IndexCost + | ||
getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV}, | ||
LT.second, CostKind) + | ||
MaskCost; | ||
} | ||
[[fallthrough]]; | ||
} | ||
|
@@ -363,19 +427,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |
// Example sequence: | ||
// vsetivli zero, 4, e8, mf2, tu, ma (ignored) | ||
// vslidedown.vi v8, v9, 2 | ||
return LT.first * TLI->getVSlideCost(LT.second); | ||
return LT.first * | ||
getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind); | ||
case TTI::SK_InsertSubvector: | ||
// Example sequence: | ||
// vsetivli zero, 4, e8, mf2, tu, ma (ignored) | ||
// vslideup.vi v8, v9, 2 | ||
return LT.first * TLI->getVSlideCost(LT.second); | ||
return LT.first * | ||
getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind); | ||
case TTI::SK_Select: { | ||
// Example sequence: | ||
// li a0, 90 | ||
// vsetivli zero, 8, e8, mf2, ta, ma (ignored) | ||
// vmv.s.x v0, a0 | ||
// vmerge.vvm v8, v9, v8, v0 | ||
return LT.first * 3 * TLI->getLMULCost(LT.second); | ||
return LT.first * | ||
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li | ||
getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM}, | ||
LT.second, CostKind)); | ||
} | ||
case TTI::SK_Broadcast: { | ||
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) == | ||
|
@@ -387,7 +456,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |
// vsetivli zero, 2, e8, mf8, ta, ma (ignored) | ||
// vmv.v.x v8, a0 | ||
// vmsne.vi v0, v8, 0 | ||
return LT.first * TLI->getLMULCost(LT.second) * 3; | ||
return LT.first * | ||
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi | ||
getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI}, | ||
LT.second, CostKind)); | ||
} | ||
// Example sequence: | ||
// vsetivli zero, 2, e8, mf8, ta, mu (ignored) | ||
|
@@ -398,24 +470,40 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |
// vmv.v.x v8, a0 | ||
// vmsne.vi v0, v8, 0 | ||
|
||
return LT.first * TLI->getLMULCost(LT.second) * 6; | ||
return LT.first * | ||
(TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi | ||
TLI->getLMULCost( | ||
LT.second) + // FIXME: vmv.x.s is the same as extractelement | ||
getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM, | ||
RISCV::VMV_V_X, RISCV::VMSNE_VI}, | ||
LT.second, CostKind)); | ||
} | ||
|
||
if (HasScalar) { | ||
// Example sequence: | ||
// vmv.v.x v8, a0 | ||
return LT.first * TLI->getLMULCost(LT.second); | ||
return LT.first * | ||
getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind); | ||
} | ||
|
||
// Example sequence: | ||
// vrgather.vi v9, v8, 0 | ||
return LT.first * TLI->getVRGatherVICost(LT.second); | ||
return LT.first * | ||
getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind); | ||
} | ||
case TTI::SK_Splice: | ||
case TTI::SK_Splice: { | ||
// vslidedown+vslideup. | ||
// TODO: Multiplying by LT.first implies this legalizes into multiple copies | ||
// of similar code, but I think we expand through memory. | ||
return 2 * LT.first * TLI->getVSlideCost(LT.second); | ||
ArrayRef<unsigned> Opcodes; | ||
if (Index >= 0 && Index < 32) | ||
Opcodes = {RISCV::VSLIDEDOWN_VI, RISCV::VSLIDEUP_VX}; | ||
else if (Index < 0 && Index > -32) | ||
Opcodes = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VI}; | ||
else | ||
Opcodes = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX}; | ||
return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind); | ||
} | ||
case TTI::SK_Reverse: { | ||
// TODO: Cases to improve here: | ||
// * Illegal vector types | ||
|
@@ -435,7 +523,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |
if (LT.second.isFixedLengthVector()) | ||
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices | ||
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1; | ||
InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second); | ||
// FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX} | ||
InstructionCost GatherCost = | ||
2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind); | ||
// Mask operation additionally required extend and truncate | ||
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0; | ||
return LT.first * (LenCost + GatherCost + ExtendCost); | ||
|
Uh oh!
There was an error while loading. Please reload this page.