Skip to content

[TTI][RISCV] Deduplicate type-based VP costing #115983

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 61 additions & 62 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1574,6 +1574,67 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (Intrinsic::isTargetIntrinsic(IID))
return TargetTransformInfo::TCC_Basic;

// VP Intrinsics should have the same cost as their non-vp counterpart.
// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
// counterpart when the vector length argument is smaller than the maximum
// vector length.
// TODO: Support other kinds of VPIntrinsics
if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
std::optional<unsigned> FOp =
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
if (FOp) {
if (ICA.getID() == Intrinsic::vp_load) {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
unsigned AS = 0;
if (ICA.getArgTypes().size() > 1)
if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[0]))
AS = PtrTy->getAddressSpace();
return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
AS, CostKind);
}
if (ICA.getID() == Intrinsic::vp_store) {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
unsigned AS = 0;
if (ICA.getArgTypes().size() >= 2)
if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[1]))
AS = PtrTy->getAddressSpace();
return thisT()->getMemoryOpCost(*FOp, ICA.getArgTypes()[0], Alignment,
AS, CostKind);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Querying getMemoryOpCost() without the underlying instruction may cause the ARM TTI to return wrong costs.
But I think that's fine, since ARM does not use VP intrinsics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. This comes from the existing code, though, so the non-type-based cost would have been incorrect anyway.

}
if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
CostKind);
}
}

std::optional<Intrinsic::ID> FID =
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
if (FID) {
// Non-vp version will have same arg types except mask and vector
// length.
assert(ICA.getArgTypes().size() >= 2 &&
"Expected VPIntrinsic to have Mask and Vector Length args and "
"types");
ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);

// VPReduction intrinsics have a start value argument that their non-vp
// counterparts do not have, except for the fadd and fmul non-vp
// counterpart.
if (VPReductionIntrinsic::isVPReduction(ICA.getID()) &&
*FID != Intrinsic::vector_reduce_fadd &&
*FID != Intrinsic::vector_reduce_fmul)
NewTys = NewTys.drop_front();

IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
ICA.getFlags());
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
}
}

if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

Expand Down Expand Up @@ -1834,68 +1895,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
}

// VP Intrinsics should have the same cost as their non-vp counterpart.
// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
// counterpart when the vector length argument is smaller than the maximum
// vector length.
// TODO: Support other kinds of VPIntrinsics
if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
std::optional<unsigned> FOp =
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
if (FOp) {
if (ICA.getID() == Intrinsic::vp_load) {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
unsigned AS = 0;
if (ICA.getArgs().size() > 1)
if (auto *PtrTy =
dyn_cast<PointerType>(ICA.getArgs()[0]->getType()))
AS = PtrTy->getAddressSpace();
return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
AS, CostKind);
}
if (ICA.getID() == Intrinsic::vp_store) {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
unsigned AS = 0;
if (ICA.getArgs().size() >= 2)
if (auto *PtrTy =
dyn_cast<PointerType>(ICA.getArgs()[1]->getType()))
AS = PtrTy->getAddressSpace();
return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment,
AS, CostKind);
}
if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
CostKind);
}
}

std::optional<Intrinsic::ID> FID =
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
if (FID) {
// Non-vp version will have same Args/Tys except mask and vector length.
assert(ICA.getArgs().size() >= 2 && ICA.getArgTypes().size() >= 2 &&
"Expected VPIntrinsic to have Mask and Vector Length args and "
"types");
ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);

// VPReduction intrinsics have a start value argument that their non-vp
// counterparts do not have, except for the fadd and fmul non-vp
// counterpart.
if (VPReductionIntrinsic::isVPReduction(ICA.getID()) &&
*FID != Intrinsic::vector_reduce_fadd &&
*FID != Intrinsic::vector_reduce_fmul)
NewTys = NewTys.drop_front();

IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
ICA.getFlags());
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
}
}

// Assume that we need to scalarize this intrinsic.
// Compute the scalarization overhead based on Args for a vector
// intrinsic.
Expand Down
37 changes: 0 additions & 37 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1104,26 +1104,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return Cost * LT.first;
break;
}
// vp integer arithmetic ops.
case Intrinsic::vp_add:
case Intrinsic::vp_and:
case Intrinsic::vp_ashr:
case Intrinsic::vp_lshr:
case Intrinsic::vp_mul:
case Intrinsic::vp_or:
case Intrinsic::vp_sdiv:
case Intrinsic::vp_shl:
case Intrinsic::vp_srem:
case Intrinsic::vp_sub:
case Intrinsic::vp_udiv:
case Intrinsic::vp_urem:
case Intrinsic::vp_xor:
// vp float arithmetic ops.
case Intrinsic::vp_fadd:
case Intrinsic::vp_fsub:
case Intrinsic::vp_fmul:
case Intrinsic::vp_fdiv:
case Intrinsic::vp_frem:
case Intrinsic::vp_fneg: {
std::optional<unsigned> FOp =
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
Expand Down Expand Up @@ -1164,23 +1144,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0], ICA.getReturnType(),
UI->getPredicate(), CostKind);
}
// vp load/store
case Intrinsic::vp_load:
case Intrinsic::vp_store: {
if (!ICA.getInst())
break;
Intrinsic::ID IID = ICA.getID();
std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID);
assert(FOp.has_value());
auto *UI = cast<VPIntrinsic>(ICA.getInst());
if (ICA.getID() == Intrinsic::vp_load)
return getMemoryOpCost(
*FOp, ICA.getReturnType(), UI->getPointerAlignment(),
UI->getOperand(0)->getType()->getPointerAddressSpace(), CostKind);
return getMemoryOpCost(
*FOp, ICA.getArgTypes()[0], UI->getPointerAlignment(),
UI->getOperand(1)->getType()->getPointerAddressSpace(), CostKind);
}
case Intrinsic::vp_select: {
Intrinsic::ID IID = ICA.getID();
std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID);
Expand Down
Loading
Loading