Skip to content

Commit 8aee783

Browse files
committed
[VP] Cost model for VPMemory operations on PowerPC.
PPC Implementation of getVPMemoryOpCost and hasActiveVectorLength. Reviewed By: Roland Froese Differential Revision: https://reviews.llvm.org/D109417
1 parent 81f9dc8 commit 8aee783

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,3 +1388,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
13881388

13891389
return false;
13901390
}
1391+
1392+
bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
1393+
Align Alignment) const {
1394+
// Only load and stores instructions can have variable vector length on Power.
1395+
if (Opcode != Instruction::Load && Opcode != Instruction::Store)
1396+
return false;
1397+
// Loads/stores with length instructions use bits 0-7 of the GPR operand and
1398+
// therefore cannot be used in 32-bit mode.
1399+
if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
1400+
return false;
1401+
if (auto *VecTy = dyn_cast<FixedVectorType>(DataType)) {
1402+
unsigned VecWidth = DataType->getPrimitiveSizeInBits();
1403+
return VecWidth == 128;
1404+
}
1405+
Type *ScalarTy = DataType->getScalarType();
1406+
1407+
if (ScalarTy->isPointerTy())
1408+
return true;
1409+
1410+
if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
1411+
return true;
1412+
1413+
if (!ScalarTy->isIntegerTy())
1414+
return false;
1415+
1416+
unsigned IntWidth = ScalarTy->getIntegerBitWidth();
1417+
return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
1418+
}
1419+
1420+
/// Estimate the cost of a vector-predicated (VP) load or store on PowerPC.
///
/// \param Opcode       Instruction::Load or Instruction::Store (asserted).
/// \param Src          Type being accessed; asserted to be a fixed vector
///                     once the early exits below have been passed.
/// \param Alignment    Known alignment of the access.
/// \param AddressSpace Address space of the access.
/// \param CostKind     Cost model requested; only TCK_RecipThroughput is
///                     refined here.
/// \param I            Optional instruction context, forwarded to the base
///                     implementation.
/// \returns The base-class cost when \p Src has no simple value type or when
///          \p CostKind is not TCK_RecipThroughput; the masked memory op cost
///          when no length-controlled instruction is available; otherwise the
///          legalized cost, blended with a pipeline-flush penalty for
///          under-aligned accesses on Power9.
InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                              Align Alignment,
                                              unsigned AddressSpace,
                                              TTI::TargetCostKind CostKind,
                                              const Instruction *I) {
  // Named distinctly from the legalized cost computed further down so the two
  // values can never be confused (the original shadowed one with the other).
  InstructionCost BaseCost = BaseT::getVPMemoryOpCost(
      Opcode, Src, Alignment, AddressSpace, CostKind, I);
  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return BaseCost;
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseCost;

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
  assert(SrcVTy && "Expected a vector type for VP memory operations");

  // Usually we should not get to this point, but the following is an attempt to
  // model the cost of legalization. Currently we can only lower intrinsics with
  // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
  if (!hasActiveVectorLength(Opcode, Src, Alignment))
    return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                 CostKind);

  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost LegalizedCost = LT.first * CostFactor;
  assert(LegalizedCost.isValid() && "Expected valid cost");

  // On P9 but not on P10, if the op is misaligned then it will cause a
  // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
  // ones.
  const Align DesiredAlignment(16);
  if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
    return LegalizedCost;

  // Since alignment may be under estimated, we try to compute the probability
  // that the actual address is aligned to the desired boundary. For example
  // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
  // time, while a 4-byte aligned load has a 25% chance of being 16-byte
  // aligned.
  const float AlignmentProb =
      static_cast<float>(Alignment.value()) / DesiredAlignment.value();
  const float MisalignmentProb = 1.0f - AlignmentProb;
  const float ExpectedCost = (MisalignmentProb * P9PipelineFlushEstimate) +
                             (AlignmentProb * *LegalizedCost.getValue());
  // The cost type is integral, so the expected value is truncated; the
  // conversion is spelled out instead of happening implicitly on return.
  return static_cast<InstructionCost::CostType>(ExpectedCost);
}

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,17 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
137137
bool areFunctionArgsABICompatible(const Function *Caller,
138138
const Function *Callee,
139139
SmallPtrSetImpl<Argument *> &Args) const;
140+
/// Return true if a load or store of \p DataType can use a length-controlled
/// memory instruction: the opcode must be Load or Store, the subtarget must
/// be 64-bit with P9 or P10 vector support, and the type must be a 128-bit
/// fixed vector or have a pointer, float, double, or 8/16/32/64-bit integer
/// scalar type.
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
141+
Align Alignment) const;
142+
/// Cost of a vector-predicated load or store of \p Src. Returns the base
/// implementation's cost when \p Src has no simple value type or when
/// \p CostKind is not TCK_RecipThroughput. Otherwise returns the legalized
/// cost (with a pipeline-flush penalty for under-aligned accesses on Power9)
/// when hasActiveVectorLength() holds, and the masked memory op cost when it
/// does not.
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
143+
unsigned AddressSpace,
144+
TTI::TargetCostKind CostKind,
145+
const Instruction *I = nullptr);
146+
147+
private:
148+
// Estimated cost of the pipeline flush caused by a misaligned
// length-controlled vector memory operation on Power9. getVPMemoryOpCost
// weights it by the probability that the access is misaligned.
149+
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
150+
140151
/// @}
141152
};
142153

0 commit comments

Comments
 (0)