-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][CostModel] Improve cost estimate of scalarizing a vector di… #118055
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
#include "llvm/CodeGen/BasicTTIImpl.h" | ||
#include "llvm/CodeGen/CostTable.h" | ||
#include "llvm/CodeGen/TargetLowering.h" | ||
#include "llvm/IR/Constants.h" | ||
#include "llvm/IR/IntrinsicInst.h" | ||
#include "llvm/IR/Intrinsics.h" | ||
#include "llvm/IR/IntrinsicsAArch64.h" | ||
|
@@ -3572,6 +3573,40 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( | |
Cost *= 4; | ||
return Cost; | ||
} else { | ||
// If the information about individual scalars being vectorized is | ||
// available, this yields better cost estimation. | ||
if (auto *VTy = dyn_cast<FixedVectorType>(Ty); VTy && !Args.empty()) { | ||
assert(Args.size() % 2 == 0 && "Args size should be even"); | ||
InstructionCost InsertExtractCost = | ||
ST->getVectorInsertExtractBaseCost(); | ||
// If the cost of single sdiv is inquired through the cost-model. | ||
// FIXME: remove the isa checks once the PR 122236 lands. | ||
if (Args.size() == 2 && | ||
!(isa<ConstantVector>(Args[1]) || | ||
isa<ConstantDataVector>(Args[1]) || | ||
isa<ConstantExpr>(Args[1])) && | ||
Comment on lines
+3584
to
+3587
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume you do not need to pass args for this; instead, you can rely on Op1Info and Op2Info. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the tests where only Op1 or only Op2 is constant (see the added tests), simply relying on Op1Info and Op2Info does not work. In such cases, we need to pass args. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cost of the division when the divisor is constant is less than in the case where the divisor is unknown. When we are considering the scalarization cost, we are considering the div cost of each lane plus the additional insert/extract cost. This is where this patch yields a lower cost, using the extra knowledge about the values that go into those lanes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Op2Info provides info about the constantness of the divisor. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, as I have said before, if the node is scalarized, it must be represented as a buildvector node. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please check the case. There was a problem hiding this comment. 
Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I have already tried to justify why this won't be a better option. |
||
none_of(Args, IsaPred<UndefValue, PoisonValue>)) { | ||
unsigned NElts = VTy->getNumElements(); | ||
// Compute per element cost | ||
Cost = getArithmeticInstrCost(Opcode, VTy->getScalarType(), | ||
CostKind, Op1Info.getNoProps(), | ||
Op2Info.getNoProps()); | ||
Cost += 3 * InsertExtractCost; | ||
Cost *= NElts; | ||
return Cost; | ||
} else if (Args.size() > 2) // vectorization cost is inquired | ||
{ | ||
Cost = (3 * InsertExtractCost) * VTy->getNumElements(); | ||
for (int i = 0, Sz = Args.size(); i < Sz; i += 2) { | ||
Cost += | ||
getArithmeticInstrCost(Opcode, VTy->getScalarType(), CostKind, | ||
TTI::getOperandInfo(Args[i]), | ||
TTI::getOperandInfo(Args[i + 1])); | ||
} | ||
return Cost; | ||
} | ||
} | ||
|
||
// If one of the operands is a uniform constant then the cost for each | ||
// element is Cost for insertion, extraction and division. | ||
// Insertion cost = 2, Extraction Cost = 2, Division = cost for the | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11650,9 +11650,20 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, | |
unsigned OpIdx = isa<UnaryOperator>(VL0) ? 0 : 1; | ||
TTI::OperandValueInfo Op1Info = getOperandInfo(E->getOperand(0)); | ||
TTI::OperandValueInfo Op2Info = getOperandInfo(E->getOperand(OpIdx)); | ||
return TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info, | ||
Op2Info, {}, nullptr, TLI) + | ||
CommonCost; | ||
SmallVector<Value *, 16> Operands; | ||
if (all_of(E->Scalars, [ShuffleOrOp](Value *V) { | ||
return !IsaPred<UndefValue, PoisonValue>(V) && | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
cast<Instruction>(V)->getOpcode() == ShuffleOrOp; | ||
})) { | ||
for (auto *Scalar : E->Scalars) { | ||
Instruction *I = cast<Instruction>(Scalar); | ||
auto IOperands = I->operand_values(); | ||
Operands.insert(Operands.end(), IOperands.begin(), IOperands.end()); | ||
} | ||
} | ||
return CommonCost + | ||
TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info, | ||
Op2Info, Operands, nullptr, TLI); | ||
}; | ||
return GetCostDiff(GetScalarCost, GetVectorCost); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will remove this in the next version. Got this in accidentally.