-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLVM][InstCombine][SVE] Refactor sve.mul/fmul combines. #134116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1112,6 +1112,19 @@ struct SVEIntrinsicInfo { | |
return *this; | ||
} | ||
|
||
bool hasMatchingIROpode() const { return IROpcode != 0; } | ||
|
||
unsigned getMatchingIROpode() const { | ||
assert(hasMatchingIROpode() && "Property not set!"); | ||
return IROpcode; | ||
} | ||
|
||
SVEIntrinsicInfo &setMatchingIROpcode(unsigned Opcode) { | ||
assert(!hasMatchingIROpode() && "Cannot set property twice!"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: I'm not sure I see the value of this assert, as I could imagine a use-case for someone wanting to override a previously set opcode? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I consider that a bug. The intent is for an intrinsic's |
||
IROpcode = Opcode; | ||
return *this; | ||
} | ||
|
||
// | ||
// Properties relating to the result of inactive lanes. | ||
// | ||
|
@@ -1187,6 +1200,7 @@ struct SVEIntrinsicInfo { | |
unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max(); | ||
|
||
Intrinsic::ID UndefIntrinsic = Intrinsic::not_intrinsic; | ||
unsigned IROpcode = 0; | ||
|
||
enum PredicationStyle { | ||
Uninitialized, | ||
|
@@ -1270,7 +1284,8 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) { | |
case Intrinsic::aarch64_sve_fmls: | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmls_u); | ||
case Intrinsic::aarch64_sve_fmul: | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u); | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u) | ||
.setMatchingIROpcode(Instruction::FMul); | ||
case Intrinsic::aarch64_sve_fmulx: | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmulx_u); | ||
case Intrinsic::aarch64_sve_fnmla: | ||
|
@@ -1286,7 +1301,8 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) { | |
case Intrinsic::aarch64_sve_mls: | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mls_u); | ||
case Intrinsic::aarch64_sve_mul: | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u); | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u) | ||
.setMatchingIROpcode(Instruction::Mul); | ||
case Intrinsic::aarch64_sve_sabd: | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sabd_u); | ||
case Intrinsic::aarch64_sve_smax: | ||
|
@@ -1324,6 +1340,13 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) { | |
case Intrinsic::aarch64_sve_uqsub: | ||
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u); | ||
|
||
case Intrinsic::aarch64_sve_fmul_u: | ||
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode( | ||
Instruction::FMul); | ||
case Intrinsic::aarch64_sve_mul_u: | ||
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode( | ||
Instruction::Mul); | ||
|
||
case Intrinsic::aarch64_sve_addqv: | ||
case Intrinsic::aarch64_sve_and_z: | ||
case Intrinsic::aarch64_sve_bic_z: | ||
|
@@ -2206,45 +2229,63 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC, | |
return std::nullopt; | ||
} | ||
|
||
static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC, | ||
IntrinsicInst &II) { | ||
auto *OpPredicate = II.getOperand(0); | ||
auto *OpMultiplicand = II.getOperand(1); | ||
auto *OpMultiplier = II.getOperand(2); | ||
// Simplify `V` by only considering the operations that affect active lanes. | ||
// This function should only return existing Values or newly created Constants. | ||
static Value *stripInactiveLanes(Value *V, const Value *Pg) { | ||
auto *Dup = dyn_cast<IntrinsicInst>(V); | ||
if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup && | ||
Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2))) | ||
return ConstantVector::getSplat( | ||
cast<VectorType>(V->getType())->getElementCount(), | ||
cast<Constant>(Dup->getOperand(2))); | ||
|
||
return V; | ||
} | ||
|
||
// Return true if a given instruction is a unit splat value, false otherwise. | ||
auto IsUnitSplat = [](auto *I) { | ||
auto *SplatValue = getSplatValue(I); | ||
if (!SplatValue) | ||
return false; | ||
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One()); | ||
}; | ||
static std::optional<Instruction *> | ||
instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II, | ||
const SVEIntrinsicInfo &IInfo) { | ||
const unsigned Opc = IInfo.getMatchingIROpode(); | ||
if (!Instruction::isBinaryOp(Opc)) | ||
return std::nullopt; | ||
|
||
// Return true if a given instruction is an aarch64_sve_dup intrinsic call | ||
// with a unit splat value, false otherwise. | ||
auto IsUnitDup = [](auto *I) { | ||
auto *IntrI = dyn_cast<IntrinsicInst>(I); | ||
if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup) | ||
return false; | ||
Value *Pg = II.getOperand(0); | ||
Value *Op1 = II.getOperand(1); | ||
Value *Op2 = II.getOperand(2); | ||
const DataLayout &DL = II.getDataLayout(); | ||
|
||
auto *SplatValue = IntrI->getOperand(2); | ||
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One()); | ||
}; | ||
// Canonicalise constants to the RHS. | ||
if (Instruction::isCommutative(Opc) && IInfo.inactiveLanesAreNotDefined() && | ||
isa<Constant>(Op1) && !isa<Constant>(Op2)) { | ||
IC.replaceOperand(II, 1, Op2); | ||
IC.replaceOperand(II, 2, Op1); | ||
return &II; | ||
} | ||
Comment on lines
+2257
to
+2263
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this code would be removed, would this case normally be handled by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. This change is not related to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I look at the code-path it takes, then removing this code would always send it down the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I think I misunderstood the original question? I figured you meant "without this code does Canonicalisation is not a simplification because simplification can only return an existing operand (or child operand) or generate a new constant. Canonicalising the operands would require There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Thanks, that clarifies it, my question was indeed if this function did the canonicalisation. If the answer is no, this code makes sense. |
||
|
||
if (IsUnitSplat(OpMultiplier)) { | ||
// [f]mul pg %n, (dupx 1) => %n | ||
OpMultiplicand->takeName(&II); | ||
return IC.replaceInstUsesWith(II, OpMultiplicand); | ||
} else if (IsUnitDup(OpMultiplier)) { | ||
// [f]mul pg %n, (dup pg 1) => %n | ||
auto *DupInst = cast<IntrinsicInst>(OpMultiplier); | ||
auto *DupPg = DupInst->getOperand(1); | ||
// TODO: this is naive. The optimization is still valid if DupPg | ||
// 'encompasses' OpPredicate, not only if they're the same predicate. | ||
if (OpPredicate == DupPg) { | ||
OpMultiplicand->takeName(&II); | ||
return IC.replaceInstUsesWith(II, OpMultiplicand); | ||
} | ||
// Only active lanes matter when simplifying the operation. | ||
Op1 = stripInactiveLanes(Op1, Pg); | ||
Op2 = stripInactiveLanes(Op2, Pg); | ||
Comment on lines
+2265
to
+2267
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there value in stripping the inactive lanes before the "Canonicalise constants to the RHS" above, so that the case above may hit more often? (e.g. a case of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The canonicalisation does not relate to The benefit would be to simplify the writing of future combines to remove the need to check both operands. Is this something you want me to implement now or defer until such combines exist? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there's no value at this point, I don't think there's a need. |
||
|
||
Value *SimpleII; | ||
if (auto FII = dyn_cast<FPMathOperator>(&II)) | ||
SimpleII = simplifyBinOp(Opc, Op1, Op2, FII->getFastMathFlags(), DL); | ||
else | ||
SimpleII = simplifyBinOp(Opc, Op1, Op2, DL); | ||
|
||
if (SimpleII) { | ||
if (IInfo.inactiveLanesAreNotDefined()) | ||
return IC.replaceInstUsesWith(II, SimpleII); | ||
|
||
Value *Inactive = | ||
II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom()); | ||
|
||
// The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)). | ||
if (SimpleII == Inactive) | ||
return IC.replaceInstUsesWith(II, SimpleII); | ||
|
||
// Inactive lanes must be preserved. | ||
SimpleII = IC.Builder.CreateSelect(Pg, SimpleII, Inactive); | ||
return IC.replaceInstUsesWith(II, SimpleII); | ||
} | ||
|
||
return instCombineSVEVectorBinOp(IC, II); | ||
|
@@ -2651,9 +2692,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, | |
case Intrinsic::aarch64_sve_fadd_u: | ||
return instCombineSVEVectorFAddU(IC, II); | ||
case Intrinsic::aarch64_sve_fmul: | ||
return instCombineSVEVectorMul(IC, II); | ||
return instCombineSVEVectorMul(IC, II, IInfo); | ||
case Intrinsic::aarch64_sve_fmul_u: | ||
return instCombineSVEVectorMul(IC, II); | ||
return instCombineSVEVectorMul(IC, II, IInfo); | ||
case Intrinsic::aarch64_sve_fsub: | ||
return instCombineSVEVectorFSub(IC, II); | ||
case Intrinsic::aarch64_sve_fsub_u: | ||
|
@@ -2665,9 +2706,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, | |
Intrinsic::aarch64_sve_mla_u>( | ||
IC, II, true); | ||
case Intrinsic::aarch64_sve_mul: | ||
return instCombineSVEVectorMul(IC, II); | ||
return instCombineSVEVectorMul(IC, II, IInfo); | ||
case Intrinsic::aarch64_sve_mul_u: | ||
return instCombineSVEVectorMul(IC, II); | ||
return instCombineSVEVectorMul(IC, II, IInfo); | ||
case Intrinsic::aarch64_sve_sub: | ||
return instCombineSVEVectorSub(IC, II); | ||
case Intrinsic::aarch64_sve_sub_u: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's an existing comment above that hopefully gives enough information but please shout if you feel something more bespoke is required.