-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Don't always separate conditions in (br (and/or cond0, cond1))
into separate branches
#81689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
#include "llvm/Analysis/Loads.h" | ||
#include "llvm/Analysis/MemoryLocation.h" | ||
#include "llvm/Analysis/TargetLibraryInfo.h" | ||
#include "llvm/Analysis/TargetTransformInfo.h" | ||
#include "llvm/Analysis/ValueTracking.h" | ||
#include "llvm/Analysis/VectorUtils.h" | ||
#include "llvm/CodeGen/Analysis.h" | ||
|
@@ -93,6 +94,7 @@ | |
#include "llvm/Support/CommandLine.h" | ||
#include "llvm/Support/Compiler.h" | ||
#include "llvm/Support/Debug.h" | ||
#include "llvm/Support/InstructionCost.h" | ||
#include "llvm/Support/MathExtras.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "llvm/Target/TargetIntrinsicInfo.h" | ||
|
@@ -2446,6 +2448,152 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, | |
SL->SwitchCases.push_back(CB); | ||
} | ||
|
||
// Collect dependencies on V recursively. This is used for the cost analysis in | ||
// `shouldKeepJumpConditionsTogether`. | ||
static bool collectInstructionDeps( | ||
SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V, | ||
SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr, | ||
unsigned Depth = 0) { | ||
// Return false if we have an incomplete count. | ||
if (Depth >= SelectionDAG::MaxRecursionDepth) | ||
return false; | ||
|
||
auto *I = dyn_cast<Instruction>(V); | ||
if (I == nullptr) | ||
return true; | ||
|
||
if (Necessary != nullptr) { | ||
// This instruction is necessary for the other side of the condition so | ||
// don't count it. | ||
if (Necessary->contains(I)) | ||
return true; | ||
} | ||
|
||
// Already added this dep. | ||
if (!Deps->try_emplace(I, false).second) | ||
return true; | ||
|
||
for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx) | ||
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, | ||
Depth + 1)) | ||
return false; | ||
return true; | ||
} | ||
|
||
bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( | ||
const FunctionLoweringInfo &FuncInfo, const BranchInst &I, | ||
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, | ||
TargetLoweringBase::CondMergingParams Params) const { | ||
if (I.getNumSuccessors() != 2) | ||
return false; | ||
|
||
if (!I.isConditional()) | ||
return false; | ||
|
||
if (Params.BaseCost < 0) | ||
return false; | ||
|
||
// Baseline cost. | ||
InstructionCost CostThresh = Params.BaseCost; | ||
|
||
BranchProbabilityInfo *BPI = nullptr; | ||
if (Params.LikelyBias || Params.UnlikelyBias) | ||
BPI = FuncInfo.BPI; | ||
if (BPI != nullptr) { | ||
// See if we are either likely to get an early out or compute both lhs/rhs | ||
// of the condition. | ||
BasicBlock *IfFalse = I.getSuccessor(0); | ||
BasicBlock *IfTrue = I.getSuccessor(1); | ||
|
||
std::optional<bool> Likely; | ||
if (BPI->isEdgeHot(I.getParent(), IfTrue)) | ||
Likely = true; | ||
else if (BPI->isEdgeHot(I.getParent(), IfFalse)) | ||
Likely = false; | ||
|
||
if (Likely) { | ||
if (Opc == (*Likely ? Instruction::And : Instruction::Or)) | ||
// Its likely we will have to compute both lhs and rhs of condition | ||
CostThresh += Params.LikelyBias; | ||
else { | ||
if (Params.UnlikelyBias < 0) | ||
return false; | ||
// Its likely we will get an early out. | ||
CostThresh -= Params.UnlikelyBias; | ||
} | ||
} | ||
} | ||
|
||
if (CostThresh <= 0) | ||
return false; | ||
|
||
// Collect "all" instructions that lhs condition is dependent on. | ||
// Use map for stable iteration (to avoid non-determanism of iteration of | ||
// SmallPtrSet). The `bool` value is just a dummy. | ||
SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps; | ||
collectInstructionDeps(&LhsDeps, Lhs); | ||
// Collect "all" instructions that rhs condition is dependent on AND are | ||
// dependencies of lhs. This gives us an estimate on which instructions we | ||
// stand to save by splitting the condition. | ||
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps)) | ||
return false; | ||
// Add the compare instruction itself unless its a dependency on the LHS. | ||
if (const auto *RhsI = dyn_cast<Instruction>(Rhs)) | ||
if (!LhsDeps.contains(RhsI)) | ||
RhsDeps.try_emplace(RhsI, false); | ||
|
||
const auto &TLI = DAG.getTargetLoweringInfo(); | ||
const auto &TTI = | ||
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); | ||
|
||
InstructionCost CostOfIncluding = 0; | ||
// See if this instruction will need to computed independently of whether RHS | ||
// is. | ||
Value *BrCond = I.getCondition(); | ||
auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) { | ||
for (const auto *U : Ins->users()) { | ||
// If user is independent of RHS calculation we don't need to count it. | ||
if (auto *UIns = dyn_cast<Instruction>(U)) | ||
if (UIns != BrCond && !RhsDeps.contains(UIns)) | ||
return false; | ||
} | ||
return true; | ||
}; | ||
|
||
// Prune instructions from RHS Deps that are dependencies of unrelated | ||
// instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly | ||
// arbitrary and just meant to cap the how much time we spend in the pruning | ||
// loop. Its highly unlikely to come into affect. | ||
const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth; | ||
// Stop after a certain point. No incorrectness from including too many | ||
// instructions. | ||
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) { | ||
const Instruction *ToDrop = nullptr; | ||
for (const auto &InsPair : RhsDeps) { | ||
if (!ShouldCountInsn(InsPair.first)) { | ||
ToDrop = InsPair.first; | ||
break; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a question: Does it work or improve if we would erase all found Instructions, not only first found one?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes we could do it in bulk, but still need to repeat after each change (or max prune iters is reached). Think since we have a maximum number of pruning iterations, we still potentially could have issues if iteration order is not fixed. |
||
} | ||
} | ||
if (ToDrop == nullptr) | ||
break; | ||
RhsDeps.erase(ToDrop); | ||
} | ||
|
||
for (const auto &InsPair : RhsDeps) { | ||
// Finally accumulate latency that we can only attribute to computing the | ||
// RHS condition. Use latency because we are essentially trying to calculate | ||
// the cost of the dependency chain. | ||
// Possible TODO: We could try to estimate ILP and make this more precise. | ||
CostOfIncluding += | ||
TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency); | ||
|
||
if (CostOfIncluding > CostThresh) | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, | ||
MachineBasicBlock *TBB, | ||
MachineBasicBlock *FBB, | ||
|
@@ -2660,8 +2808,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { | |
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) | ||
Opcode = Instruction::Or; | ||
|
||
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && | ||
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { | ||
if (Opcode && | ||
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && | ||
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) && | ||
!shouldKeepJumpConditionsTogether( | ||
FuncInfo, I, Opcode, BOp0, BOp1, | ||
DAG.getTargetLoweringInfo().getJumpConditionMergingParams( | ||
Opcode, BOp0, BOp1))) { | ||
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, | ||
getEdgeProbability(BrMBB, Succ0MBB), | ||
getEdgeProbability(BrMBB, Succ1MBB), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add comments