Skip to content

Commit a4951ec

Browse files
committed
Recommit "[X86] Don't always separate conditions in (br (and/or cond0, cond1)) into separate branches" (2nd Try)
Changes in Recommit: 1) Fix non-determanism by using `SmallMapVector` instead of `SmallPtrSet`. 2) Fix bug in dependency pruning where we discounted the actual `and/or` combining the two conditions. This lead to over pruning. Closes #81689
1 parent a81a7b9 commit a4951ec

25 files changed

+793
-622
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,42 @@ class TargetLoweringBase {
596596
/// avoided.
597597
bool isJumpExpensive() const { return JumpIsExpensive; }
598598

599+
// Costs parameters used by
600+
// SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
601+
// shouldKeepJumpConditionsTogether will use these parameter value to
602+
// determine if two conditions in the form `br (and/or cond1, cond2)` should
603+
// be split into two branches or left as one.
604+
//
605+
// BaseCost is the cost threshold (in latency). If the estimated latency of
606+
// computing both `cond1` and `cond2` is below the cost of just computing
607+
// `cond1` + BaseCost, the two conditions will be kept together. Otherwise
608+
// they will be split.
609+
//
610+
// LikelyBias increases BaseCost if branch probability info indicates that it
611+
// is likely that both `cond1` and `cond2` will be computed.
612+
//
613+
// UnlikelyBias decreases BaseCost if branch probability info indicates that
614+
// it is likely that both `cond1` and `cond2` will be computed.
615+
//
616+
// Set any field to -1 to make it ignored (setting BaseCost to -1 results in
617+
// `shouldKeepJumpConditionsTogether` always returning false).
618+
struct CondMergingParams {
619+
int BaseCost;
620+
int LikelyBias;
621+
int UnlikelyBias;
622+
};
623+
// Return params for deciding if we should keep two branch conditions merged
624+
// or split them into two separate branches.
625+
// Arg0: The binary op joining the two conditions (and/or).
626+
// Arg1: The first condition (cond1)
627+
// Arg2: The second condition (cond2)
628+
virtual CondMergingParams
629+
getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
630+
const Value *) const {
631+
// -1 will always result in splitting.
632+
return {-1, -1, -1};
633+
}
634+
599635
/// Return true if selects are only cheaper than branches if the branch is
600636
/// unlikely to be predicted right.
601637
bool isPredictableSelectExpensive() const {

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 155 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/Analysis/Loads.h"
2727
#include "llvm/Analysis/MemoryLocation.h"
2828
#include "llvm/Analysis/TargetLibraryInfo.h"
29+
#include "llvm/Analysis/TargetTransformInfo.h"
2930
#include "llvm/Analysis/ValueTracking.h"
3031
#include "llvm/Analysis/VectorUtils.h"
3132
#include "llvm/CodeGen/Analysis.h"
@@ -93,6 +94,7 @@
9394
#include "llvm/Support/CommandLine.h"
9495
#include "llvm/Support/Compiler.h"
9596
#include "llvm/Support/Debug.h"
97+
#include "llvm/Support/InstructionCost.h"
9698
#include "llvm/Support/MathExtras.h"
9799
#include "llvm/Support/raw_ostream.h"
98100
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -2446,6 +2448,152 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
24462448
SL->SwitchCases.push_back(CB);
24472449
}
24482450

2451+
// Collect dependencies on V recursively. This is used for the cost analysis in
2452+
// `shouldKeepJumpConditionsTogether`.
2453+
static bool collectInstructionDeps(
2454+
SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V,
2455+
SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr,
2456+
unsigned Depth = 0) {
2457+
// Return false if we have an incomplete count.
2458+
if (Depth >= SelectionDAG::MaxRecursionDepth)
2459+
return false;
2460+
2461+
auto *I = dyn_cast<Instruction>(V);
2462+
if (I == nullptr)
2463+
return true;
2464+
2465+
if (Necessary != nullptr) {
2466+
// This instruction is necessary for the other side of the condition so
2467+
// don't count it.
2468+
if (Necessary->contains(I))
2469+
return true;
2470+
}
2471+
2472+
// Already added this dep.
2473+
if (!Deps->try_emplace(I, false).second)
2474+
return true;
2475+
2476+
for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
2477+
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
2478+
Depth + 1))
2479+
return false;
2480+
return true;
2481+
}
2482+
2483+
bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
2484+
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
2485+
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
2486+
TargetLoweringBase::CondMergingParams Params) const {
2487+
if (I.getNumSuccessors() != 2)
2488+
return false;
2489+
2490+
if (!I.isConditional())
2491+
return false;
2492+
2493+
if (Params.BaseCost < 0)
2494+
return false;
2495+
2496+
// Baseline cost.
2497+
InstructionCost CostThresh = Params.BaseCost;
2498+
2499+
BranchProbabilityInfo *BPI = nullptr;
2500+
if (Params.LikelyBias || Params.UnlikelyBias)
2501+
BPI = FuncInfo.BPI;
2502+
if (BPI != nullptr) {
2503+
// See if we are either likely to get an early out or compute both lhs/rhs
2504+
// of the condition.
2505+
BasicBlock *IfFalse = I.getSuccessor(0);
2506+
BasicBlock *IfTrue = I.getSuccessor(1);
2507+
2508+
std::optional<bool> Likely;
2509+
if (BPI->isEdgeHot(I.getParent(), IfTrue))
2510+
Likely = true;
2511+
else if (BPI->isEdgeHot(I.getParent(), IfFalse))
2512+
Likely = false;
2513+
2514+
if (Likely) {
2515+
if (Opc == (*Likely ? Instruction::And : Instruction::Or))
2516+
// Its likely we will have to compute both lhs and rhs of condition
2517+
CostThresh += Params.LikelyBias;
2518+
else {
2519+
if (Params.UnlikelyBias < 0)
2520+
return false;
2521+
// Its likely we will get an early out.
2522+
CostThresh -= Params.UnlikelyBias;
2523+
}
2524+
}
2525+
}
2526+
2527+
if (CostThresh <= 0)
2528+
return false;
2529+
2530+
// Collect "all" instructions that lhs condition is dependent on.
2531+
// Use map for stable iteration (to avoid non-determanism of iteration of
2532+
// SmallPtrSet). The `bool` value is just a dummy.
2533+
SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps;
2534+
collectInstructionDeps(&LhsDeps, Lhs);
2535+
// Collect "all" instructions that rhs condition is dependent on AND are
2536+
// dependencies of lhs. This gives us an estimate on which instructions we
2537+
// stand to save by splitting the condition.
2538+
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
2539+
return false;
2540+
// Add the compare instruction itself unless its a dependency on the LHS.
2541+
if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
2542+
if (!LhsDeps.contains(RhsI))
2543+
RhsDeps.try_emplace(RhsI, false);
2544+
2545+
const auto &TLI = DAG.getTargetLoweringInfo();
2546+
const auto &TTI =
2547+
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
2548+
2549+
InstructionCost CostOfIncluding = 0;
2550+
// See if this instruction will need to computed independently of whether RHS
2551+
// is.
2552+
Value *BrCond = I.getCondition();
2553+
auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) {
2554+
for (const auto *U : Ins->users()) {
2555+
// If user is independent of RHS calculation we don't need to count it.
2556+
if (auto *UIns = dyn_cast<Instruction>(U))
2557+
if (UIns != BrCond && !RhsDeps.contains(UIns))
2558+
return false;
2559+
}
2560+
return true;
2561+
};
2562+
2563+
// Prune instructions from RHS Deps that are dependencies of unrelated
2564+
// instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
2565+
// arbitrary and just meant to cap the how much time we spend in the pruning
2566+
// loop. Its highly unlikely to come into affect.
2567+
const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
2568+
// Stop after a certain point. No incorrectness from including too many
2569+
// instructions.
2570+
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
2571+
const Instruction *ToDrop = nullptr;
2572+
for (const auto &InsPair : RhsDeps) {
2573+
if (!ShouldCountInsn(InsPair.first)) {
2574+
ToDrop = InsPair.first;
2575+
break;
2576+
}
2577+
}
2578+
if (ToDrop == nullptr)
2579+
break;
2580+
RhsDeps.erase(ToDrop);
2581+
}
2582+
2583+
for (const auto &InsPair : RhsDeps) {
2584+
// Finally accumulate latency that we can only attribute to computing the
2585+
// RHS condition. Use latency because we are essentially trying to calculate
2586+
// the cost of the dependency chain.
2587+
// Possible TODO: We could try to estimate ILP and make this more precise.
2588+
CostOfIncluding +=
2589+
TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency);
2590+
2591+
if (CostOfIncluding > CostThresh)
2592+
return false;
2593+
}
2594+
return true;
2595+
}
2596+
24492597
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
24502598
MachineBasicBlock *TBB,
24512599
MachineBasicBlock *FBB,
@@ -2660,8 +2808,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
26602808
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
26612809
Opcode = Instruction::Or;
26622810

2663-
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
2664-
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
2811+
if (Opcode &&
2812+
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
2813+
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
2814+
!shouldKeepJumpConditionsTogether(
2815+
FuncInfo, I, Opcode, BOp0, BOp1,
2816+
DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
2817+
Opcode, BOp0, BOp1))) {
26652818
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
26662819
getEdgeProbability(BrMBB, Succ0MBB),
26672820
getEdgeProbability(BrMBB, Succ1MBB),

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,11 @@ class SelectionDAGBuilder {
385385
N = NewN;
386386
}
387387

388+
bool shouldKeepJumpConditionsTogether(
389+
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
390+
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
391+
TargetLoweringBase::CondMergingParams Params) const;
392+
388393
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
389394
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
390395
MachineBasicBlock *SwitchBB,

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,40 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
7777
"alignment set by x86-experimental-pref-loop-alignment."),
7878
cl::Hidden);
7979

80+
static cl::opt<int> BrMergingBaseCostThresh(
81+
"x86-br-merging-base-cost", cl::init(2),
82+
cl::desc(
83+
"Sets the cost threshold for when multiple conditionals will be merged "
84+
"into one branch versus be split in multiple branches. Merging "
85+
"conditionals saves branches at the cost of additional instructions. "
86+
"This value sets the instruction cost limit, below which conditionals "
87+
"will be merged, and above which conditionals will be split. Set to -1 "
88+
"to never merge branches."),
89+
cl::Hidden);
90+
91+
static cl::opt<int> BrMergingLikelyBias(
92+
"x86-br-merging-likely-bias", cl::init(0),
93+
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
94+
"that all conditionals will be executed. For example for merging "
95+
"the conditionals (a == b && c > d), if its known that a == b is "
96+
"likely, then it is likely that if the conditionals are split "
97+
"both sides will be executed, so it may be desirable to increase "
98+
"the instruction cost threshold. Set to -1 to never merge likely "
99+
"branches."),
100+
cl::Hidden);
101+
102+
static cl::opt<int> BrMergingUnlikelyBias(
103+
"x86-br-merging-unlikely-bias", cl::init(-1),
104+
cl::desc(
105+
"Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
106+
"that all conditionals will be executed. For example for merging "
107+
"the conditionals (a == b && c > d), if its known that a == b is "
108+
"unlikely, then it is unlikely that if the conditionals are split "
109+
"both sides will be executed, so it may be desirable to decrease "
110+
"the instruction cost threshold. Set to -1 to never merge unlikely "
111+
"branches."),
112+
cl::Hidden);
113+
80114
static cl::opt<bool> MulConstantOptimization(
81115
"mul-constant-optimization", cl::init(true),
82116
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -3338,6 +3372,24 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
33383372
return ISD::SRL;
33393373
}
33403374

3375+
TargetLoweringBase::CondMergingParams
3376+
X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
3377+
const Value *Lhs,
3378+
const Value *Rhs) const {
3379+
using namespace llvm::PatternMatch;
3380+
int BaseCost = BrMergingBaseCostThresh.getValue();
3381+
// a == b && a == c is a fast pattern on x86.
3382+
ICmpInst::Predicate Pred;
3383+
if (BaseCost >= 0 && Opc == Instruction::And &&
3384+
match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3385+
Pred == ICmpInst::ICMP_EQ &&
3386+
match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3387+
Pred == ICmpInst::ICMP_EQ)
3388+
BaseCost += 1;
3389+
return {BaseCost, BrMergingLikelyBias.getValue(),
3390+
BrMergingUnlikelyBias.getValue()};
3391+
}
3392+
33413393
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
33423394
return N->getOpcode() != ISD::FP_EXTEND;
33433395
}

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,10 @@ namespace llvm {
11501150

11511151
bool preferScalarizeSplat(SDNode *N) const override;
11521152

1153+
CondMergingParams
1154+
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1155+
const Value *Rhs) const override;
1156+
11531157
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
11541158
CombineLevel Level) const override;
11551159

llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,16 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp
1818
; CHECK-NEXT: movl _block, %esi
1919
; CHECK-NEXT: movb %al, 1(%esi,%edx)
2020
; CHECK-NEXT: cmpl %ecx, _last
21-
; CHECK-NEXT: jge LBB0_3
22-
; CHECK-NEXT: ## %bb.1: ## %label.0
21+
; CHECK-NEXT: setl %cl
2322
; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101
24-
; CHECK-NEXT: je LBB0_3
25-
; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub
23+
; CHECK-NEXT: setne %al
24+
; CHECK-NEXT: testb %al, %cl
25+
; CHECK-NEXT: je LBB0_2
26+
; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub
2627
; CHECK-NEXT: movb $1, %al
2728
; CHECK-NEXT: popl %esi
2829
; CHECK-NEXT: retl
29-
; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub
30+
; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub
3031
; CHECK-NEXT: xorl %eax, %eax
3132
; CHECK-NEXT: popl %esi
3233
; CHECK-NEXT: retl

0 commit comments

Comments
 (0)