Skip to content

Commit cb4616f

Browse files
committed
[X86] Don't always seperate conditions in (br (and/or cond0, cond1)) into seperate branches
It makes sense to split if the cost of computing `cond1` is high (proportionally to how likely `cond0` is), but it doesn't really make sense to introduce a second branch if its only a few instructions. Splitting can also get in the way of potentially folding patterns. This patch introduces some logic to try to check if the cost of computing `cond1` is relatively low, and if so don't split the branches. Modest improvement on clang bootstrap build: https://llvm-compile-time-tracker.com/compare.php?from=79ce933114e46c891a5632f7ad4a004b93a5b808&to=978278eabc0bafe2f390ca8fcdad24154f954020&stat=cycles Average stage2-O3: 0.59% Improvement (cycles) Average stage2-O0-g: 1.20% Improvement (cycles) Likewise on llvm-test-suite on SKX saw a net 0.84% improvement (cycles) There is also a modest compile time improvement with this patch: https://llvm-compile-time-tracker.com/compare.php?from=79ce933114e46c891a5632f7ad4a004b93a5b808&to=978278eabc0bafe2f390ca8fcdad24154f954020&stat=instructions%3Au Note that the stage2 instruction count increases is expected, this patch trades instructions for decreasing branch-misses (which is proportionately lower): https://llvm-compile-time-tracker.com/compare.php?from=79ce933114e46c891a5632f7ad4a004b93a5b808&to=978278eabc0bafe2f390ca8fcdad24154f954020&stat=branch-misses NB: This will also likely help for APX targets with the new `CCMP` and `CTEST` instructions.
1 parent 7c4c274 commit cb4616f

28 files changed

+951
-703
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,20 @@ class TargetLoweringBase {
596596
/// avoided.
597597
bool isJumpExpensive() const { return JumpIsExpensive; }
598598

599+
struct CondMergingParams {
600+
int BaseCost;
601+
int LikelyBias;
602+
int UnlikelyBias;
603+
};
604+
// Return params for deciding if we should keep two branch conditions merged
605+
// or split them into two seperate branches.
606+
virtual CondMergingParams
607+
getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
608+
const Value *) const {
609+
// -1 will always result in splitting.
610+
return {-1, -1, -1};
611+
}
612+
599613
/// Return true if selects are only cheaper than branches if the branch is
600614
/// unlikely to be predicted right.
601615
bool isPredictableSelectExpensive() const {

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/Analysis/Loads.h"
2727
#include "llvm/Analysis/MemoryLocation.h"
2828
#include "llvm/Analysis/TargetLibraryInfo.h"
29+
#include "llvm/Analysis/TargetTransformInfo.h"
2930
#include "llvm/Analysis/ValueTracking.h"
3031
#include "llvm/Analysis/VectorUtils.h"
3132
#include "llvm/CodeGen/Analysis.h"
@@ -93,6 +94,7 @@
9394
#include "llvm/Support/CommandLine.h"
9495
#include "llvm/Support/Compiler.h"
9596
#include "llvm/Support/Debug.h"
97+
#include "llvm/Support/InstructionCost.h"
9698
#include "llvm/Support/MathExtras.h"
9799
#include "llvm/Support/raw_ostream.h"
98100
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -2432,6 +2434,141 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
24322434
SL->SwitchCases.push_back(CB);
24332435
}
24342436

2437+
// Collect dependings on V recursively. This is used for the cost analysis in
2438+
// `shouldKeepJumpConditionsTogether`.
2439+
static bool
2440+
collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
2441+
const Value *V,
2442+
SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
2443+
unsigned Depth = 0) {
2444+
// Return false if we have an incomplete count.
2445+
if (Depth >= SelectionDAG::MaxRecursionDepth)
2446+
return false;
2447+
2448+
auto *I = dyn_cast<Instruction>(V);
2449+
if (I == nullptr)
2450+
return true;
2451+
2452+
if (Necessary != nullptr) {
2453+
// This instruction is necessary for the other side of the condition so
2454+
// don't count it.
2455+
if (Necessary->contains(I))
2456+
return true;
2457+
}
2458+
2459+
// Already added this dep.
2460+
if (!Deps->insert(I).second)
2461+
return true;
2462+
2463+
for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx)
2464+
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
2465+
Depth + 1))
2466+
return false;
2467+
return true;
2468+
}
2469+
2470+
bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
2471+
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
2472+
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
2473+
TargetLoweringBase::CondMergingParams Params) const {
2474+
if (I.getNumSuccessors() != 2)
2475+
return false;
2476+
2477+
if (Params.BaseCost < 0)
2478+
return false;
2479+
2480+
// Baseline cost.
2481+
InstructionCost CostThresh = Params.BaseCost;
2482+
2483+
BranchProbabilityInfo *BPI = nullptr;
2484+
if (Params.LikelyBias || Params.UnlikelyBias)
2485+
BPI = FuncInfo.BPI;
2486+
if (BPI != nullptr) {
2487+
// See if we are either likely to get an early out or compute both lhs/rhs
2488+
// of the condition.
2489+
BasicBlock *IfFalse = I.getSuccessor(0);
2490+
BasicBlock *IfTrue = I.getSuccessor(1);
2491+
2492+
std::optional<bool> Likely;
2493+
if (BPI->isEdgeHot(I.getParent(), IfTrue))
2494+
Likely = true;
2495+
else if (BPI->isEdgeHot(I.getParent(), IfFalse))
2496+
Likely = false;
2497+
2498+
if (Likely) {
2499+
if (Opc == (*Likely ? Instruction::And : Instruction::Or))
2500+
// Its likely we will have to compute both lhs and rhs of condition
2501+
CostThresh += Params.LikelyBias;
2502+
else {
2503+
if (Params.UnlikelyBias < 0)
2504+
return false;
2505+
// Its likely we will get an early out.
2506+
CostThresh -= Params.UnlikelyBias;
2507+
}
2508+
}
2509+
}
2510+
2511+
if (CostThresh <= 0)
2512+
return false;
2513+
2514+
// Collect "all" instructions that lhs condition is dependent on.
2515+
SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
2516+
collectInstructionDeps(&LhsDeps, Lhs);
2517+
// Collect "all" instructions that rhs condition is dependent on AND are
2518+
// dependencies of lhs. This gives us an estimate on which instructions we
2519+
// stand to save by splitting the condition.
2520+
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
2521+
return false;
2522+
// Add the compare instruction itself unless its a dependency on the LHS.
2523+
if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
2524+
if (!LhsDeps.contains(RhsI))
2525+
RhsDeps.insert(RhsI);
2526+
2527+
const auto &TLI = DAG.getTargetLoweringInfo();
2528+
const auto &TTI =
2529+
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
2530+
2531+
InstructionCost CostOfIncluding = 0;
2532+
// See if this instruction will need to computed independently of whether RHS
2533+
// is.
2534+
auto ShouldCountInsn = [&RhsDeps](const Instruction *Ins) {
2535+
for (const auto *U : Ins->users()) {
2536+
// If user is independent of RHS calculation we don't need to count it.
2537+
if (auto *UIns = dyn_cast<Instruction>(U))
2538+
if (!RhsDeps.contains(UIns))
2539+
return false;
2540+
}
2541+
return true;
2542+
};
2543+
2544+
// Prune instructions from RHS Deps that are dependencies of unrelated
2545+
// instructions.
2546+
const unsigned MaxPruneIters = 8;
2547+
// Stop after a certain point. No incorrectness from including too many
2548+
// instructions.
2549+
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
2550+
const Instruction *ToDrop = nullptr;
2551+
for (const auto *Ins : RhsDeps) {
2552+
if (!ShouldCountInsn(Ins)) {
2553+
ToDrop = Ins;
2554+
break;
2555+
}
2556+
}
2557+
if (ToDrop == nullptr)
2558+
break;
2559+
RhsDeps.erase(ToDrop);
2560+
}
2561+
2562+
for (const auto *Ins : RhsDeps) {
2563+
CostOfIncluding +=
2564+
TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
2565+
2566+
if (CostOfIncluding > CostThresh)
2567+
return false;
2568+
}
2569+
return true;
2570+
}
2571+
24352572
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
24362573
MachineBasicBlock *TBB,
24372574
MachineBasicBlock *FBB,
@@ -2646,8 +2783,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
26462783
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
26472784
Opcode = Instruction::Or;
26482785

2649-
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
2650-
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
2786+
if (Opcode &&
2787+
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
2788+
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
2789+
!shouldKeepJumpConditionsTogether(
2790+
FuncInfo, I, Opcode, BOp0, BOp1,
2791+
DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
2792+
Opcode, BOp0, BOp1))) {
26512793
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
26522794
getEdgeProbability(BrMBB, Succ0MBB),
26532795
getEdgeProbability(BrMBB, Succ1MBB),

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,11 @@ class SelectionDAGBuilder {
385385
N = NewN;
386386
}
387387

388+
bool shouldKeepJumpConditionsTogether(
389+
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
390+
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
391+
TargetLoweringBase::CondMergingParams Params) const;
392+
388393
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
389394
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
390395
MachineBasicBlock *SwitchBB,

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@
2727
#include "llvm/ADT/StringExtras.h"
2828
#include "llvm/ADT/StringSwitch.h"
2929
#include "llvm/Analysis/BlockFrequencyInfo.h"
30+
#include "llvm/Analysis/BranchProbabilityInfo.h"
3031
#include "llvm/Analysis/ObjCARCUtil.h"
3132
#include "llvm/Analysis/ProfileSummaryInfo.h"
33+
#include "llvm/Analysis/TargetTransformInfo.h"
3234
#include "llvm/Analysis/VectorUtils.h"
35+
#include "llvm/CodeGen/FunctionLoweringInfo.h"
3336
#include "llvm/CodeGen/IntrinsicLowering.h"
3437
#include "llvm/CodeGen/MachineFrameInfo.h"
3538
#include "llvm/CodeGen/MachineFunction.h"
@@ -55,6 +58,7 @@
5558
#include "llvm/MC/MCContext.h"
5659
#include "llvm/MC/MCExpr.h"
5760
#include "llvm/MC/MCSymbol.h"
61+
#include "llvm/Support/BranchProbability.h"
5862
#include "llvm/Support/CommandLine.h"
5963
#include "llvm/Support/Debug.h"
6064
#include "llvm/Support/ErrorHandling.h"
@@ -77,6 +81,37 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
7781
"alignment set by x86-experimental-pref-loop-alignment."),
7882
cl::Hidden);
7983

84+
static cl::opt<int> BrMergingBaseCostThresh(
85+
"x86-br-merging-base-cost", cl::init(1),
86+
cl::desc(
87+
"Sets the cost threshold for when multiple conditionals will be merged "
88+
"into one branch versus be split in multiple branches. Merging "
89+
"conditionals saves branches at the cost of additional instructions. "
90+
"This value sets the instruction cost limit, below which conditionals "
91+
"will be merged, and above which conditionals will be split."),
92+
cl::Hidden);
93+
94+
static cl::opt<int> BrMergingLikelyBias(
95+
"x86-br-merging-likely-bias", cl::init(0),
96+
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
97+
"that all conditionals will be executed. For example for merging "
98+
"the conditionals (a == b && c > d), if its known that a == b is "
99+
"likely, then it is likely that if the conditionals are split "
100+
"both sides will be executed, so it may be desirable to increase "
101+
"the instruction cost threshold."),
102+
cl::Hidden);
103+
104+
static cl::opt<int> BrMergingUnlikelyBias(
105+
"x86-br-merging-unlikely-bias", cl::init(1),
106+
cl::desc(
107+
"Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
108+
"that all conditionals will be executed. For example for merging "
109+
"the conditionals (a == b && c > d), if its known that a == b is "
110+
"unlikely, then it is unlikely that if the conditionals are split "
111+
"both sides will be executed, so it may be desirable to decrease "
112+
"the instruction cost threshold."),
113+
cl::Hidden);
114+
80115
static cl::opt<bool> MulConstantOptimization(
81116
"mul-constant-optimization", cl::init(true),
82117
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -3339,6 +3374,24 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
33393374
return ISD::SRL;
33403375
}
33413376

3377+
TargetLoweringBase::CondMergingParams
3378+
X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
3379+
const Value *Lhs,
3380+
const Value *Rhs) const {
3381+
using namespace llvm::PatternMatch;
3382+
int BaseCost = BrMergingBaseCostThresh.getValue();
3383+
// a == b && a == c is a fast pattern on x86.
3384+
ICmpInst::Predicate Pred;
3385+
if (BaseCost >= 0 && Opc == Instruction::And &&
3386+
match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3387+
Pred == ICmpInst::ICMP_EQ &&
3388+
match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3389+
Pred == ICmpInst::ICMP_EQ)
3390+
BaseCost += 1;
3391+
return {BaseCost, BrMergingLikelyBias.getValue(),
3392+
BrMergingUnlikelyBias.getValue()};
3393+
}
3394+
33423395
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
33433396
return N->getOpcode() != ISD::FP_EXTEND;
33443397
}

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,10 @@ namespace llvm {
11501150

11511151
bool preferScalarizeSplat(SDNode *N) const override;
11521152

1153+
CondMergingParams
1154+
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1155+
const Value *Rhs) const override;
1156+
11531157
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
11541158
CombineLevel Level) const override;
11551159

llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,16 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp
1818
; CHECK-NEXT: movl _block, %esi
1919
; CHECK-NEXT: movb %al, 1(%esi,%edx)
2020
; CHECK-NEXT: cmpl %ecx, _last
21-
; CHECK-NEXT: jge LBB0_3
22-
; CHECK-NEXT: ## %bb.1: ## %label.0
21+
; CHECK-NEXT: setl %cl
2322
; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101
24-
; CHECK-NEXT: je LBB0_3
25-
; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub
23+
; CHECK-NEXT: setne %al
24+
; CHECK-NEXT: testb %al, %cl
25+
; CHECK-NEXT: je LBB0_2
26+
; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub
2627
; CHECK-NEXT: movb $1, %al
2728
; CHECK-NEXT: popl %esi
2829
; CHECK-NEXT: retl
29-
; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub
30+
; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub
3031
; CHECK-NEXT: xorl %eax, %eax
3132
; CHECK-NEXT: popl %esi
3233
; CHECK-NEXT: retl

0 commit comments

Comments
 (0)