Skip to content

[X86] Don't always separate conditions in (br (and/or cond0, cond1)) into separate branches #81689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,42 @@ class TargetLoweringBase {
/// avoided.
bool isJumpExpensive() const { return JumpIsExpensive; }

// Costs parameters used by
// SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
// shouldKeepJumpConditionsTogether will use these parameter value to
// determine if two conditions in the form `br (and/or cond1, cond2)` should
// be split into two branches or left as one.
//
// BaseCost is the cost threshold (in latency). If the estimated latency of
// computing both `cond1` and `cond2` is below the cost of just computing
// `cond1` + BaseCost, the two conditions will be kept together. Otherwise
// they will be split.
//
// LikelyBias increases BaseCost if branch probability info indicates that it
// is likely that both `cond1` and `cond2` will be computed.
//
// UnlikelyBias decreases BaseCost if branch probability info indicates that
// it is likely that both `cond1` and `cond2` will be computed.
//
// Set any field to -1 to make it ignored (setting BaseCost to -1 results in
// `shouldKeepJumpConditionsTogether` always returning false).
struct CondMergingParams {
int BaseCost;
int LikelyBias;
int UnlikelyBias;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comments

};
// Return params for deciding if we should keep two branch conditions merged
// or split them into two separate branches.
// Arg0: The binary op joining the two conditions (and/or).
// Arg1: The first condition (cond1)
// Arg2: The second condition (cond2)
virtual CondMergingParams
getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
const Value *) const {
// -1 will always result in splitting.
return {-1, -1, -1};
}

/// Return true if selects are only cheaper than branches if the branch is
/// unlikely to be predicted right.
bool isPredictableSelectExpensive() const {
Expand Down
157 changes: 155 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
Expand Down Expand Up @@ -93,6 +94,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
Expand Down Expand Up @@ -2446,6 +2448,152 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SL->SwitchCases.push_back(CB);
}

// Collect dependencies on V recursively. This is used for the cost analysis in
// `shouldKeepJumpConditionsTogether`.
static bool collectInstructionDeps(
SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V,
SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr,
unsigned Depth = 0) {
// Return false if we have an incomplete count.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;

auto *I = dyn_cast<Instruction>(V);
if (I == nullptr)
return true;

if (Necessary != nullptr) {
// This instruction is necessary for the other side of the condition so
// don't count it.
if (Necessary->contains(I))
return true;
}

// Already added this dep.
if (!Deps->try_emplace(I, false).second)
return true;

for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
Depth + 1))
return false;
return true;
}

bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const {
if (I.getNumSuccessors() != 2)
return false;

if (!I.isConditional())
return false;

if (Params.BaseCost < 0)
return false;

// Baseline cost.
InstructionCost CostThresh = Params.BaseCost;

BranchProbabilityInfo *BPI = nullptr;
if (Params.LikelyBias || Params.UnlikelyBias)
BPI = FuncInfo.BPI;
if (BPI != nullptr) {
// See if we are either likely to get an early out or compute both lhs/rhs
// of the condition.
BasicBlock *IfFalse = I.getSuccessor(0);
BasicBlock *IfTrue = I.getSuccessor(1);

std::optional<bool> Likely;
if (BPI->isEdgeHot(I.getParent(), IfTrue))
Likely = true;
else if (BPI->isEdgeHot(I.getParent(), IfFalse))
Likely = false;

if (Likely) {
if (Opc == (*Likely ? Instruction::And : Instruction::Or))
// Its likely we will have to compute both lhs and rhs of condition
CostThresh += Params.LikelyBias;
else {
if (Params.UnlikelyBias < 0)
return false;
// Its likely we will get an early out.
CostThresh -= Params.UnlikelyBias;
}
}
}

if (CostThresh <= 0)
return false;

// Collect "all" instructions that lhs condition is dependent on.
// Use map for stable iteration (to avoid non-determanism of iteration of
// SmallPtrSet). The `bool` value is just a dummy.
SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps;
collectInstructionDeps(&LhsDeps, Lhs);
// Collect "all" instructions that rhs condition is dependent on AND are
// dependencies of lhs. This gives us an estimate on which instructions we
// stand to save by splitting the condition.
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
return false;
// Add the compare instruction itself unless its a dependency on the LHS.
if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
if (!LhsDeps.contains(RhsI))
RhsDeps.try_emplace(RhsI, false);

const auto &TLI = DAG.getTargetLoweringInfo();
const auto &TTI =
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());

InstructionCost CostOfIncluding = 0;
// See if this instruction will need to computed independently of whether RHS
// is.
Value *BrCond = I.getCondition();
auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) {
for (const auto *U : Ins->users()) {
// If user is independent of RHS calculation we don't need to count it.
if (auto *UIns = dyn_cast<Instruction>(U))
if (UIns != BrCond && !RhsDeps.contains(UIns))
return false;
}
return true;
};

// Prune instructions from RHS Deps that are dependencies of unrelated
// instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
// arbitrary and just meant to cap the how much time we spend in the pruning
// loop. Its highly unlikely to come into affect.
const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
// Stop after a certain point. No incorrectness from including too many
// instructions.
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
const Instruction *ToDrop = nullptr;
for (const auto &InsPair : RhsDeps) {
if (!ShouldCountInsn(InsPair.first)) {
ToDrop = InsPair.first;
break;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question: Does it work or improve if we would erase all found Instructions, not only first found one?
(Then, it should work w/o MapVector)

for (InsPair : RhsDeps)
  // find mark insns to be erased

for (to be erased)
  RhsDeps.erase(I);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we could do it in bulk, but still need to repeat after each change (or max prune iters is reached). Think since we have a maximum number of pruning iterations, we still potentially could have issues if iteration order is not fixed.

}
}
if (ToDrop == nullptr)
break;
RhsDeps.erase(ToDrop);
}

for (const auto &InsPair : RhsDeps) {
// Finally accumulate latency that we can only attribute to computing the
// RHS condition. Use latency because we are essentially trying to calculate
// the cost of the dependency chain.
// Possible TODO: We could try to estimate ILP and make this more precise.
CostOfIncluding +=
TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency);

if (CostOfIncluding > CostThresh)
return false;
}
return true;
}

void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
Expand Down Expand Up @@ -2660,8 +2808,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;

if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
if (Opcode &&
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
!shouldKeepJumpConditionsTogether(
FuncInfo, I, Opcode, BOp0, BOp1,
DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
Opcode, BOp0, BOp1))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,11 @@ class SelectionDAGBuilder {
N = NewN;
}

bool shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const;

void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Expand Down
52 changes: 52 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,40 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"alignment set by x86-experimental-pref-loop-alignment."),
cl::Hidden);

static cl::opt<int> BrMergingBaseCostThresh(
"x86-br-merging-base-cost", cl::init(2),
cl::desc(
"Sets the cost threshold for when multiple conditionals will be merged "
"into one branch versus be split in multiple branches. Merging "
"conditionals saves branches at the cost of additional instructions. "
"This value sets the instruction cost limit, below which conditionals "
"will be merged, and above which conditionals will be split. Set to -1 "
"to never merge branches."),
cl::Hidden);

static cl::opt<int> BrMergingLikelyBias(
"x86-br-merging-likely-bias", cl::init(0),
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
"that all conditionals will be executed. For example for merging "
"the conditionals (a == b && c > d), if its known that a == b is "
"likely, then it is likely that if the conditionals are split "
"both sides will be executed, so it may be desirable to increase "
"the instruction cost threshold. Set to -1 to never merge likely "
"branches."),
cl::Hidden);

static cl::opt<int> BrMergingUnlikelyBias(
"x86-br-merging-unlikely-bias", cl::init(-1),
cl::desc(
"Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
"that all conditionals will be executed. For example for merging "
"the conditionals (a == b && c > d), if its known that a == b is "
"unlikely, then it is unlikely that if the conditionals are split "
"both sides will be executed, so it may be desirable to decrease "
"the instruction cost threshold. Set to -1 to never merge unlikely "
"branches."),
cl::Hidden);

static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
Expand Down Expand Up @@ -3333,6 +3367,24 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
return ISD::SRL;
}

TargetLoweringBase::CondMergingParams
X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
const Value *Lhs,
const Value *Rhs) const {
using namespace llvm::PatternMatch;
int BaseCost = BrMergingBaseCostThresh.getValue();
// a == b && a == c is a fast pattern on x86.
ICmpInst::Predicate Pred;
if (BaseCost >= 0 && Opc == Instruction::And &&
match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
Pred == ICmpInst::ICMP_EQ &&
match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
Pred == ICmpInst::ICMP_EQ)
BaseCost += 1;
return {BaseCost, BrMergingLikelyBias.getValue(),
BrMergingUnlikelyBias.getValue()};
}

bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,10 @@ namespace llvm {

bool preferScalarizeSplat(SDNode *N) const override;

CondMergingParams
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
const Value *Rhs) const override;

bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;

Expand Down
11 changes: 6 additions & 5 deletions llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp
; CHECK-NEXT: movl _block, %esi
; CHECK-NEXT: movb %al, 1(%esi,%edx)
; CHECK-NEXT: cmpl %ecx, _last
; CHECK-NEXT: jge LBB0_3
; CHECK-NEXT: ## %bb.1: ## %label.0
; CHECK-NEXT: setl %cl
; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101
; CHECK-NEXT: je LBB0_3
; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub
; CHECK-NEXT: setne %al
; CHECK-NEXT: testb %al, %cl
; CHECK-NEXT: je LBB0_2
; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub
; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
Expand Down
Loading