Skip to content

[RISCV] Optimize conditional branches that can be statically evaluated #131684

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/BranchFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
DebugLoc dl = CurMBB->findBranchDebugLoc();
if (!dl)
dl = BranchDL;
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->reverseBranchCondition(Cond)) {
Expand Down Expand Up @@ -1132,7 +1132,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {

MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond)) {
// Failing case: IBB is the target of a cbr, and we cannot reverse the
// branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
Expand Down Expand Up @@ -1589,7 +1589,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Loop: xxx; jcc Out; jmp Loop
// we want:
// Loop: xxx; jncc Loop; jmp Out
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB &&
!CurCond.empty()) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->reverseBranchCondition(NewCond)) {
DebugLoc Dl = MBB->findBranchDebugLoc();
Expand Down
29 changes: 25 additions & 4 deletions llvm/lib/CodeGen/PeepholeOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,8 @@ class PeepholeOptimizer : private MachineFunction::Delegate {
MachineDominatorTree *DT = nullptr; // Machine dominator tree
MachineLoopInfo *MLI = nullptr;

bool NeedToInvalidateMLI = false;

public:
PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
: DT(DT), MLI(MLI) {}
Expand All @@ -444,6 +446,7 @@ class PeepholeOptimizer : private MachineFunction::Delegate {
/// Sequence of instructions that form a recurrence cycle.
using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;

bool needToInvalidateMLI() const { return NeedToInvalidateMLI; }
private:
bool optimizeCmpInstr(MachineInstr &MI);
bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
Expand Down Expand Up @@ -566,10 +569,8 @@ class PeepholeOptimizerLegacy : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addPreserved<MachineLoopInfoWrapperPass>();
if (Aggressive) {
AU.addRequired<MachineDominatorTreeWrapperPass>();
AU.addPreserved<MachineDominatorTreeWrapperPass>();
Expand Down Expand Up @@ -1660,8 +1661,10 @@ PeepholeOptimizerPass::run(MachineFunction &MF,

auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserve<MachineDominatorTreeAnalysis>();
PA.preserve<MachineLoopAnalysis>();
PA.preserveSet<CFGAnalyses>();
if (!Impl.needToInvalidateMLI()) {
PA.preserve<MachineLoopAnalysis>();
PA.preserveSet<CFGAnalyses>();
}
return PA;
}

Expand Down Expand Up @@ -1791,6 +1794,24 @@ bool PeepholeOptimizer::run(MachineFunction &MF) {
}

if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
NeedToInvalidateMLI = true;
// optimizeCondBranch might have converted a conditional branch to
// an unconditional branch. If there is a branch instruction after it,
// delete it.
MachineInstr *NewBr = &*std::prev(MII);
if (NewBr->isUnconditionalBranch()) {
if (MII != MBB.end()) {
MachineInstr *Dead = &*MII;
MachineBasicBlock *DeadDest = TII->getBranchDestBlock(*Dead);
if (DT && TII->getBranchDestBlock(*NewBr) != DeadDest) {
DT->deleteEdge(&MBB, DeadDest);
MLI->calculate(*DT);
NeedToInvalidateMLI = false;
}
++MII;
Dead->eraseFromParent();
}
}
Changed = true;
continue;
}
Expand Down
127 changes: 106 additions & 21 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,12 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Handle a single conditional branch.
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
parseCondBranch(*I, TBB, Cond);
// Try to optimize the conditional branch.
if (AllowModify) {
optimizeCondBranch(*I);
// The branch might have changed, reanalyze it.
return analyzeBranch(MBB, TBB, FBB, Cond, false);
}
return false;
}

Expand All @@ -1079,6 +1085,14 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
FBB = getBranchDestBlock(*I);
// Try to optimize the pair.
if (AllowModify) {
if (optimizeCondBranch(*std::prev(I)))
I->eraseFromParent();

// The branch might have changed, reanalyze it.
return analyzeBranch(MBB, TBB, FBB, Cond, false);
}
return false;
}

Expand Down Expand Up @@ -1234,7 +1248,8 @@ bool RISCVInstrInfo::reverseBranchCondition(

bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
if (!MBB)
return false;

MachineBasicBlock *TBB, *FBB;
SmallVector<MachineOperand, 3> Cond;
Expand All @@ -1244,8 +1259,97 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
assert(CC != RISCVCC::COND_INVALID);

if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
// Right now we only care about LI (i.e. ADDI x0, imm)
auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
MI->getOperand(1).getReg() == RISCV::X0) {
Imm = MI->getOperand(2).getImm();
return true;
}
return false;
};

MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
// The operand is either X0 or a virtual register defined by a load-immediate instruction.
auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
if (!Op.isReg())
return false;
Register Reg = Op.getReg();
if (Reg == RISCV::X0) {
Imm = 0;
return true;
}
return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
};

// Try to convert a conditional branch that can be evaluated statically
// into an unconditional branch.
MachineBasicBlock *Folded = nullptr;
int64_t C0, C1;
if (isFromLoadImm(Cond[1], C0) && isFromLoadImm(Cond[2], C1)) {
switch (CC) {
case RISCVCC::COND_INVALID:
llvm_unreachable("Unexpected CC");
case RISCVCC::COND_EQ: {
Folded = (C0 == C1) ? TBB : FBB;
break;
}
case RISCVCC::COND_NE: {
Folded = (C0 != C1) ? TBB : FBB;
break;
}
case RISCVCC::COND_LT: {
Folded = (C0 < C1) ? TBB : FBB;
break;
}
case RISCVCC::COND_GE: {
Folded = (C0 >= C1) ? TBB : FBB;
break;
}
case RISCVCC::COND_LTU: {
Folded = ((uint64_t)C0 < (uint64_t)C1) ? TBB : FBB;
break;
}
case RISCVCC::COND_GEU: {
Folded = ((uint64_t)C0 >= (uint64_t)C1) ? TBB : FBB;
break;
}
}

// Do the conversion
// Build the new unconditional branch
DebugLoc DL = MBB->findBranchDebugLoc();
if (Folded) {
BuildMI(*MBB, MI, DL, get(RISCV::PseudoBR)).addMBB(Folded);
} else {
MachineFunction::iterator Fallthrough = ++MBB->getIterator();
if (Fallthrough == MBB->getParent()->end())
return false;
BuildMI(*MBB, MI, DL, get(RISCV::PseudoBR)).addMBB(&*Fallthrough);
}

// Update successors of MBB.
if (Folded == TBB) {
// If we're taking TBB, then the succ to delete is the fallthrough (if
// it was a succ in the first place), or it's the MBB from the
// unconditional branch.
if (!FBB) {
MachineFunction::iterator Fallthrough = ++MBB->getIterator();
if (Fallthrough != MBB->getParent()->end() &&
MBB->isSuccessor(&*Fallthrough))
MBB->removeSuccessor(&*Fallthrough, true);
} else {
MBB->removeSuccessor(FBB, true);
}
} else if (Folded == FBB) {
// If we're taking the fallthrough or unconditional branch, then the
// succ to remove is the one from the conditional branch.
MBB->removeSuccessor(TBB, true);
}

MI.eraseFromParent();
return true;
}

// For two constants C0 and C1 from
// ```
Expand All @@ -1264,24 +1368,6 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
//
// To make sure this optimization is really beneficial, we only
// optimize for cases where Y had only one use (i.e. only used by the branch).

// Right now we only care about LI (i.e. ADDI x0, imm)
auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
MI->getOperand(1).getReg() == RISCV::X0) {
Imm = MI->getOperand(2).getImm();
return true;
}
return false;
};
// Either a load from immediate instruction or X0.
auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
if (!Op.isReg())
return false;
Register Reg = Op.getReg();
return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
};

MachineOperand &LHS = MI.getOperand(0);
MachineOperand &RHS = MI.getOperand(1);
// Try to find the register for constant Z; return
Expand All @@ -1300,7 +1386,6 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
};

bool Modify = false;
int64_t C0;
if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
// Might be case 1.
// Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
; CHECK-NEXT: AArch64 Dead register definitions
; CHECK-NEXT: Detect Dead Lanes
Expand All @@ -169,7 +171,6 @@
; CHECK-NEXT: Live Variable Analysis
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Register Coalescer
Expand Down
10 changes: 7 additions & 3 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@
; GCN-O1-NEXT: Remove unreachable machine basic blocks
; GCN-O1-NEXT: Live Variable Analysis
; GCN-O1-NEXT: MachineDominator Tree Construction
; GCN-O1-NEXT: Machine Natural Loop Construction
; GCN-O1-NEXT: SI Optimize VGPR LiveRange
; GCN-O1-NEXT: Eliminate PHI nodes for register allocation
; GCN-O1-NEXT: SI Lower control flow pseudo instructions
Expand Down Expand Up @@ -639,8 +640,9 @@
; GCN-O1-OPTS-NEXT: GCN DPP Combine
; GCN-O1-OPTS-NEXT: SI Load Store Optimizer
; GCN-O1-OPTS-NEXT: SI Peephole SDWA
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction
; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Early Machine Loop Invariant Code Motion
; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
Expand Down Expand Up @@ -957,8 +959,9 @@
; GCN-O2-NEXT: GCN DPP Combine
; GCN-O2-NEXT: SI Load Store Optimizer
; GCN-O2-NEXT: SI Peephole SDWA
; GCN-O2-NEXT: Machine Block Frequency Analysis
; GCN-O2-NEXT: MachineDominator Tree Construction
; GCN-O2-NEXT: Machine Natural Loop Construction
; GCN-O2-NEXT: Machine Block Frequency Analysis
; GCN-O2-NEXT: Early Machine Loop Invariant Code Motion
; GCN-O2-NEXT: MachineDominator Tree Construction
; GCN-O2-NEXT: Machine Block Frequency Analysis
Expand Down Expand Up @@ -1289,8 +1292,9 @@
; GCN-O3-NEXT: GCN DPP Combine
; GCN-O3-NEXT: SI Load Store Optimizer
; GCN-O3-NEXT: SI Peephole SDWA
; GCN-O3-NEXT: Machine Block Frequency Analysis
; GCN-O3-NEXT: MachineDominator Tree Construction
; GCN-O3-NEXT: Machine Natural Loop Construction
; GCN-O3-NEXT: Machine Block Frequency Analysis
; GCN-O3-NEXT: Early Machine Loop Invariant Code Motion
; GCN-O3-NEXT: MachineDominator Tree Construction
; GCN-O3-NEXT: Machine Block Frequency Analysis
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/ARM/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/LoongArch/opt-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,10 @@
; LAXX-NEXT: Process Implicit Definitions
; LAXX-NEXT: Remove unreachable machine basic blocks
; LAXX-NEXT: Live Variable Analysis
; LAXX-NEXT: MachineDominator Tree Construction
; LAXX-NEXT: Machine Natural Loop Construction
; LAXX-NEXT: Eliminate PHI nodes for register allocation
; LAXX-NEXT: Two-Address instruction pass
; LAXX-NEXT: MachineDominator Tree Construction
; LAXX-NEXT: Slot index numbering
; LAXX-NEXT: Live Interval Analysis
; LAXX-NEXT: Register Coalescer
Expand Down
20 changes: 0 additions & 20 deletions llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_two:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz zero, .LBB6_2
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a missed post-legalizer constant fold in GISel.

; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltiu a0, zero, 0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
Expand Down Expand Up @@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: beqz zero, .LBB6_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltiu a0, zero, 0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB6_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
Expand All @@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_one:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz zero, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: snez a0, zero
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
Expand Down Expand Up @@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: beqz zero, .LBB7_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: snez a0, zero
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB7_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
Expand Down
Loading
Loading