Skip to content

Commit 7985cef

Browse files
[BranchFolding][RISCV] Optimize branches NE/EQ to zero or one
There are two changes here. The first is that we teach analyzeBranch how to evaluate a conditional branch followed by an unconditional branch such that we can evaluate the conditional branch statically. Often, we will see a comparison to one or zero, since SelectionDAG often uses i1 for the conditional comparison. As a result, we handle this specific case. We handle only EQ and NE for now, but this can be expanded in the future. We can also expand to handling arbitrary constants in the future. The second change is that we pass AllowModify=false to analyzeBranch in the tail merging code. The reason we do this is that this code is doing some clever tricks to the branch code that it will restore later. Now that we are actually optimizing branches in analyzeBranch, we have to be careful not to mess up the canonical form that the tail merging code expects.
1 parent 4511b8c commit 7985cef

File tree

8 files changed

+192
-108
lines changed

8 files changed

+192
-108
lines changed

llvm/lib/CodeGen/BranchFolding.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
467467
DebugLoc dl = CurMBB->findBranchDebugLoc();
468468
if (!dl)
469469
dl = BranchDL;
470-
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
470+
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond)) {
471471
MachineBasicBlock *NextBB = &*I;
472472
if (TBB == NextBB && !Cond.empty() && !FBB) {
473473
if (!TII->reverseBranchCondition(Cond)) {
@@ -1107,7 +1107,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
11071107

11081108
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11091109
SmallVector<MachineOperand, 4> Cond;
1110-
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
1110+
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond)) {
11111111
// Failing case: IBB is the target of a cbr, and we cannot reverse the
11121112
// branch.
11131113
SmallVector<MachineOperand, 4> NewCond(Cond);
@@ -1564,7 +1564,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
15641564
// Loop: xxx; jcc Out; jmp Loop
15651565
// we want:
15661566
// Loop: xxx; jncc Loop; jmp Out
1567-
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
1567+
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB &&
1568+
!CurCond.empty()) {
15681569
SmallVector<MachineOperand, 4> NewCond(CurCond);
15691570
if (!TII->reverseBranchCondition(NewCond)) {
15701571
DebugLoc Dl = MBB->findBranchDebugLoc();

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,109 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
10051005
}
10061006
}
10071007

1008+
/// Try to statically evaluate the conditional branch that terminates \p MBB
/// and, when its outcome is known, replace the block's branches with a single
/// unconditional branch (or a plain fallthrough).
///
/// \p Cond is expected to hold exactly three operands: the RISCVCC condition
/// code followed by the two compared operands. Only COND_EQ and COND_NE are
/// handled, and only when both compared operands are recognised as the
/// constants zero or one.
///
/// \param MBB  Block whose terminating branches are rewritten on success.
/// \param TBB  Destination of the conditional branch; folding is refused when
///             it is null.
/// \param FBB  Destination of the trailing unconditional branch, or nullptr
///             when the conditional branch falls through.
/// \param Cond Branch condition; cleared when the branch is folded.
/// \returns true if the branch was folded (and \p MBB modified), false if
///          nothing was changed.
///
/// NOTE(review): TBB and FBB are taken by value, so this function cannot
/// update the caller's view of the branch targets; the only thing a caller
/// observes besides the return value is that \p Cond has been cleared.
/// Consider taking both by reference (declaration and definition together).
bool RISCVInstrInfo::trySimplifyCondBr(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (!TBB || Cond.size() != 3)
    return false;

  const auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  // TODO: Implement for more CCs.
  if (CC != RISCVCC::COND_EQ && CC != RISCVCC::COND_NE)
    return false;

  // LHS/RHS alias elements of Cond and must not be used after Cond.clear().
  const MachineOperand &LHS = Cond[1];
  const MachineOperand &RHS = Cond[2];

  // Look up register info through MBB rather than through the operand's
  // parent chain, so the lookup does not depend on the operand copies in Cond
  // still carrying a parent instruction.
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Return true if MO definitely contains the value one.
  auto isOne = [&MRI](const MachineOperand &MO) -> bool {
    if (MO.isImm())
      return MO.getImm() == 1;

    if (!MO.isReg() || !MO.getReg().isVirtual())
      return false;

    const MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
    if (!DefMI || DefMI->getOpcode() != RISCV::ADDI)
      return false;

    // For now, just check the canonical one value: ADDI X0, 1. Check the
    // operand kinds first; getReg()/getImm() assert on any other kind (for
    // example a frame index or a symbol operand).
    const MachineOperand &Src = DefMI->getOperand(1);
    const MachineOperand &Imm = DefMI->getOperand(2);
    return Src.isReg() && Src.getReg() == RISCV::X0 && Imm.isImm() &&
           Imm.getImm() == 1;
  };

  // Return true if MO definitely contains the value zero.
  auto isZero = [](const MachineOperand &MO) -> bool {
    if (MO.isImm())
      return MO.getImm() == 0;
    return MO.isReg() && MO.getReg() == RISCV::X0;
  };

  // An operand is never classified as both zero and one, so at most one of
  // these two facts holds.
  const bool KnownEqual =
      (isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS));
  const bool KnownUnequal =
      (isZero(LHS) && isOne(RHS)) || (isOne(LHS) && isZero(RHS));
  if (!KnownEqual && !KnownUnequal)
    return false;

  // Pick the destination the branch is statically known to reach. A null
  // result means the block falls through.
  const bool Taken = (CC == RISCVCC::COND_EQ) ? KnownEqual : KnownUnequal;
  MachineBasicBlock *Folded = Taken ? TBB : FBB;

  // At this point, it's legal to optimize. Record the branch debug location
  // before the branches are deleted; afterwards there is no branch left for
  // findBranchDebugLoc() to inspect.
  DebugLoc DL = MBB.findBranchDebugLoc();
  removeBranch(MBB);
  Cond.clear();

  // Only need to insert a branch if we're not falling through.
  if (Folded)
    insertBranch(MBB, Folded, nullptr, {}, DL);

  // Update the successors. Remove them all and add back the correct one.
  while (!MBB.succ_empty())
    MBB.removeSuccessor(MBB.succ_end() - 1);

  if (Folded) {
    MBB.addSuccessor(Folded);
  } else {
    // It's a fallthrough: the successor is the next block in layout, if any.
    MachineFunction::iterator Fallthrough = std::next(MBB.getIterator());
    if (Fallthrough != MBB.getParent()->end())
      MBB.addSuccessor(&*Fallthrough);
  }

  return true;
}
1110+
10081111
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
10091112
MachineBasicBlock *&TBB,
10101113
MachineBasicBlock *&FBB,
@@ -1062,14 +1165,22 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
10621165
// Handle a single conditional branch.
10631166
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
10641167
parseCondBranch(*I, TBB, Cond);
1168+
// Try to statically fold the conditional branch into an unconditional branch or the fallthru.
1169+
if (AllowModify)
1170+
trySimplifyCondBr(MBB, TBB, FBB, Cond);
10651171
return false;
10661172
}
10671173

10681174
// Handle a conditional branch followed by an unconditional branch.
10691175
if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
10701176
I->getDesc().isUnconditionalBranch()) {
1071-
parseCondBranch(*std::prev(I), TBB, Cond);
1177+
MachineInstr &CondBr = *std::prev(I);
1178+
parseCondBranch(CondBr, TBB, Cond);
10721179
FBB = getBranchDestBlock(*I);
1180+
// Try to fold the branch of the conditional branch into an unconditional
1181+
// branch.
1182+
if (AllowModify)
1183+
trySimplifyCondBr(MBB, TBB, FBB, Cond);
10731184
return false;
10741185
}
10751186

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,26 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
319319
const MachineInstr &MI2) const;
320320
bool hasReassociableVectorSibling(const MachineInstr &Inst,
321321
bool &Commuted) const;
322+
/// Return true if the branch represented by the conditional branch with
323+
/// components TBB, FBB, and Cond was folded into an unconditional branch.
324+
///
325+
/// If FBB is nullptr, then the input represents a conditional branch with
326+
/// a fallthrough.
327+
///
328+
/// For example:
329+
/// BRCOND EQ 0, 0, BB1
330+
/// BR BB2
331+
///
332+
/// can be simplified to BR BB1 since 0 == 0 statically. On the other hand,
333+
///
334+
///
335+
/// BRCOND EQ 0, 1, BB1
336+
/// BR BB2
337+
///
338+
/// can be simplified to BR BB2 because 0 != 1 statically.
339+
bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
340+
MachineBasicBlock *FBB,
341+
SmallVectorImpl<MachineOperand> &Cond) const;
322342
};
323343

324344
namespace RISCV {

llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind {
357357
define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
358358
; RV32I-LABEL: ctpop_i64_ugt_two:
359359
; RV32I: # %bb.0:
360-
; RV32I-NEXT: beqz zero, .LBB6_2
361-
; RV32I-NEXT: # %bb.1:
362-
; RV32I-NEXT: sltiu a0, zero, 0
363-
; RV32I-NEXT: ret
364-
; RV32I-NEXT: .LBB6_2:
365360
; RV32I-NEXT: srli a2, a0, 1
366361
; RV32I-NEXT: lui a3, 349525
367362
; RV32I-NEXT: lui a4, 209715
@@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
404399
;
405400
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
406401
; RV32ZBB: # %bb.0:
407-
; RV32ZBB-NEXT: beqz zero, .LBB6_2
408-
; RV32ZBB-NEXT: # %bb.1:
409-
; RV32ZBB-NEXT: sltiu a0, zero, 0
410-
; RV32ZBB-NEXT: ret
411-
; RV32ZBB-NEXT: .LBB6_2:
412402
; RV32ZBB-NEXT: cpop a0, a0
413403
; RV32ZBB-NEXT: cpop a1, a1
414404
; RV32ZBB-NEXT: add a0, a1, a0
@@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
422412
define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
423413
; RV32I-LABEL: ctpop_i64_ugt_one:
424414
; RV32I: # %bb.0:
425-
; RV32I-NEXT: beqz zero, .LBB7_2
426-
; RV32I-NEXT: # %bb.1:
427-
; RV32I-NEXT: snez a0, zero
428-
; RV32I-NEXT: ret
429-
; RV32I-NEXT: .LBB7_2:
430415
; RV32I-NEXT: srli a2, a0, 1
431416
; RV32I-NEXT: lui a3, 349525
432417
; RV32I-NEXT: lui a4, 209715
@@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
470455
;
471456
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
472457
; RV32ZBB: # %bb.0:
473-
; RV32ZBB-NEXT: beqz zero, .LBB7_2
474-
; RV32ZBB-NEXT: # %bb.1:
475-
; RV32ZBB-NEXT: snez a0, zero
476-
; RV32ZBB-NEXT: ret
477-
; RV32ZBB-NEXT: .LBB7_2:
478458
; RV32ZBB-NEXT: cpop a0, a0
479459
; RV32ZBB-NEXT: cpop a1, a1
480460
; RV32ZBB-NEXT: add a0, a1, a0

llvm/test/CodeGen/RISCV/branch_zero.ll

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,11 @@
55
define void @foo(i16 %finder_idx) {
66
; CHECK-LABEL: foo:
77
; CHECK: # %bb.0: # %entry
8-
; CHECK-NEXT: .LBB0_1: # %for.body
9-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
8+
; CHECK-NEXT: # %bb.1: # %for.body
109
; CHECK-NEXT: slli a0, a0, 48
11-
; CHECK-NEXT: bltz a0, .LBB0_4
10+
; CHECK-NEXT: bltz a0, .LBB0_3
1211
; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
13-
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
14-
; CHECK-NEXT: li a0, 0
15-
; CHECK-NEXT: bnez zero, .LBB0_1
16-
; CHECK-NEXT: # %bb.3: # %while.body
17-
; CHECK-NEXT: .LBB0_4: # %while.cond1.preheader.i
12+
; CHECK-NEXT: .LBB0_3: # %while.cond1.preheader.i
1813
entry:
1914
br label %for.body
2015

@@ -46,16 +41,11 @@ if.then:
4641
define void @bar(i16 %finder_idx) {
4742
; CHECK-LABEL: bar:
4843
; CHECK: # %bb.0: # %entry
49-
; CHECK-NEXT: .LBB1_1: # %for.body
50-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
44+
; CHECK-NEXT: # %bb.1: # %for.body
5145
; CHECK-NEXT: slli a0, a0, 48
52-
; CHECK-NEXT: bgez a0, .LBB1_4
46+
; CHECK-NEXT: bgez a0, .LBB1_3
5347
; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
54-
; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
55-
; CHECK-NEXT: li a0, 0
56-
; CHECK-NEXT: bnez zero, .LBB1_1
57-
; CHECK-NEXT: # %bb.3: # %while.body
58-
; CHECK-NEXT: .LBB1_4: # %while.cond1.preheader.i
48+
; CHECK-NEXT: .LBB1_3: # %while.cond1.preheader.i
5949
entry:
6050
br label %for.body
6151

@@ -83,4 +73,3 @@ exit1:
8373
if.then:
8474
br label %for.body
8575
}
86-

llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mattr=+zcmp -verify-machineinstrs \
23
; RUN: -mtriple=riscv32 -target-abi=ilp32 < %s \
34
; RUN: | FileCheck %s -check-prefixes=RV32IZCMP
@@ -13,36 +14,27 @@ declare dso_local void @f1() local_unnamed_addr
1314
declare dso_local void @f2() local_unnamed_addr
1415
define dso_local void @f0() local_unnamed_addr {
1516
; RV32IZCMP-LABEL: f0:
16-
; RV32IZCMP: .cfi_startproc
17-
; RV32IZCMP-NEXT: # %bb.0: # %entry
18-
; RV32IZCMP-NEXT: bnez zero, .LBB0_2
19-
; RV32IZCMP-NEXT: # %bb.1: # %if.T
20-
; RV32IZCMP-NEXT: cm.push {ra}, -16
21-
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16
22-
; RV32IZCMP-NEXT: .cfi_offset ra, -4
23-
; RV32IZCMP-NEXT: call f1
24-
; RV32IZCMP-NEXT: cm.pop {ra}, 16
25-
; RV32IZCMP-NEXT: .cfi_restore ra
26-
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 0
27-
; RV32IZCMP-NEXT: .LBB0_2: # %if.F
28-
; RV32IZCMP-NEXT: tail f2
29-
; RV32IZCMP-NEXT: .Lfunc_end0:
30-
17+
; RV32IZCMP: # %bb.0: # %entry
18+
; RV32IZCMP-NEXT: cm.push {ra}, -16
19+
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16
20+
; RV32IZCMP-NEXT: .cfi_offset ra, -4
21+
; RV32IZCMP-NEXT: call f1
22+
; RV32IZCMP-NEXT: cm.pop {ra}, 16
23+
; RV32IZCMP-NEXT: .cfi_restore ra
24+
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 0
25+
; RV32IZCMP-NEXT: tail f2
26+
;
3127
; RV64IZCMP-LABEL: f0:
32-
; RV64IZCMP: .cfi_startproc
33-
; RV64IZCMP-NEXT: # %bb.0: # %entry
34-
; RV64IZCMP-NEXT: bnez zero, .LBB0_2
35-
; RV64IZCMP-NEXT: # %bb.1: # %if.T
36-
; RV64IZCMP-NEXT: cm.push {ra}, -16
37-
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16
38-
; RV64IZCMP-NEXT: .cfi_offset ra, -8
39-
; RV64IZCMP-NEXT: call f1
40-
; RV64IZCMP-NEXT: cm.pop {ra}, 16
41-
; RV64IZCMP-NEXT: .cfi_restore ra
42-
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 0
43-
; RV64IZCMP-NEXT: .LBB0_2: # %if.F
44-
; RV64IZCMP-NEXT: tail f2
45-
; RV64IZCMP-NEXT: .Lfunc_end0:
28+
; RV64IZCMP: # %bb.0: # %entry
29+
; RV64IZCMP-NEXT: cm.push {ra}, -16
30+
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16
31+
; RV64IZCMP-NEXT: .cfi_offset ra, -8
32+
; RV64IZCMP-NEXT: call f1
33+
; RV64IZCMP-NEXT: cm.pop {ra}, 16
34+
; RV64IZCMP-NEXT: .cfi_restore ra
35+
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 0
36+
; RV64IZCMP-NEXT: tail f2
37+
4638
entry:
4739
br i1 poison, label %if.T, label %if.F
4840

0 commit comments

Comments
 (0)