Skip to content

Commit bd669d3

Browse files
[BranchFolding][RISCV] Optimize branches NE/EQ to zero or one
There are two changes here. The first is that we teach analyzeBranch how to evaluate a conditional branch followed by an unconditional branch such that we can evaluate the conditional branch statically. Often, we will see comparison to one or zero since SelectionDAG often uses i1 for the conditional comparison. As a result, we handle this specific case. We handle only EQ and NE for now, but this can be expanded in the future. We can also expand on handling arbitrary constants in the future. The second change is that we pass AllowModify=false to analyzeBranch in the tail merging code. The reason we do this is because this code is doing some clever tricks to the branch code that it will restore later. Now that we are actually optimizing branches in analyzeBranch, we have to be careful not to mess up this canonical form that the tail merging code expects.
1 parent aba3aaa commit bd669d3

File tree

7 files changed

+188
-107
lines changed

7 files changed

+188
-107
lines changed

llvm/lib/CodeGen/BranchFolding.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1107,7 +1107,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
11071107

11081108
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11091109
SmallVector<MachineOperand, 4> Cond;
1110-
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
1110+
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond)) {
11111111
// Failing case: IBB is the target of a cbr, and we cannot reverse the
11121112
// branch.
11131113
SmallVector<MachineOperand, 4> NewCond(Cond);

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,104 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
10051005
}
10061006
}
10071007

1008+
bool RISCVInstrInfo::trySimplifyCondBr(MachineBasicBlock &MBB,
1009+
MachineBasicBlock *TBB,
1010+
MachineBasicBlock *FBB,
1011+
SmallVectorImpl<MachineOperand> &Cond,
1012+
MachineBasicBlock *&Folded) const {
1013+
if (!TBB || Cond.size() != 3)
1014+
return false;
1015+
1016+
// If there is no FBB, get the fall through.
1017+
if (!FBB) {
1018+
MachineFunction::iterator FallThrough = ++MBB.getIterator();
1019+
// There is no fallthrough
1020+
if (FallThrough == MBB.getParent()->end())
1021+
return false;
1022+
FBB = &*FallThrough;
1023+
}
1024+
1025+
RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1026+
auto LHS = Cond[1];
1027+
auto RHS = Cond[2];
1028+
1029+
// Return true if MO definitely contains the value one.
1030+
auto isOne = [](MachineOperand &MO) -> bool {
1031+
if (MO.isImm() && MO.getImm() == 1)
1032+
return true;
1033+
1034+
if (!MO.isReg() || !MO.getReg().isVirtual())
1035+
return false;
1036+
1037+
MachineRegisterInfo &MRI =
1038+
MO.getParent()->getParent()->getParent()->getRegInfo();
1039+
MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
1040+
if (!DefMI)
1041+
return false;
1042+
1043+
// For now, just check the canonical materialization of one (ADDI X0, 1).
1044+
if (DefMI->getOpcode() == RISCV::ADDI &&
1045+
DefMI->getOperand(1).getReg() == RISCV::X0 &&
1046+
DefMI->getOperand(2).getImm() == 1)
1047+
return true;
1048+
1049+
return false;
1050+
};
1051+
1052+
// Return true if MO definitely contains the value zero.
1053+
auto isZero = [](MachineOperand &MO) -> bool {
1054+
if (MO.isImm() && MO.getImm() == 0)
1055+
return true;
1056+
if (MO.isReg() && MO.getReg() == RISCV::X0)
1057+
return true;
1058+
return false;
1059+
};
1060+
1061+
switch (CC) {
1062+
default:
1063+
// TODO: Implement for more CCs
1064+
return false;
1065+
case RISCVCC::COND_EQ: {
1066+
// We can statically evaluate that we take the first branch
1067+
if ((isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS))) {
1068+
Folded = TBB;
1069+
break;
1070+
}
1071+
// We can statically evaluate that we take the second branch
1072+
if ((isZero(LHS) && isOne(RHS)) || (isOne(LHS) && isZero(RHS))) {
1073+
Folded = FBB;
1074+
break;
1075+
}
1076+
return false;
1077+
}
1078+
case RISCVCC::COND_NE: {
1079+
// We can statically evaluate that we take the first branch
1080+
if ((isOne(LHS) && isZero(RHS)) || (isZero(LHS) && isOne(RHS))) {
1081+
Folded = TBB;
1082+
break;
1083+
}
1084+
// We can statically evaluate that we take the second branch
1085+
if ((isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS))) {
1086+
Folded = FBB;
1087+
break;
1088+
}
1089+
return false;
1090+
}
1091+
}
1092+
1093+
// At this point, it's legal to optimize.
1094+
DebugLoc DL = MBB.findBranchDebugLoc();
1095+
removeBranch(MBB);
1096+
insertBranch(MBB, Folded, nullptr, {}, DL);
1097+
while (!MBB.succ_empty())
1098+
MBB.removeSuccessor(MBB.succ_end() - 1);
1099+
MBB.addSuccessor(Folded);
1100+
TBB = Folded;
1101+
FBB = nullptr;
1102+
Cond.clear();
1103+
return true;
1104+
}
1105+
10081106
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
10091107
MachineBasicBlock *&TBB,
10101108
MachineBasicBlock *&FBB,
@@ -1062,14 +1160,24 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
10621160
// Handle a single conditional branch.
10631161
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
10641162
parseCondBranch(*I, TBB, Cond);
1163+
// Try to fold the branch of the conditional branch into the fallthru.
1164+
MachineBasicBlock *Folded = nullptr;
1165+
if (AllowModify)
1166+
trySimplifyCondBr(MBB, TBB, FBB, Cond, Folded);
10651167
return false;
10661168
}
10671169

10681170
// Handle a conditional branch followed by an unconditional branch.
10691171
if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
10701172
I->getDesc().isUnconditionalBranch()) {
1071-
parseCondBranch(*std::prev(I), TBB, Cond);
1173+
MachineInstr &CondBr = *std::prev(I);
1174+
parseCondBranch(CondBr, TBB, Cond);
10721175
FBB = getBranchDestBlock(*I);
1176+
// Try to fold the branch of the conditional branch into an unconditional
1177+
// branch.
1178+
MachineBasicBlock *Folded = nullptr;
1179+
if (AllowModify)
1180+
trySimplifyCondBr(MBB, TBB, FBB, Cond, Folded);
10731181
return false;
10741182
}
10751183

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,28 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
319319
const MachineInstr &MI2) const;
320320
bool hasReassociableVectorSibling(const MachineInstr &Inst,
321321
bool &Commuted) const;
322+
/// Return true if the branch represented by the conditional branch with
323+
/// components TBB, FBB, and Cond was folded into an unconditional branch
324+
/// that branches to FoldedBB.
325+
///
326+
/// If FBB is nullptr, then the input represents a conditional branch with
327+
/// a fallthrough.
328+
///
329+
/// For example:
330+
/// BRCOND EQ 0, 0, BB1
331+
/// BR BB2
332+
///
333+
/// can be simplified to BR BB1 since 0 == 0 statically. On the other hand,
334+
///
335+
///
336+
/// BRCOND EQ 0, 1, BB1
337+
/// BR BB2
338+
///
339+
/// can be simplified to BR BB2 because 0 != 1 statically.
340+
bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
341+
MachineBasicBlock *FBB,
342+
SmallVectorImpl<MachineOperand> &Cond,
343+
MachineBasicBlock *&FoldedBB) const;
322344
};
323345

324346
namespace RISCV {

llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind {
357357
define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
358358
; RV32I-LABEL: ctpop_i64_ugt_two:
359359
; RV32I: # %bb.0:
360-
; RV32I-NEXT: beqz zero, .LBB6_2
361-
; RV32I-NEXT: # %bb.1:
362-
; RV32I-NEXT: sltiu a0, zero, 0
363-
; RV32I-NEXT: ret
364-
; RV32I-NEXT: .LBB6_2:
365360
; RV32I-NEXT: srli a2, a0, 1
366361
; RV32I-NEXT: lui a3, 349525
367362
; RV32I-NEXT: lui a4, 209715
@@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
404399
;
405400
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
406401
; RV32ZBB: # %bb.0:
407-
; RV32ZBB-NEXT: beqz zero, .LBB6_2
408-
; RV32ZBB-NEXT: # %bb.1:
409-
; RV32ZBB-NEXT: sltiu a0, zero, 0
410-
; RV32ZBB-NEXT: ret
411-
; RV32ZBB-NEXT: .LBB6_2:
412402
; RV32ZBB-NEXT: cpop a0, a0
413403
; RV32ZBB-NEXT: cpop a1, a1
414404
; RV32ZBB-NEXT: add a0, a1, a0
@@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
422412
define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
423413
; RV32I-LABEL: ctpop_i64_ugt_one:
424414
; RV32I: # %bb.0:
425-
; RV32I-NEXT: beqz zero, .LBB7_2
426-
; RV32I-NEXT: # %bb.1:
427-
; RV32I-NEXT: snez a0, zero
428-
; RV32I-NEXT: ret
429-
; RV32I-NEXT: .LBB7_2:
430415
; RV32I-NEXT: srli a2, a0, 1
431416
; RV32I-NEXT: lui a3, 349525
432417
; RV32I-NEXT: lui a4, 209715
@@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
470455
;
471456
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
472457
; RV32ZBB: # %bb.0:
473-
; RV32ZBB-NEXT: beqz zero, .LBB7_2
474-
; RV32ZBB-NEXT: # %bb.1:
475-
; RV32ZBB-NEXT: snez a0, zero
476-
; RV32ZBB-NEXT: ret
477-
; RV32ZBB-NEXT: .LBB7_2:
478458
; RV32ZBB-NEXT: cpop a0, a0
479459
; RV32ZBB-NEXT: cpop a1, a1
480460
; RV32ZBB-NEXT: add a0, a1, a0

llvm/test/CodeGen/RISCV/branch_zero.ll

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,10 @@
55
define void @foo(i16 %finder_idx) {
66
; CHECK-LABEL: foo:
77
; CHECK: # %bb.0: # %entry
8-
; CHECK-NEXT: .LBB0_1: # %for.body
9-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
108
; CHECK-NEXT: slli a0, a0, 48
11-
; CHECK-NEXT: bltz a0, .LBB0_4
12-
; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
13-
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
14-
; CHECK-NEXT: li a0, 0
15-
; CHECK-NEXT: bnez zero, .LBB0_1
16-
; CHECK-NEXT: # %bb.3: # %while.body
17-
; CHECK-NEXT: .LBB0_4: # %while.cond1.preheader.i
9+
; CHECK-NEXT: bltz a0, .LBB0_2
10+
; CHECK-NEXT: # %bb.1: # %while.cond.preheader.i
11+
; CHECK-NEXT: .LBB0_2: # %while.cond1.preheader.i
1812
entry:
1913
br label %for.body
2014

@@ -46,16 +40,10 @@ if.then:
4640
define void @bar(i16 %finder_idx) {
4741
; CHECK-LABEL: bar:
4842
; CHECK: # %bb.0: # %entry
49-
; CHECK-NEXT: .LBB1_1: # %for.body
50-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5143
; CHECK-NEXT: slli a0, a0, 48
52-
; CHECK-NEXT: bgez a0, .LBB1_4
53-
; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
54-
; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
55-
; CHECK-NEXT: li a0, 0
56-
; CHECK-NEXT: bnez zero, .LBB1_1
57-
; CHECK-NEXT: # %bb.3: # %while.body
58-
; CHECK-NEXT: .LBB1_4: # %while.cond1.preheader.i
44+
; CHECK-NEXT: bgez a0, .LBB1_2
45+
; CHECK-NEXT: # %bb.1: # %while.cond.preheader.i
46+
; CHECK-NEXT: .LBB1_2: # %while.cond1.preheader.i
5947
entry:
6048
br label %for.body
6149

@@ -87,11 +75,9 @@ if.then:
8775
define ptr @baz() {
8876
; CHECK-LABEL: baz:
8977
; CHECK: # %bb.0: # %entry
90-
; CHECK-NEXT: beqz zero, .LBB2_2
9178
; CHECK-NEXT: # %bb.1: # %if.end12
9279
; CHECK-NEXT: li a0, 0
9380
; CHECK-NEXT: ret
94-
; CHECK-NEXT: .LBB2_2: # %if.then10
9581
entry:
9682
%or.cond = or i1 false, false
9783
br i1 %or.cond, label %if.end12, label %if.then10

llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mattr=+zcmp -verify-machineinstrs \
23
; RUN: -mtriple=riscv32 -target-abi=ilp32 < %s \
34
; RUN: | FileCheck %s -check-prefixes=RV32IZCMP
@@ -13,36 +14,27 @@ declare dso_local void @f1() local_unnamed_addr
1314
declare dso_local void @f2() local_unnamed_addr
1415
define dso_local void @f0() local_unnamed_addr {
1516
; RV32IZCMP-LABEL: f0:
16-
; RV32IZCMP: .cfi_startproc
17-
; RV32IZCMP-NEXT: # %bb.0: # %entry
18-
; RV32IZCMP-NEXT: bnez zero, .LBB0_2
19-
; RV32IZCMP-NEXT: # %bb.1: # %if.T
20-
; RV32IZCMP-NEXT: cm.push {ra}, -16
21-
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16
22-
; RV32IZCMP-NEXT: .cfi_offset ra, -4
23-
; RV32IZCMP-NEXT: call f1
24-
; RV32IZCMP-NEXT: cm.pop {ra}, 16
25-
; RV32IZCMP-NEXT: .cfi_restore ra
26-
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 0
27-
; RV32IZCMP-NEXT: .LBB0_2: # %if.F
28-
; RV32IZCMP-NEXT: tail f2
29-
; RV32IZCMP-NEXT: .Lfunc_end0:
30-
17+
; RV32IZCMP: # %bb.0: # %entry
18+
; RV32IZCMP-NEXT: cm.push {ra}, -16
19+
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16
20+
; RV32IZCMP-NEXT: .cfi_offset ra, -4
21+
; RV32IZCMP-NEXT: call f1
22+
; RV32IZCMP-NEXT: cm.pop {ra}, 16
23+
; RV32IZCMP-NEXT: .cfi_restore ra
24+
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 0
25+
; RV32IZCMP-NEXT: tail f2
26+
;
3127
; RV64IZCMP-LABEL: f0:
32-
; RV64IZCMP: .cfi_startproc
33-
; RV64IZCMP-NEXT: # %bb.0: # %entry
34-
; RV64IZCMP-NEXT: bnez zero, .LBB0_2
35-
; RV64IZCMP-NEXT: # %bb.1: # %if.T
36-
; RV64IZCMP-NEXT: cm.push {ra}, -16
37-
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16
38-
; RV64IZCMP-NEXT: .cfi_offset ra, -8
39-
; RV64IZCMP-NEXT: call f1
40-
; RV64IZCMP-NEXT: cm.pop {ra}, 16
41-
; RV64IZCMP-NEXT: .cfi_restore ra
42-
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 0
43-
; RV64IZCMP-NEXT: .LBB0_2: # %if.F
44-
; RV64IZCMP-NEXT: tail f2
45-
; RV64IZCMP-NEXT: .Lfunc_end0:
28+
; RV64IZCMP: # %bb.0: # %entry
29+
; RV64IZCMP-NEXT: cm.push {ra}, -16
30+
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16
31+
; RV64IZCMP-NEXT: .cfi_offset ra, -8
32+
; RV64IZCMP-NEXT: call f1
33+
; RV64IZCMP-NEXT: cm.pop {ra}, 16
34+
; RV64IZCMP-NEXT: .cfi_restore ra
35+
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 0
36+
; RV64IZCMP-NEXT: tail f2
37+
4638
entry:
4739
br i1 poison, label %if.T, label %if.F
4840

0 commit comments

Comments
 (0)