Skip to content

Commit 1e39575

Browse files
authored
[RISCV] CSE by swapping conditional branches (llvm#71111)
DAGCombiner, as well as InstCombine, tend to canonicalize GE/LE into GT/LT, namely: ``` X >= C --> X > (C - 1) ``` This sometimes generates off-by-one constants that could have been CSE'd with surrounding constants. Instead of changing such canonicalization, this patch tries to swap those branch conditions post-isel, in the hope of resurfacing more constant CSE opportunities. More specifically, it performs the following optimization: For two constants C0 and C1 from ``` li Y, C0 li Z, C1 ``` To remove the redundant `li Y, C0`, 1. if C1 = C0 + 1 we can turn: (a) blt Y, X -> bge X, Z (b) bge Y, X -> blt X, Z 2. if C1 = C0 - 1 we can turn: (a) blt X, Y -> bge Z, X (b) bge X, Y -> blt Z, X This optimization will be done by PeepholeOptimizer through RISCVInstrInfo::optimizeCondBranch.
1 parent 015c06a commit 1e39575

File tree

3 files changed

+240
-0
lines changed

3 files changed

+240
-0
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,125 @@ bool RISCVInstrInfo::reverseBranchCondition(
11591159
return false;
11601160
}
11611161

1162+
/// Try to rewrite the conditional branch \p MI so that it compares against a
/// constant that is already materialized earlier in the same basic block,
/// making the branch's own single-use `li` dead.
///
/// Only the ordering conditions (LT/GE/LTU/GEU) are handled; EQ/NE branches
/// are left alone. On success the old branch is erased and replaced by a new
/// one built in front of it.
///
/// \param MI the conditional branch to optimize.
/// \returns true if the branch was replaced, false if nothing changed.
bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 3> Cond;
  if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;
  (void)FBB;

  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  assert(CC != RISCVCC::COND_INVALID);

  if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
    return false;

  // Everything that is left is an ordering comparison; remember whether it
  // interprets its operands as signed or unsigned, because the wrap-around
  // points of C0 +/- 1 differ between the two.
  const bool IsSigned = CC == RISCVCC::COND_LT || CC == RISCVCC::COND_GE;

  // For two constants C0 and C1 from
  // ```
  // li Y, C0
  // li Z, C1
  // ```
  // 1. if C1 = C0 + 1
  // we can turn:
  //  (a) blt Y, X -> bge X, Z
  //  (b) bge Y, X -> blt X, Z
  //
  // 2. if C1 = C0 - 1
  // we can turn:
  //  (a) blt X, Y -> bge Z, X
  //  (b) bge X, Y -> blt Z, X
  //
  // To make sure this optimization is really beneficial, we only
  // optimize for cases where Y had only one use (i.e. only used by the branch).

  // Right now we only care about LI (i.e. ADDI x0, imm)
  auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
    if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
        MI->getOperand(1).getReg() == RISCV::X0) {
      Imm = MI->getOperand(2).getImm();
      return true;
    }
    return false;
  };
  // True if Op is a virtual register defined by a load-immediate. x0 is
  // deliberately not treated as a constant here: it needs no `li`, so
  // replacing it with another register cannot remove an instruction and would
  // only lengthen the live range of Z.
  auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
    if (!Op.isReg())
      return false;
    Register Reg = Op.getReg();
    if (!Reg.isVirtual())
      return false;
    // getVRegDef yields null when the register has no definition.
    const MachineInstr *Def = MRI.getVRegDef(Reg);
    return Def && isLoadImm(Def, Imm);
  };

  MachineOperand &LHS = MI.getOperand(0);
  MachineOperand &RHS = MI.getOperand(1);
  // Try to find the register for constant Z; return
  // invalid register otherwise.
  auto searchConst = [&](int64_t C1) -> Register {
    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
    auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
      int64_t Imm;
      // Only accept definitions of virtual registers: a physical register
      // may be redefined between its `li` and the branch.
      return isLoadImm(&I, Imm) && Imm == C1 &&
             I.getOperand(0).getReg().isVirtual();
    });
    if (DefC1 != E)
      return DefC1->getOperand(0).getReg();

    return Register();
  };

  bool Modify = false;
  int64_t C0;
  if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
    // Might be case 1.
    // C0 + 1 must not wrap in the domain the branch compares in:
    //  - INT64_MAX + 1 is signed overflow (UB in C++ and wrong for blt/bge);
    //  - for bltu/bgeu an immediate of -1 is the largest unsigned value, and
    //    bumping it to 0 would turn a never-taken `bltu Y, X` into an
    //    always-taken `bgeu X, Z`.
    if (C0 < INT64_MAX && (IsSigned || C0 != -1))
      if (Register RegZ = searchConst(C0 + 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
    // Might be case 2.
    // C0 - 1 must not wrap either: INT64_MIN - 1 is signed overflow, and for
    // unsigned comparisons 0 - 1 would wrap back to the largest unsigned
    // value.
    if (C0 > INT64_MIN && (IsSigned || C0 != 0))
      if (Register RegZ = searchConst(C0 - 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  }

  if (!Modify)
    return false;

  // Build the new branch and remove the old one.
  BuildMI(*MBB, MI, MI.getDebugLoc(),
          getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
      .add(Cond[1])
      .add(Cond[2])
      .addMBB(TBB);
  MI.eraseFromParent();

  return true;
}
1280+
11621281
MachineBasicBlock *
11631282
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
11641283
assert(MI.getDesc().isBranch() && "Unexpected opcode!");

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
121121
bool
122122
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
123123

124+
bool optimizeCondBranch(MachineInstr &MI) const override;
125+
124126
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
125127

126128
bool isBranchOffsetInRange(unsigned BranchOpc,

llvm/test/CodeGen/RISCV/branch-opt.ll

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=riscv32 -O2 -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=riscv64 -O2 -verify-machineinstrs < %s | FileCheck %s
4+
5+
; Unsigned compare where the stored constant (32) is one more than the
; constant in the `icmp ule` (31). The CHECK lines expect a single
; `li a4, 32` that feeds both the store and a `bgeu a1, a4` branch, i.e. no
; separate `li` for 31 before the branch.
define void @u_case1_a(ptr %a, i32 signext %b, ptr %c, ptr %d) {
6+
; CHECK-LABEL: u_case1_a:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: li a4, 32
9+
; CHECK-NEXT: sw a4, 0(a0)
10+
; CHECK-NEXT: bgeu a1, a4, .LBB0_2
11+
; CHECK-NEXT: # %bb.1: # %block1
12+
; CHECK-NEXT: sw a1, 0(a2)
13+
; CHECK-NEXT: ret
14+
; CHECK-NEXT: .LBB0_2: # %block2
15+
; CHECK-NEXT: li a0, 87
16+
; CHECK-NEXT: sw a0, 0(a3)
17+
; CHECK-NEXT: ret
18+
store i32 32, ptr %a
19+
%p = icmp ule i32 %b, 31
20+
br i1 %p, label %block1, label %block2
21+
22+
block1: ; preds = %0
23+
store i32 %b, ptr %c
24+
br label %end_block
25+
26+
block2: ; preds = %0
27+
store i32 87, ptr %d
28+
br label %end_block
29+
30+
end_block: ; preds = %block2, %block1
31+
ret void
32+
}
33+
34+
; Signed compare where the stored constant (-1) is one more than the constant
; in the `icmp sle` (-2). The CHECK lines expect a single `li a4, -1` that
; feeds both the store and a `bge a1, a4` branch, i.e. no separate `li` for -2
; before the branch.
define void @case1_a(ptr %a, i32 signext %b, ptr %c, ptr %d) {
35+
; CHECK-LABEL: case1_a:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: li a4, -1
38+
; CHECK-NEXT: sw a4, 0(a0)
39+
; CHECK-NEXT: bge a1, a4, .LBB1_2
40+
; CHECK-NEXT: # %bb.1: # %block1
41+
; CHECK-NEXT: sw a1, 0(a2)
42+
; CHECK-NEXT: ret
43+
; CHECK-NEXT: .LBB1_2: # %block2
44+
; CHECK-NEXT: li a0, 87
45+
; CHECK-NEXT: sw a0, 0(a3)
46+
; CHECK-NEXT: ret
47+
store i32 -1, ptr %a
48+
%p = icmp sle i32 %b, -2
49+
br i1 %p, label %block1, label %block2
50+
51+
block1: ; preds = %0
52+
store i32 %b, ptr %c
53+
br label %end_block
54+
55+
block2: ; preds = %0
56+
store i32 87, ptr %d
57+
br label %end_block
58+
59+
end_block: ; preds = %block2, %block1
60+
ret void
61+
}
62+
63+
; Unsigned compare where the stored constant (32) is one less than the
; constant in the `icmp uge` (33). The CHECK lines expect a single
; `li a4, 32` that feeds both the store and a `bgeu a4, a1` branch (constant
; as the first branch operand), i.e. no separate `li` for 33.
define void @u_case2_a(ptr %a, i32 signext %b, ptr %c, ptr %d) {
64+
; CHECK-LABEL: u_case2_a:
65+
; CHECK: # %bb.0:
66+
; CHECK-NEXT: li a4, 32
67+
; CHECK-NEXT: sw a4, 0(a0)
68+
; CHECK-NEXT: bgeu a4, a1, .LBB2_2
69+
; CHECK-NEXT: # %bb.1: # %block1
70+
; CHECK-NEXT: sw a1, 0(a2)
71+
; CHECK-NEXT: ret
72+
; CHECK-NEXT: .LBB2_2: # %block2
73+
; CHECK-NEXT: li a0, 87
74+
; CHECK-NEXT: sw a0, 0(a3)
75+
; CHECK-NEXT: ret
76+
store i32 32, ptr %a
77+
%p = icmp uge i32 %b, 33
78+
br i1 %p, label %block1, label %block2
79+
80+
block1: ; preds = %0
81+
store i32 %b, ptr %c
82+
br label %end_block
83+
84+
block2: ; preds = %0
85+
store i32 87, ptr %d
86+
br label %end_block
87+
88+
end_block: ; preds = %block2, %block1
89+
ret void
90+
}
91+
92+
; Signed compare where the stored constant (-4) is one less than the constant
; in the `icmp sge` (-3). The CHECK lines expect a single `li a4, -4` that
; feeds both the store and a `bge a4, a1` branch (constant as the first branch
; operand), i.e. no separate `li` for -3.
define void @case2_a(ptr %a, i32 signext %b, ptr %c, ptr %d) {
93+
; CHECK-LABEL: case2_a:
94+
; CHECK: # %bb.0:
95+
; CHECK-NEXT: li a4, -4
96+
; CHECK-NEXT: sw a4, 0(a0)
97+
; CHECK-NEXT: bge a4, a1, .LBB3_2
98+
; CHECK-NEXT: # %bb.1: # %block1
99+
; CHECK-NEXT: sw a1, 0(a2)
100+
; CHECK-NEXT: ret
101+
; CHECK-NEXT: .LBB3_2: # %block2
102+
; CHECK-NEXT: li a0, 87
103+
; CHECK-NEXT: sw a0, 0(a3)
104+
; CHECK-NEXT: ret
105+
store i32 -4, ptr %a
106+
%p = icmp sge i32 %b, -3
107+
br i1 %p, label %block1, label %block2
108+
109+
block1: ; preds = %0
110+
store i32 %b, ptr %c
111+
br label %end_block
112+
113+
block2: ; preds = %0
114+
store i32 87, ptr %d
115+
br label %end_block
116+
117+
end_block: ; preds = %block2, %block1
118+
ret void
119+
}

0 commit comments

Comments
 (0)