Skip to content

Commit 41a010a

Browse files
Yi-Hong Lyu
authored and committed
[PowerPC] Remove redundant load immediate instructions
Currently the PowerPC backend emits code like this: `r3 = li 0; std r3, 264(r1); r3 = li 0; std r3, 272(r1)`. This patch fixes that, and other cases where a register already contains the value being loaded, so that we get: `r3 = li 0; std r3, 264(r1); std r3, 272(r1)`. Differential Revision: https://reviews.llvm.org/D64220 llvm-svn: 366840
1 parent 62ac91f commit 41a010a

File tree

3 files changed

+506
-0
lines changed

3 files changed

+506
-0
lines changed

llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,108 @@ namespace {
5757
MachineFunctionProperties::Property::NoVRegs);
5858
}
5959

60+
// This function removes any redundant load immediates. It has two level
61+
// loops - The outer loop finds the load immediates BBI that could be used
62+
// to replace following redundancy. The inner loop scans instructions that
63+
// after BBI to find redundancy and update kill/dead flags accordingly. If
64+
// AfterBBI is the same as BBI, it is redundant, otherwise any instructions
65+
// that modify the def register of BBI would break the scanning.
66+
// DeadOrKillToUnset is a pointer to the previous operand that had the
67+
// kill/dead flag set. It keeps track of the def register of BBI, the use
68+
// registers of AfterBBIs and the def registers of AfterBBIs.
69+
bool removeRedundantLIs(MachineBasicBlock &MBB,
70+
const TargetRegisterInfo *TRI) {
71+
LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
72+
MBB.dump(); dbgs() << "\n");
73+
74+
DenseSet<MachineInstr *> InstrsToErase;
75+
for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
76+
// Skip load immediate that is marked to be erased later because it
77+
// cannot be used to replace any other instructions.
78+
if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
79+
continue;
80+
// Skip non-load immediate.
81+
unsigned Opc = BBI->getOpcode();
82+
if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
83+
Opc != PPC::LIS8)
84+
continue;
85+
// Skip load immediate, where the operand is a relocation (e.g., $r3 =
86+
// LI target-flags(ppc-lo) %const.0).
87+
if (!BBI->getOperand(1).isImm())
88+
continue;
89+
assert(BBI->getOperand(0).isReg() &&
90+
"Expected a register for the first operand");
91+
92+
LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
93+
94+
unsigned Reg = BBI->getOperand(0).getReg();
95+
int64_t Imm = BBI->getOperand(1).getImm();
96+
MachineOperand *DeadOrKillToUnset = nullptr;
97+
if (BBI->getOperand(0).isDead()) {
98+
DeadOrKillToUnset = &BBI->getOperand(0);
99+
LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
100+
<< " from load immediate " << *BBI
101+
<< " is a unsetting candidate\n");
102+
}
103+
// This loop scans instructions after BBI to see if there is any
104+
// redundant load immediate.
105+
for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
106+
++AfterBBI) {
107+
// Track the operand that kill Reg. We would unset the kill flag of
108+
// the operand if there is a following redundant load immediate.
109+
int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
110+
if (KillIdx != -1) {
111+
assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
112+
DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
113+
LLVM_DEBUG(dbgs()
114+
<< " Kill flag of " << *DeadOrKillToUnset << " from "
115+
<< *AfterBBI << " is a unsetting candidate\n");
116+
}
117+
118+
if (!AfterBBI->modifiesRegister(Reg, TRI))
119+
continue;
120+
assert(DeadOrKillToUnset &&
121+
"Shouldn't overwrite a register before it is killed");
122+
// Finish scanning because Reg is overwritten by a non-load
123+
// instruction.
124+
if (AfterBBI->getOpcode() != Opc)
125+
break;
126+
assert(AfterBBI->getOperand(0).isReg() &&
127+
"Expected a register for the first operand");
128+
// Finish scanning because Reg is overwritten by a relocation or a
129+
// different value.
130+
if (!AfterBBI->getOperand(1).isImm() ||
131+
AfterBBI->getOperand(1).getImm() != Imm)
132+
break;
133+
134+
// It loads same immediate value to the same Reg, which is redundant.
135+
// We would unset kill flag in previous Reg usage to extend live range
136+
// of Reg first, then remove the redundancy.
137+
LLVM_DEBUG(dbgs() << " Unset dead/kill flag of " << *DeadOrKillToUnset
138+
<< " from " << *DeadOrKillToUnset->getParent());
139+
if (DeadOrKillToUnset->isDef())
140+
DeadOrKillToUnset->setIsDead(false);
141+
else
142+
DeadOrKillToUnset->setIsKill(false);
143+
DeadOrKillToUnset =
144+
AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
145+
if (DeadOrKillToUnset)
146+
LLVM_DEBUG(dbgs()
147+
<< " Dead flag of " << *DeadOrKillToUnset << " from "
148+
<< *AfterBBI << " is a unsetting candidate\n");
149+
InstrsToErase.insert(&*AfterBBI);
150+
LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
151+
AfterBBI->dump());
152+
}
153+
}
154+
155+
for (MachineInstr *MI : InstrsToErase) {
156+
MI->eraseFromParent();
157+
}
158+
NumRemovedInPreEmit += InstrsToErase.size();
159+
return !InstrsToErase.empty();
160+
}
161+
60162
bool runOnMachineFunction(MachineFunction &MF) override {
61163
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
62164
return false;
@@ -65,6 +167,7 @@ namespace {
65167
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
66168
SmallVector<MachineInstr *, 4> InstrsToErase;
67169
for (MachineBasicBlock &MBB : MF) {
170+
Changed |= removeRedundantLIs(MBB, TRI);
68171
for (MachineInstr &MI : MBB) {
69172
unsigned Opc = MI.getOpcode();
70173
// Detect self copies - these can result from running AADB.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mcpu=pwr9 -O3 < %s | FileCheck %s -check-prefix=PPC64LE
3+
4+
target datalayout = "e-m:e-i64:64-n32:64"
5+
target triple = "powerpc64le-unknown-linux-gnu"
6+
7+
@global.6 = external global i32*
8+
9+
declare void @barney.88(i1, i32*)
10+
declare void @barney.94(i8*, i32)
11+
12+
; Passes %arg7 and a null pointer to @barney.88 from a block reached by an
; unconditional branch. The checked sequence contains a single `li 4, 0`: it
; feeds `isel` and r4 is not written again before `bl barney.88`.
; NOTE(review): presumably a second `li 4, 0` was emitted for the null
; argument before this peephole existed - confirm against pre-patch output.
define void @redundancy_on_ppc_only(i1 %arg7) {
13+
; PPC64LE-LABEL: redundancy_on_ppc_only:
14+
; PPC64LE: # %bb.0: # %bb
15+
; PPC64LE-NEXT: mflr 0
16+
; PPC64LE-NEXT: andi. 3, 3, 1
17+
; PPC64LE-NEXT: std 0, 16(1)
18+
; PPC64LE-NEXT: stdu 1, -32(1)
19+
; PPC64LE-NEXT: .cfi_def_cfa_offset 32
20+
; PPC64LE-NEXT: .cfi_offset lr, 16
21+
; PPC64LE-NEXT: li 3, 1
22+
; PPC64LE-NEXT: li 4, 0
23+
; PPC64LE-NEXT: isel 3, 3, 4, 1
24+
; PPC64LE-NEXT: bl barney.88
25+
; PPC64LE-NEXT: nop
26+
; PPC64LE-NEXT: addi 1, 1, 32
27+
; PPC64LE-NEXT: ld 0, 16(1)
28+
; PPC64LE-NEXT: mtlr 0
29+
; PPC64LE-NEXT: blr
30+
bb:
31+
br label %bb10
32+
33+
bb10: ; preds = %bb
34+
call void @barney.88(i1 %arg7, i32* null)
35+
ret void
36+
}
37+
38+
; Stores a null pointer to @global.6 and then calls @barney.94 with an i32 0
; second argument. The checked sequence contains a single `li 4, 0`: it is
; the value stored by `std 4, 0(3)` and r4 is not written again before
; `bl barney.94`. The function ends in `unreachable`, so no epilogue is
; checked after the call.
define void @redundancy_on_ppc_and_other_targets() {
39+
; PPC64LE-LABEL: redundancy_on_ppc_and_other_targets:
40+
; PPC64LE: # %bb.0:
41+
; PPC64LE-NEXT: mflr 0
42+
; PPC64LE-NEXT: std 0, 16(1)
43+
; PPC64LE-NEXT: stdu 1, -32(1)
44+
; PPC64LE-NEXT: .cfi_def_cfa_offset 32
45+
; PPC64LE-NEXT: .cfi_offset lr, 16
46+
; PPC64LE-NEXT: addis 3, 2, .LC0@toc@ha
47+
; PPC64LE-NEXT: ld 3, .LC0@toc@l(3)
48+
; PPC64LE-NEXT: li 4, 0
49+
; PPC64LE-NEXT: std 4, 0(3)
50+
; PPC64LE-NEXT: bl barney.94
51+
; PPC64LE-NEXT: nop
52+
store i32* null, i32** @global.6
53+
call void @barney.94(i8* undef, i32 0)
54+
unreachable
55+
}

0 commit comments

Comments
 (0)