Skip to content

Commit 95b28a4

Browse files
committed
[ARM] LE support in ConstantIslands
The low-overhead branch extension provides a loop-end 'LE' instruction that performs neither a decrement nor a compare; it just jumps backwards. This patch modifies the constant islands pass to try to insert an LE instruction in place of a Thumb2 conditional branch, instead of shrinking the branch. This only happens if a cmp can be converted to a cbz/cbnz and used to exit the loop. Differential Revision: https://reviews.llvm.org/D67404 llvm-svn: 372085
1 parent 1bd5887 commit 95b28a4

File tree

6 files changed

+856
-27
lines changed

6 files changed

+856
-27
lines changed

llvm/lib/Target/ARM/ARMConstantIslandPass.cpp

Lines changed: 111 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/CodeGen/LivePhysRegs.h"
3030
#include "llvm/CodeGen/MachineBasicBlock.h"
3131
#include "llvm/CodeGen/MachineConstantPool.h"
32+
#include "llvm/CodeGen/MachineDominators.h"
3233
#include "llvm/CodeGen/MachineFunction.h"
3334
#include "llvm/CodeGen/MachineFunctionPass.h"
3435
#include "llvm/CodeGen/MachineInstr.h"
@@ -70,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
7071
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
7172
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
7273
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
74+
STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");
7375

7476
static cl::opt<bool>
7577
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
@@ -213,6 +215,7 @@ namespace {
213215
const ARMBaseInstrInfo *TII;
214216
const ARMSubtarget *STI;
215217
ARMFunctionInfo *AFI;
218+
MachineDominatorTree *DT = nullptr;
216219
bool isThumb;
217220
bool isThumb1;
218221
bool isThumb2;
@@ -225,6 +228,12 @@ namespace {
225228

226229
bool runOnMachineFunction(MachineFunction &MF) override;
227230

231+
// Declares this pass's analysis dependencies: it marks the CFG as preserved,
// requires the MachineDominatorTree analysis, and then defers to the base
// MachineFunctionPass implementation for the remaining defaults.
void getAnalysisUsage(AnalysisUsage &AU) const override {
232+
// NOTE(review): this asserts that the pass leaves the CFG intact; confirm
// that no other part of the pass adds, removes or splits blocks.
AU.setPreservesCFG();
233+
// The dominator tree is fetched in runOnMachineFunction and queried when
// deciding whether a backwards conditional branch may become an LE.
AU.addRequired<MachineDominatorTree>();
234+
MachineFunctionPass::getAnalysisUsage(AU);
235+
}
236+
228237
MachineFunctionProperties getRequiredProperties() const override {
229238
return MachineFunctionProperties().set(
230239
MachineFunctionProperties::Property::NoVRegs);
@@ -350,6 +359,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
350359
isPositionIndependentOrROPI =
351360
STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
352361
AFI = MF->getInfo<ARMFunctionInfo>();
362+
DT = &getAnalysis<MachineDominatorTree>();
353363

354364
isThumb = AFI->isThumbFunction();
355365
isThumb1 = AFI->isThumb1OnlyFunction();
@@ -1809,16 +1819,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
18091819
return MadeChange;
18101820
}
18111821

1822+
18121823
bool ARMConstantIslands::optimizeThumb2Branches() {
1813-
bool MadeChange = false;
18141824

1815-
// The order in which branches appear in ImmBranches is approximately their
1816-
// order within the function body. By visiting later branches first, we reduce
1817-
// the distance between earlier forward branches and their targets, making it
1818-
// more likely that the cbn?z optimization, which can only apply to forward
1819-
// branches, will succeed.
1820-
for (unsigned i = ImmBranches.size(); i != 0; --i) {
1821-
ImmBranch &Br = ImmBranches[i-1];
1825+
auto TryShrinkBranch = [this](ImmBranch &Br) {
18221826
unsigned Opcode = Br.MI->getOpcode();
18231827
unsigned NewOpc = 0;
18241828
unsigned Scale = 1;
@@ -1846,47 +1850,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
18461850
BBUtils->adjustBBSize(MBB, -2);
18471851
BBUtils->adjustBBOffsetsAfter(MBB);
18481852
++NumT2BrShrunk;
1849-
MadeChange = true;
1853+
return true;
18501854
}
18511855
}
1856+
return false;
1857+
};
18521858

1853-
Opcode = Br.MI->getOpcode();
1854-
if (Opcode != ARM::tBcc)
1855-
continue;
1859+
struct ImmCompare {
1860+
MachineInstr* MI = nullptr;
1861+
unsigned NewOpc = 0;
1862+
};
1863+
1864+
auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp,
1865+
MachineBasicBlock *DestBB) {
1866+
ImmCmp.MI = nullptr;
1867+
ImmCmp.NewOpc = 0;
18561868

18571869
// If the conditional branch doesn't kill CPSR, then CPSR can be liveout
18581870
// so this transformation is not safe.
18591871
if (!Br.MI->killsRegister(ARM::CPSR))
1860-
continue;
1872+
return false;
18611873

1862-
NewOpc = 0;
18631874
unsigned PredReg = 0;
1875+
unsigned NewOpc = 0;
18641876
ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
18651877
if (Pred == ARMCC::EQ)
18661878
NewOpc = ARM::tCBZ;
18671879
else if (Pred == ARMCC::NE)
18681880
NewOpc = ARM::tCBNZ;
1869-
if (!NewOpc)
1870-
continue;
1871-
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1881+
else
1882+
return false;
1883+
18721884
// Check if the distance is within 126. Subtract starting offset by 2
18731885
// because the cmp will be eliminated.
18741886
unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
18751887
BBInfoVector &BBInfo = BBUtils->getBBInfo();
18761888
unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
18771889
if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
1878-
continue;
1890+
return false;
18791891

18801892
// Search backwards to find a tCMPi8
18811893
auto *TRI = STI->getRegisterInfo();
18821894
MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
18831895
if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
1896+
return false;
1897+
1898+
ImmCmp.MI = CmpMI;
1899+
ImmCmp.NewOpc = NewOpc;
1900+
return true;
1901+
};
1902+
1903+
auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) {
1904+
if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() ||
1905+
STI->hasMinSize())
1906+
return false;
1907+
1908+
MachineBasicBlock *MBB = Br.MI->getParent();
1909+
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1910+
if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) ||
1911+
!BBUtils->isBBInRange(Br.MI, DestBB, 4094))
1912+
return false;
1913+
1914+
if (!DT->dominates(DestBB, MBB))
1915+
return false;
1916+
1917+
// We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite
1918+
// target of Br. So now we need to reverse the condition.
1919+
Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ;
1920+
1921+
MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(),
1922+
TII->get(ARM::t2LE));
1923+
MIB.add(Br.MI->getOperand(0));
1924+
Br.MI->eraseFromParent();
1925+
Br.MI = MIB;
1926+
++NumLEInserted;
1927+
return true;
1928+
};
1929+
1930+
bool MadeChange = false;
1931+
1932+
// The order in which branches appear in ImmBranches is approximately their
1933+
// order within the function body. By visiting later branches first, we reduce
1934+
// the distance between earlier forward branches and their targets, making it
1935+
// more likely that the cbn?z optimization, which can only apply to forward
1936+
// branches, will succeed.
1937+
for (ImmBranch &Br : reverse(ImmBranches)) {
1938+
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1939+
MachineBasicBlock *MBB = Br.MI->getParent();
1940+
MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ?
1941+
MBB->getFallThrough() :
1942+
MBB->back().getOperand(0).getMBB();
1943+
1944+
ImmCompare Cmp;
1945+
if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) {
1946+
DestBB = ExitBB;
1947+
MadeChange = true;
1948+
} else {
1949+
FindCmpForCBZ(Br, Cmp, DestBB);
1950+
MadeChange |= TryShrinkBranch(Br);
1951+
}
1952+
1953+
unsigned Opcode = Br.MI->getOpcode();
1954+
if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)
18841955
continue;
18851956

1886-
Register Reg = CmpMI->getOperand(0).getReg();
1957+
Register Reg = Cmp.MI->getOperand(0).getReg();
18871958

18881959
// Check for Kill flags on Reg. If they are present remove them and set kill
18891960
// on the new CBZ.
1961+
auto *TRI = STI->getRegisterInfo();
18901962
MachineBasicBlock::iterator KillMI = Br.MI;
18911963
bool RegKilled = false;
18921964
do {
@@ -1896,19 +1968,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
18961968
RegKilled = true;
18971969
break;
18981970
}
1899-
} while (KillMI != CmpMI);
1971+
} while (KillMI != Cmp.MI);
19001972

19011973
// Create the new CBZ/CBNZ
1902-
MachineBasicBlock *MBB = Br.MI->getParent();
1903-
LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
1974+
LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);
19041975
MachineInstr *NewBR =
1905-
BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc))
1976+
BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))
19061977
.addReg(Reg, getKillRegState(RegKilled))
19071978
.addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
1908-
CmpMI->eraseFromParent();
1909-
Br.MI->eraseFromParent();
1910-
Br.MI = NewBR;
1979+
1980+
Cmp.MI->eraseFromParent();
1981+
BBInfoVector &BBInfo = BBUtils->getBBInfo();
19111982
BBInfo[MBB->getNumber()].Size -= 2;
1983+
1984+
if (Br.MI->getOpcode() == ARM::tBcc) {
1985+
Br.MI->eraseFromParent();
1986+
Br.MI = NewBR;
1987+
} else if (&MBB->back() != Br.MI) {
1988+
// We've generated an LE and already erased the original conditional
1989+
// branch. The CBN?Z is now used to branch to the other successor, so an
1990+
// unconditional branch terminator is now redundant.
1991+
MachineInstr *LastMI = &MBB->back();
1992+
if (LastMI != Br.MI) {
1993+
BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize();
1994+
LastMI->eraseFromParent();
1995+
}
1996+
}
19121997
BBUtils->adjustBBOffsetsAfter(MBB);
19131998
++NumCBZ;
19141999
MadeChange = true;

llvm/test/CodeGen/ARM/O3-pipeline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,8 @@
143143
; CHECK-NEXT: Thumb2 instruction size reduce pass
144144
; CHECK-NEXT: Unpack machine instruction bundles
145145
; CHECK-NEXT: optimise barriers pass
146-
; CHECK-NEXT: ARM constant island placement and branch shortening pass
147146
; CHECK-NEXT: MachineDominator Tree Construction
147+
; CHECK-NEXT: ARM constant island placement and branch shortening pass
148148
; CHECK-NEXT: Machine Natural Loop Construction
149149
; CHECK-NEXT: ARM Low Overhead Loops pass
150150
; CHECK-NEXT: Contiguously Lay Out Funclets

0 commit comments

Comments
 (0)