Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 65b47b2

Browse files
committed
[IfCvt][ARM] Optimise diamond if-conversion for code size
Currently, the heuristics the if-conversion pass uses for diamond if-conversion are based on execution time, with no consideration for code size. This adds a new set of heuristics to be used when optimising for code size. This is mostly target-independent, because the if-conversion pass can see the code size of the instructions which it is removing. For Thumb, there are a few passes (insertion of IT instructions, selection of narrow branches, and selection of CBZ instructions) which are run after if-conversion and affect these heuristics, so I've added target hooks to better predict the code-size effect of a proposed if-conversion. Differential revision: https://reviews.llvm.org/D67350. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374301 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 318bd27 commit 65b47b2

File tree

5 files changed

+724
-17
lines changed

5 files changed

+724
-17
lines changed

include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,19 @@ class TargetInstrInfo : public MCInstrInfo {
778778
return false;
779779
}
780780

781+
/// Return the increase in code size needed to predicate a contiguous run of
782+
/// NumInsts instructions.
783+
virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
784+
unsigned NumInsts) const {
785+
return 0;
786+
}
787+
788+
/// Return an estimate for the code size reduction (in bytes) which will be
789+
/// caused by removing the given branch instruction during if-conversion.
790+
virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const {
791+
return getInstSizeInBytes(MI);
792+
}
793+
781794
/// Return true if it's profitable to unpredicate
782795
/// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
783796
/// exclusive predicates.

lib/CodeGen/IfConversion.cpp

Lines changed: 116 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -285,14 +285,113 @@ namespace {
285285
Prediction);
286286
}
287287

288-
bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
289-
unsigned TCycle, unsigned TExtra,
290-
MachineBasicBlock &FBB,
291-
unsigned FCycle, unsigned FExtra,
292-
BranchProbability Prediction) const {
293-
return TCycle > 0 && FCycle > 0 &&
294-
TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
295-
Prediction);
288+
/// Decide whether if-converting the diamond formed by the true block
/// (\p TBBInfo) and the false block (\p FBBInfo) is worthwhile.
///
/// For a function marked minsize the decision is made purely on code size:
/// the conversion is accepted when the bytes saved (branches that go away
/// plus one copy of the instructions common to both blocks) exceed the bytes
/// the target needs in order to predicate the remaining instructions.
/// Otherwise the decision is delegated to TargetInstrInfo::isProfitableToIfCvt
/// using the blocks' cost fields.
///
/// \param TBBInfo    Info for the true block of the diamond.
/// \param FBBInfo    Info for the false block of the diamond.
/// \param CommBB     Block whose terminating branches are counted as saved.
/// \param Dups       Number of duplicated instructions; subtracted from each
///                   block's cost on the non-minsize path.
/// \param Prediction Branch probability forwarded to isProfitableToIfCvt.
/// \param Forked     True for a forked diamond, in which case the branches at
///                   the end of the true/false blocks are not counted as
///                   removed.
/// \returns true if the if-conversion should go ahead.
bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
                        MachineBasicBlock &CommBB, unsigned Dups,
                        BranchProbability Prediction, bool Forked) const {
  const MachineFunction &MF = *TBBInfo.BB->getParent();

  // Not optimising for minimum size: use the cost-based target heuristic.
  if (!MF.getFunction().hasMinSize()) {
    unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
    unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
    bool Res = TCycle > 0 && FCycle > 0 &&
               TII->isProfitableToIfCvt(
                   *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
                   FCycle, FBBInfo.ExtraCost2, Prediction);
    LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
                      << ", FCycle=" << FCycle
                      << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
                      << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
    return Res;
  }

  MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
  MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
  MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
  MachineBasicBlock::iterator FIE = FBBInfo.BB->end();

  // Only the adjusted iterators are used below; the duplicate counts are
  // ignored. They are still zero-initialised so that the callee never reads
  // an indeterminate value should it accumulate into these out-parameters.
  unsigned Dups1 = 0, Dups2 = 0;
  if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
                                   *TBBInfo.BB, *FBBInfo.BB,
                                   /*SkipUnconditionalBranches*/ true))
    llvm_unreachable("should already have been checked by ValidDiamond");

  unsigned BranchBytes = 0;
  unsigned CommonBytes = 0;

  // Count common instructions at the start of the true and false blocks.
  for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
    LLVM_DEBUG(dbgs() << "Common inst: " << I);
    CommonBytes += TII->getInstSizeInBytes(I);
  }
  for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
    LLVM_DEBUG(dbgs() << "Common inst: " << I);
    CommonBytes += TII->getInstSizeInBytes(I);
  }

  // Count instructions at the end of the true and false blocks, after
  // the ones we plan to predicate. Analyzable branches will be removed
  // (unless this is a forked diamond), and all other instructions are
  // common between the two blocks.
  for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
    if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
      LLVM_DEBUG(dbgs() << "Saving branch: " << I);
      BranchBytes += TII->predictBranchSizeForIfCvt(I);
    } else {
      LLVM_DEBUG(dbgs() << "Common inst: " << I);
      CommonBytes += TII->getInstSizeInBytes(I);
    }
  }
  for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
    if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
      LLVM_DEBUG(dbgs() << "Saving branch: " << I);
      BranchBytes += TII->predictBranchSizeForIfCvt(I);
    } else {
      LLVM_DEBUG(dbgs() << "Common inst: " << I);
      CommonBytes += TII->getInstSizeInBytes(I);
    }
  }

  // Branches terminating the common block are counted as saved as well.
  for (auto &I : CommBB.terminators()) {
    if (I.isBranch()) {
      LLVM_DEBUG(dbgs() << "Saving branch: " << I);
      BranchBytes += TII->predictBranchSizeForIfCvt(I);
    }
  }

  // The common instructions in one branch will be eliminated, halving
  // their code size.
  CommonBytes /= 2;

  // Count the instructions which we need to predicate. Debug instructions
  // are skipped.
  unsigned NumPredicatedInstructions = 0;
  for (auto &I : make_range(TIB, TIE)) {
    if (!I.isDebugInstr()) {
      LLVM_DEBUG(dbgs() << "Predicating: " << I);
      NumPredicatedInstructions++;
    }
  }
  for (auto &I : make_range(FIB, FIE)) {
    if (!I.isDebugInstr()) {
      LLVM_DEBUG(dbgs() << "Predicating: " << I);
      NumPredicatedInstructions++;
    }
  }

  // Even though we're optimising for size at the expense of performance,
  // avoid creating really long predicated blocks.
  if (NumPredicatedInstructions > 15)
    return false;

  // Some targets (e.g. Thumb2) need to insert extra instructions to
  // start predicated blocks.
  unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
      MF, NumPredicatedInstructions);

  LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
                    << ", CommonBytes=" << CommonBytes
                    << ", NumPredicatedInstructions="
                    << NumPredicatedInstructions
                    << ", ExtraPredicateBytes=" << ExtraPredicateBytes
                    << ")\n");

  // Convert only when it is a strict code-size win.
  return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
}
297396

298397
/// Returns true if Block ends without a terminator.
@@ -842,6 +941,8 @@ bool IfConverter::ValidForkedDiamond(
842941

843942
TrueBBICalc.BB = TrueBBI.BB;
844943
FalseBBICalc.BB = FalseBBI.BB;
944+
TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
945+
FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
845946
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
846947
return false;
847948

@@ -899,6 +1000,8 @@ bool IfConverter::ValidDiamond(
8991000

9001001
TrueBBICalc.BB = TrueBBI.BB;
9011002
FalseBBICalc.BB = FalseBBI.BB;
1003+
TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
1004+
FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
9021005
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
9031006
return false;
9041007
// The size is used to decide whether to if-convert, and the shared portions
@@ -1186,13 +1289,9 @@ void IfConverter::AnalyzeBlock(
11861289

11871290
if (CanRevCond) {
11881291
BBInfo TrueBBICalc, FalseBBICalc;
1189-
auto feasibleDiamond = [&]() {
1190-
bool MeetsSize = MeetIfcvtSizeLimit(
1191-
*TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
1192-
TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
1193-
*FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
1194-
FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
1195-
Prediction);
1292+
auto feasibleDiamond = [&](bool Forked) {
1293+
bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
1294+
Dups + Dups2, Prediction, Forked);
11961295
bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
11971296
/* IsTriangle */ false, /* RevCond */ false,
11981297
/* hasCommonTail */ true);
@@ -1204,7 +1303,7 @@ void IfConverter::AnalyzeBlock(
12041303

12051304
if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
12061305
TrueBBICalc, FalseBBICalc)) {
1207-
if (feasibleDiamond()) {
1306+
if (feasibleDiamond(false)) {
12081307
// Diamond:
12091308
// EBB
12101309
// / \_
@@ -1220,7 +1319,7 @@ void IfConverter::AnalyzeBlock(
12201319
}
12211320
} else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
12221321
TrueBBICalc, FalseBBICalc)) {
1223-
if (feasibleDiamond()) {
1322+
if (feasibleDiamond(true)) {
12241323
// ForkedDiamond:
12251324
// if TBB and FBB have a common tail that includes their conditional
12261325
// branch instructions, then we can If Convert this pattern.

lib/Target/ARM/ARMBaseInstrInfo.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2079,6 +2079,38 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
20792079
return PredCost <= UnpredCost;
20802080
}
20812081

2082+
unsigned
2083+
ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2084+
unsigned NumInsts) const {
2085+
// Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2086+
// ARM has a condition code field in every predicable instruction, using it
2087+
// doesn't change code size.
2088+
return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0;
2089+
}
2090+
2091+
unsigned
2092+
ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2093+
// If this branch is likely to be folded into the comparison to form a
2094+
// CB(N)Z, then removing it won't reduce code size at all, because that will
2095+
// just replace the CB(N)Z with a CMP.
2096+
if (MI.getOpcode() == ARM::t2Bcc &&
2097+
findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2098+
return 0;
2099+
2100+
unsigned Size = getInstSizeInBytes(MI);
2101+
2102+
// For Thumb2, all branches are 32-bit instructions during the if conversion
2103+
// pass, but may be replaced with 16-bit instructions during size reduction.
2104+
// Since the branches considered by if conversion tend to be forward branches
2105+
// over small basic blocks, they are very likely to be in range for the
2106+
// narrow instructions, so we assume the final code size will be half what it
2107+
// currently is.
2108+
if (Subtarget.isThumb2())
2109+
Size /= 2;
2110+
2111+
return Size;
2112+
}
2113+
20822114
bool
20832115
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
20842116
MachineBasicBlock &FMBB) const {

lib/Target/ARM/ARMBaseInstrInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
276276
return NumCycles == 1;
277277
}
278278

279+
unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
280+
unsigned NumInsts) const override;
281+
unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override;
282+
279283
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
280284
MachineBasicBlock &FMBB) const override;
281285

0 commit comments

Comments
 (0)