Skip to content

Commit 0d1392e

Browse files
authored
[MachineOutliner] Remove LOHs from outlined candidates (#143617)
Remove Linker Optimization Hints (LOHs) from outlining candidates instead of simply preventing outlining if LOH labels are found in the candidate. This will improve the effectiveness of the machine outliner when LOHs are enabled (which is the default). In https://discourse.llvm.org/t/loh-conflicting-with-machineoutliner/83279/1 it was observed that the machine outliner is much more effective when LOHs are disabled. Rather than completely disabling LOHs, this PR aims to keep LOHs in most places and remove them only from outlined functions, where they could be illegal. Note that we are conservatively removing all LOHs from outlined functions for simplicity, but I believe we could retain LOHs that are in the intersection of all candidates. It should be ok to remove these LOHs since these blocks are being outlined anyway, and outlining will harm performance much more than the gain from keeping the LOHs.
1 parent f4cecfe commit 0d1392e

File tree

8 files changed

+177
-7
lines changed

8 files changed

+177
-7
lines changed

llvm/include/llvm/Target/TargetMachine.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ using ModulePassManager = PassManager<Module>;
3838

3939
class Function;
4040
class GlobalValue;
41+
class MachineInstr;
4142
class MachineModuleInfoWrapperPass;
4243
struct MachineSchedContext;
4344
class Mangler;
@@ -519,6 +520,15 @@ class LLVM_ABI TargetMachine {
519520

520521
// MachineRegisterInfo callback function
521522
virtual void registerMachineRegisterInfoCallback(MachineFunction &MF) const {}
523+
524+
/// Remove all Linker Optimization Hints (LOH) associated with instructions in
525+
/// \p MIs and \return the number of hints removed. This is useful in
526+
/// transformations that cause these hints to be illegal, like in the machine
527+
/// outliner.
528+
virtual size_t clearLinkerOptimizationHints(
529+
const SmallPtrSetImpl<MachineInstr *> &MIs) const {
530+
return 0;
531+
}
522532
};
523533

524534
} // end namespace llvm

llvm/lib/CodeGen/MachineOutliner.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
6868
#include "llvm/CodeGen/Passes.h"
6969
#include "llvm/CodeGen/TargetInstrInfo.h"
70+
#include "llvm/CodeGen/TargetPassConfig.h"
7071
#include "llvm/CodeGen/TargetSubtargetInfo.h"
7172
#include "llvm/IR/DIBuilder.h"
7273
#include "llvm/IR/IRBuilder.h"
@@ -77,6 +78,7 @@
7778
#include "llvm/Support/Debug.h"
7879
#include "llvm/Support/SuffixTree.h"
7980
#include "llvm/Support/raw_ostream.h"
81+
#include "llvm/Target/TargetMachine.h"
8082
#include "llvm/Transforms/Utils/ModuleUtils.h"
8183
#include <tuple>
8284
#include <vector>
@@ -104,6 +106,7 @@ STATISTIC(StableHashAttempts,
104106
"Count of hashing attempts made for outlined functions");
105107
STATISTIC(StableHashDropped,
106108
"Count of unsuccessful hashing attempts for outlined functions");
109+
STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
107110

108111
// Set to true if the user wants the outliner to run on linkonceodr linkage
109112
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -426,6 +429,7 @@ struct MachineOutliner : public ModulePass {
426429
static char ID;
427430

428431
MachineModuleInfo *MMI = nullptr;
432+
const TargetMachine *TM = nullptr;
429433

430434
/// Set to true if the outliner should consider functions with
431435
/// linkonceodr linkage.
@@ -461,6 +465,7 @@ struct MachineOutliner : public ModulePass {
461465

462466
void getAnalysisUsage(AnalysisUsage &AU) const override {
463467
AU.addRequired<MachineModuleInfoWrapperPass>();
468+
AU.addRequired<TargetPassConfig>();
464469
AU.addPreserved<MachineModuleInfoWrapperPass>();
465470
AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
466471
AU.setPreservesAll();
@@ -1075,6 +1080,17 @@ bool MachineOutliner::outline(
10751080
<< " B) > threshold (" << OutlinerBenefitThreshold
10761081
<< " B)\n");
10771082

1083+
// Remove all Linker Optimization Hints from the candidates.
1084+
// TODO: The intersection of the LOHs from all candidates should be legal in
1085+
// the outlined function.
1086+
SmallPtrSet<MachineInstr *, 2> MIs;
1087+
for (Candidate &C : OF->Candidates) {
1088+
for (MachineInstr &MI : C)
1089+
MIs.insert(&MI);
1090+
NumRemovedLOHs += TM->clearLinkerOptimizationHints(MIs);
1091+
MIs.clear();
1092+
}
1093+
10781094
// It's beneficial. Create the function and outline its sequence's
10791095
// occurrences.
10801096
OF->MF = createOutlinedFunction(M, *OF, Mapper, OutlinedFunctionNum);
@@ -1386,6 +1402,7 @@ bool MachineOutliner::runOnModule(Module &M) {
13861402
initializeOutlinerMode(M);
13871403

13881404
MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
1405+
TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
13891406

13901407
// Number to append to the current outlined function.
13911408
unsigned OutlinedFunctionNum = 0;

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9696,9 +9696,6 @@ AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
96969696
MachineBasicBlock::iterator &MIT,
96979697
unsigned Flags) const {
96989698
MachineInstr &MI = *MIT;
9699-
MachineBasicBlock *MBB = MI.getParent();
9700-
MachineFunction *MF = MBB->getParent();
9701-
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
97029699

97039700
// Don't outline anything used for return address signing. The outlined
97049701
// function will get signed later if needed
@@ -9726,10 +9723,6 @@ AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
97269723
return outliner::InstrType::Illegal;
97279724
}
97289725

9729-
// Don't outline LOHs.
9730-
if (FuncInfo->getLOHRelated().count(&MI))
9731-
return outliner::InstrType::Illegal;
9732-
97339726
// We can only outline these if we will tail call the outlined function, or
97349727
// fix up the CFI offsets. Currently, CFI instructions are outlined only if
97359728
// in a tail call.

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,20 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
504504
LOHRelated.insert_range(Args);
505505
}
506506

507+
size_t
508+
clearLinkerOptimizationHints(const SmallPtrSetImpl<MachineInstr *> &MIs) {
509+
size_t InitialSize = LOHContainerSet.size();
510+
erase_if(LOHContainerSet, [&](const auto &D) {
511+
return any_of(D.getArgs(), [&](auto *Arg) { return MIs.contains(Arg); });
512+
});
513+
// In theory there could be an LOH with one label in MIs and another label
514+
// outside MIs, however we don't know if the label outside MIs is used in
515+
// any other LOHs, so we can't remove them from LOHRelated. In that case, we
516+
// might produce a few extra labels, but it won't break anything.
517+
LOHRelated.remove_if([&](auto *MI) { return MIs.contains(MI); });
518+
return InitialSize - LOHContainerSet.size();
519+
};
520+
507521
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
508522
return ForwardedMustTailRegParms;
509523
}

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,15 @@ AArch64TargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
512512
return DAG;
513513
}
514514

515+
size_t AArch64TargetMachine::clearLinkerOptimizationHints(
516+
const SmallPtrSetImpl<MachineInstr *> &MIs) const {
517+
if (MIs.empty())
518+
return 0;
519+
auto *MI = *MIs.begin();
520+
auto *FuncInfo = MI->getMF()->getInfo<AArch64FunctionInfo>();
521+
return FuncInfo->clearLinkerOptimizationHints(MIs);
522+
}
523+
515524
void AArch64leTargetMachine::anchor() { }
516525

517526
AArch64leTargetMachine::AArch64leTargetMachine(

llvm/lib/Target/AArch64/AArch64TargetMachine.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ class AArch64TargetMachine : public CodeGenTargetMachineImpl {
7676
ScheduleDAGInstrs *
7777
createPostMachineScheduler(MachineSchedContext *C) const override;
7878

79+
size_t clearLinkerOptimizationHints(
80+
const SmallPtrSetImpl<MachineInstr *> &MIs) const override;
81+
7982
private:
8083
bool isLittle;
8184
};
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s --implicit-check-not=.loh --check-prefixes=CHECK,LOH
2+
; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-darwin -enable-machine-outliner < %s | FileCheck %s --implicit-check-not=.loh --check-prefixes=CHECK,OUTLINE
3+
4+
@A = global i32 0, align 4
5+
@B = global i32 0, align 4
6+
7+
declare void @foo();
8+
declare void @bar(ptr %a);
9+
declare void @goo(ptr %a);
10+
11+
; CHECK-LABEL: _a0:
12+
define void @a0(i32 %a) {
13+
14+
; This becomes AdrpAdd when outlining is disabled, otherwise it is outlined
15+
; and there should be no LOH.
16+
%addr = getelementptr inbounds i32, ptr @A, i32 0
17+
%res = load i32, ptr %addr, align 4
18+
; LOH: [[L0:Lloh.+]]:
19+
; LOH-NEXT: adrp x19, _A@PAGE
20+
; LOH-NEXT: [[L1:Lloh.+]]:
21+
; LOH-NEXT: add x19, x19, _A@PAGEOFF
22+
23+
call void @foo()
24+
; OUTLINE: bl _OUTLINED_FUNCTION_0
25+
; OUTLINE-NEXT: mov x0, x19
26+
; OUTLINE-NEXT: bl _bar
27+
call void @bar(ptr %addr)
28+
29+
; This becomes AdrpAddStr.
30+
%addr2 = getelementptr inbounds i32, ptr @B, i32 4
31+
store i32 %res, ptr %addr2, align 4
32+
; CHECK: [[L2:Lloh.+]]:
33+
; CHECK-NEXT: adrp x8, _B@PAGE
34+
; CHECK-NEXT: [[L3:Lloh.+]]:
35+
; CHECK-NEXT: add x8, x8, _B@PAGEOFF
36+
; CHECK-NEXT: [[L4:Lloh.+]]:
37+
; CHECK-NEXT: str w20, [x8, #16]
38+
ret void
39+
40+
; LOH-DAG: .loh AdrpAdd [[L0]], [[L1]]
41+
; CHECK-DAG: .loh AdrpAddStr [[L2]], [[L3]], [[L4]]
42+
; CHECK: .cfi_endproc
43+
}
44+
45+
; CHECK-LABEL: _a1:
46+
define i32 @a1(i32 %a) {
47+
48+
; This becomes AdrpAdd when outlining is disabled, otherwise it is outlined
49+
; and there should be no LOH.
50+
%addr = getelementptr inbounds i32, ptr @A, i32 0
51+
%res = load i32, ptr %addr, align 4
52+
; LOH: [[L5:Lloh.+]]:
53+
; LOH-NEXT: adrp x19, _A@PAGE
54+
; LOH-NEXT: [[L6:Lloh.+]]:
55+
; LOH-NEXT: add x19, x19, _A@PAGEOFF
56+
57+
call void @foo()
58+
; OUTLINE: bl _OUTLINED_FUNCTION_0
59+
; OUTLINE-NEXT: mov x0, x19
60+
; OUTLINE-NEXT: bl _goo
61+
call void @goo(ptr %addr)
62+
ret i32 %res
63+
64+
; LOH: .loh AdrpAdd [[L5]], [[L6]]
65+
; CHECK: .cfi_endproc
66+
}
67+
68+
; Note: it is not safe to add LOHs to this function as outlined functions do not
69+
; follow calling convention and thus x19 could be live across the call.
70+
; OUTLINE: _OUTLINED_FUNCTION_0:
71+
; OUTLINE: adrp x19, _A@PAGE
72+
; OUTLINE: add x19, x19, _A@PAGEOFF
73+
; OUTLINE: ldr w20, [x19]
74+
; OUTLINE: b _foo
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# RUN: llc %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -run-pass=machine-outliner -stats -o - 2>%t | FileCheck %s
2+
# RUN: FileCheck %s --input-file=%t --check-prefix=STATS
3+
# REQUIRES: asserts
4+
5+
--- |
6+
define void @func0() noredzone minsize { ret void }
7+
8+
@g0 = external global i32
9+
...
10+
---
11+
# CHECK-LABEL: name: func0
12+
name: func0
13+
tracksRegLiveness: true
14+
body: |
15+
; CHECK-LABEL: bb.0:
16+
bb.0:
17+
; CHECK: BL @OUTLINED_FUNCTION_0
18+
$x10 = ADRP target-flags(aarch64-page) @g0
19+
$x11 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0
20+
$w0 = ORRWri $wzr, 1
21+
$w0 = ORRWri $wzr, 1
22+
$w0 = ORRWri $wzr, 1
23+
$w0 = ORRWri $wzr, 1
24+
25+
; CHECK-LABEL: bb.1:
26+
bb.1:
27+
; CHECK: BL @OUTLINED_FUNCTION_0
28+
; MCLOH_AdrpAdd is not generated because $x11 is still live. If we want to
29+
; outline these instructions, we must remove the MCLOH_AdrpAdd from bb.0.
30+
$x10 = ADRP target-flags(aarch64-page) @g0
31+
$x11 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0
32+
$w0 = ORRWri $wzr, 1
33+
$w0 = ORRWri $wzr, 1
34+
$w0 = ORRWri $wzr, 1
35+
$w0 = ORRWri $wzr, 1
36+
; CHECK: $x12 = ADDXri $x11, target-flags(aarch64-pageoff) @g0, 0
37+
$x12 = ADDXri $x11, target-flags(aarch64-pageoff) @g0, 0
38+
39+
...
40+
41+
# CHECK-LABEL: name: OUTLINED_FUNCTION_0
42+
# CHECK: $x10 = ADRP target-flags(aarch64-page) @g0
43+
# CHECK: $x11 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0
44+
# CHECK: $w0 = ORRWri $wzr, 1
45+
# CHECK: $w0 = ORRWri $wzr, 1
46+
# CHECK: $w0 = ORRWri $wzr, 1
47+
# CHECK: $w0 = ORRWri $wzr, 1
48+
49+
# STATS: 1 aarch64-collect-loh - Number of simplifiable ADRP + ADD
50+
# STATS: 1 machine-outliner - Total number of Linker Optimization Hints removed

0 commit comments

Comments
 (0)