Skip to content

Commit c771eb3

Browse files
author
git apple-llvm automerger
committed
Merge commit '9d6a549dfb9b' from apple/master into swift/master-next
2 parents ec94009 + 9d6a549 commit c771eb3

File tree

8 files changed

+284
-4
lines changed

8 files changed

+284
-4
lines changed

llvm/include/llvm/Analysis/InlineAdvisor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
226226
bool ForProfileContext = false,
227227
const char *PassName = nullptr);
228228

229+
/// Get the call site location as a string.
230+
StringRef getCallSiteLocation(DebugLoc DLoc);
231+
229232
/// Add location info to ORE message.
230233
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc);
231234

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
#ifndef LLVM_REPLAYINLINEADVISOR_H_
10+
#define LLVM_REPLAYINLINEADVISOR_H_
11+
12+
#include "llvm/ADT/StringSet.h"
13+
#include "llvm/Analysis/InlineAdvisor.h"
14+
#include "llvm/IR/LLVMContext.h"
15+
16+
namespace llvm {
17+
class BasicBlock;
18+
class CallBase;
19+
class Function;
20+
class Module;
21+
class OptimizationRemarkEmitter;
22+
23+
/// Replay inline advisor that uses optimization remarks from inlining of
24+
/// previous build to guide current inlining. This is useful for inliner tuning.
25+
class ReplayInlineAdvisor : public InlineAdvisor {
26+
public:
27+
/// Loads inline remarks from \p RemarksFile. If the file cannot be opened,
/// an error is emitted on \p Context and areReplayRemarksLoaded() stays
/// false.
ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context,
28+
StringRef RemarksFile);
29+
/// Recommends inlining \p CB only when its call site location string was
/// recorded from the remarks file.
std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
30+
/// Returns true once the remarks file has been read successfully.
bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
31+
32+
private:
33+
// Call site strings (the text following " at callsite " in each remark
// line) collected by the constructor.
StringSet<> InlineSitesFromRemarks;
34+
// Set to true by the constructor after the remarks file has been parsed.
bool HasReplayRemarks = false;
35+
};
36+
} // namespace llvm
37+
#endif // LLVM_REPLAYINLINEADVISOR_H_

llvm/lib/Analysis/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ add_llvm_component_library(LLVMAnalysis
117117
RegionInfo.cpp
118118
RegionPass.cpp
119119
RegionPrinter.cpp
120+
ReplayInlineAdvisor.cpp
120121
ScalarEvolution.cpp
121122
ScalarEvolutionAliasAnalysis.cpp
122123
ScalarEvolutionDivision.cpp

llvm/lib/Analysis/InlineAdvisor.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,31 @@ llvm::shouldInline(CallBase &CB,
365365
return IC;
366366
}
367367

368+
// Builds a textual call site location for DLoc by walking the inlined-at
// chain starting from DLoc itself. Each level is rendered as
// "<function>:<line offset>[.<discriminator>]" and levels are joined with
// " @ ". The function name is the subprogram's linkage name, falling back to
// its plain name when the linkage name is empty. A null DLoc yields an empty
// string.
//
// NOTE(review): CallSiteLoc.str() returns a temporary std::string, and the
// StringRef returned here refers to that temporary's buffer, which is
// destroyed when the function returns. The result dangles; returning
// std::string instead (and updating the declaration and callers) would fix it.
StringRef llvm::getCallSiteLocation(DebugLoc DLoc) {
369+
std::ostringstream CallSiteLoc;
370+
bool First = true;
371+
for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) {
372+
if (!First)
373+
CallSiteLoc << " @ ";
374+
// Note that negative line offset is actually possible, but we use
375+
// unsigned int to match line offset representation in remarks so
376+
// it's directly consumable by replay advisor.
377+
uint32_t Offset =
378+
DIL->getLine() - DIL->getScope()->getSubprogram()->getLine();
379+
uint32_t Discriminator = DIL->getBaseDiscriminator();
380+
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
381+
if (Name.empty())
382+
Name = DIL->getScope()->getSubprogram()->getName();
383+
CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset);
384+
// The discriminator suffix is only printed when it is non-zero.
if (Discriminator) {
385+
CallSiteLoc << "." << llvm::utostr(Discriminator);
386+
}
387+
First = false;
388+
}
389+
390+
return CallSiteLoc.str();
391+
}
392+
368393
void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
369394
if (!DLoc.get())
370395
return;
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements ReplayInlineAdvisor that replays inline decision based
10+
// on previous inline remarks from optimization remark log.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "llvm/Analysis/InlineAdvisor.h"
15+
#include "llvm/Analysis/ReplayInlineAdvisor.h"
16+
#include "llvm/IR/DebugInfoMetadata.h"
17+
#include "llvm/IR/Instructions.h"
18+
#include "llvm/Support/LineIterator.h"
19+
20+
using namespace llvm;
21+
22+
#define DEBUG_TYPE "inline-replay"
23+
24+
// Reads RemarksFile and records, for every line containing " at callsite ",
// the text that follows it. On failure to open the file an error is emitted
// on Context and HasReplayRemarks is left false; otherwise it is set to true
// once all lines have been scanned.
ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM,
25+
LLVMContext &Context,
26+
StringRef RemarksFile)
27+
: InlineAdvisor(FAM), HasReplayRemarks(false) {
28+
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
29+
std::error_code EC = BufferOrErr.getError();
30+
if (EC) {
31+
Context.emitError("Could not open remarks file: " + EC.message());
32+
return;
33+
}
34+
35+
// Example for inline remarks to parse:
36+
// _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1
37+
// We use the callsite string after `at callsite` to replay inlining.
38+
line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
39+
for (; !LineIt.is_at_eof(); ++LineIt) {
40+
StringRef Line = *LineIt;
41+
auto Pair = Line.split(" at callsite ");
42+
// Lines without the separator (or with nothing after it) carry no call
// site and are ignored.
if (Pair.second.empty())
43+
continue;
44+
InlineSitesFromRemarks.insert(Pair.second);
45+
}
46+
HasReplayRemarks = true;
47+
}
48+
49+
// Produces inline advice for CB by replaying recorded decisions: inlining is
// recommended only when the call site location string of CB is present in
// InlineSitesFromRemarks. Must only be called after the remarks file was
// loaded successfully (asserted below).
std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdvice(CallBase &CB) {
50+
assert(HasReplayRemarks);
51+
52+
Function &Caller = *CB.getCaller();
53+
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
54+
55+
// Nothing was recorded from the remarks file, so no call site can match.
if (InlineSitesFromRemarks.empty())
56+
return std::make_unique<InlineAdvice>(this, CB, ORE, false);
57+
58+
// NOTE(review): getCallSiteLocation returns a StringRef; verify that the
// storage it refers to is still alive here before it is used for the lookup.
StringRef CallSiteLoc = getCallSiteLocation(CB.getDebugLoc());
59+
bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0;
60+
return std::make_unique<InlineAdvice>(this, CB, ORE, InlineRecommended);
61+
}

llvm/lib/Transforms/IPO/SampleProfile.cpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
4444
#include "llvm/Analysis/PostDominators.h"
4545
#include "llvm/Analysis/ProfileSummaryInfo.h"
46+
#include "llvm/Analysis/ReplayInlineAdvisor.h"
4647
#include "llvm/Analysis/TargetLibraryInfo.h"
4748
#include "llvm/Analysis/TargetTransformInfo.h"
4849
#include "llvm/IR/BasicBlock.h"
@@ -170,6 +171,13 @@ static cl::opt<int> SampleColdCallSiteThreshold(
170171
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
171172
cl::desc("Threshold for inlining cold callsites"));
172173

174+
// Hidden option -sample-profile-inline-replay=<filename>. Defaults to the
// empty string, which leaves replay disabled.
static cl::opt<std::string> ProfileInlineReplayFile(
175+
"sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
176+
cl::desc(
177+
"Optimization remarks file containing inline remarks to be replayed "
178+
"by inlining from sample profile loader."),
179+
cl::Hidden);
180+
173181
namespace {
174182

175183
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -319,7 +327,7 @@ class SampleProfileLoader {
319327
RemappingFilename(std::string(RemapName)),
320328
IsThinLTOPreLink(IsThinLTOPreLink) {}
321329

322-
bool doInitialization(Module &M);
330+
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
323331
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
324332
ProfileSummaryInfo *_PSI, CallGraph *CG);
325333

@@ -473,6 +481,9 @@ class SampleProfileLoader {
473481
// overridden by -profile-sample-accurate or profile-sample-accurate
474482
// attribute.
475483
bool ProfAccForSymsInList;
484+
485+
// External inline advisor used to replay inline decision from remarks.
486+
std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
476487
};
477488

478489
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -898,6 +909,16 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
898909
}
899910

900911
bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
912+
if (ExternalInlineAdvisor) {
913+
auto Advice = ExternalInlineAdvisor->getAdvice(CB);
914+
if (!Advice->isInliningRecommended()) {
915+
Advice->recordUnattemptedInlining();
916+
return false;
917+
}
918+
// Dummy record, we don't use it for replay.
919+
Advice->recordInlining();
920+
}
921+
901922
Function *CalledFunction = CB.getCalledFunction();
902923
assert(CalledFunction);
903924
DebugLoc DLoc = CB.getDebugLoc();
@@ -1005,7 +1026,7 @@ bool SampleProfileLoader::inlineHotFunctions(
10051026
}
10061027
}
10071028
}
1008-
if (Hot) {
1029+
if (Hot || ExternalInlineAdvisor) {
10091030
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
10101031
emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
10111032
} else {
@@ -1818,7 +1839,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
18181839
return FunctionOrderList;
18191840
}
18201841

1821-
bool SampleProfileLoader::doInitialization(Module &M) {
1842+
bool SampleProfileLoader::doInitialization(Module &M,
1843+
FunctionAnalysisManager *FAM) {
18221844
auto &Ctx = M.getContext();
18231845

18241846
std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
@@ -1843,6 +1865,13 @@ bool SampleProfileLoader::doInitialization(Module &M) {
18431865
NamesInProfile.insert(NameTable->begin(), NameTable->end());
18441866
}
18451867

1868+
if (FAM && !ProfileInlineReplayFile.empty()) {
1869+
ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
1870+
*FAM, Ctx, ProfileInlineReplayFile);
1871+
if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
1872+
ExternalInlineAdvisor.reset();
1873+
}
1874+
18461875
return true;
18471876
}
18481877

@@ -1995,7 +2024,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
19952024
: ProfileRemappingFileName,
19962025
IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
19972026

1998-
if (!SampleLoader.doInitialization(M))
2027+
if (!SampleLoader.doInitialization(M, &FAM))
19992028
return PreservedAnalyses::all();
20002029

20012030
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1
2+
remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ main:3.1
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op.
2+
3+
;; Check baseline inline decisions
4+
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s
5+
6+
;; Check replay inline decisions
7+
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s
8+
9+
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
10+
11+
define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
12+
entry:
13+
%x.addr = alloca i32, align 4
14+
%y.addr = alloca i32, align 4
15+
store i32 %x, i32* %x.addr, align 4
16+
store i32 %y, i32* %y.addr, align 4
17+
%tmp = load i32, i32* %x.addr, align 4, !dbg !8
18+
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
19+
%add = add nsw i32 %tmp, %tmp1, !dbg !8
20+
%tmp2 = load i32, i32* %x.addr, align 4, !dbg !8
21+
%tmp3 = load i32, i32* %y.addr, align 4, !dbg !8
22+
%call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8
23+
ret i32 %add, !dbg !8
24+
}
25+
26+
define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 {
27+
entry:
28+
%x.addr = alloca i32, align 4
29+
%y.addr = alloca i32, align 4
30+
store i32 %x, i32* %x.addr, align 4
31+
store i32 %y, i32* %y.addr, align 4
32+
%tmp = load i32, i32* %x.addr, align 4, !dbg !10
33+
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !10
34+
%add = sub nsw i32 %tmp, %tmp1, !dbg !10
35+
ret i32 %add, !dbg !11
36+
}
37+
38+
define i32 @main() #0 !dbg !12 {
39+
entry:
40+
%retval = alloca i32, align 4
41+
%s = alloca i32, align 4
42+
%i = alloca i32, align 4
43+
store i32 0, i32* %retval
44+
store i32 0, i32* %i, align 4, !dbg !13
45+
br label %while.cond, !dbg !14
46+
47+
while.cond: ; preds = %if.end, %entry
48+
%tmp = load i32, i32* %i, align 4, !dbg !15
49+
%inc = add nsw i32 %tmp, 1, !dbg !15
50+
store i32 %inc, i32* %i, align 4, !dbg !15
51+
%cmp = icmp slt i32 %tmp, 400000000, !dbg !15
52+
br i1 %cmp, label %while.body, label %while.end, !dbg !15
53+
54+
while.body: ; preds = %while.cond
55+
%tmp1 = load i32, i32* %i, align 4, !dbg !17
56+
%cmp1 = icmp ne i32 %tmp1, 100, !dbg !17
57+
br i1 %cmp1, label %if.then, label %if.else, !dbg !17
58+
59+
if.then: ; preds = %while.body
60+
%tmp2 = load i32, i32* %i, align 4, !dbg !19
61+
%tmp3 = load i32, i32* %s, align 4, !dbg !19
62+
%call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19
63+
store i32 %call, i32* %s, align 4, !dbg !19
64+
br label %if.end, !dbg !19
65+
66+
if.else: ; preds = %while.body
67+
store i32 30, i32* %s, align 4, !dbg !21
68+
br label %if.end
69+
70+
if.end: ; preds = %if.else, %if.then
71+
br label %while.cond, !dbg !23
72+
73+
while.end: ; preds = %while.cond
74+
%tmp4 = load i32, i32* %s, align 4, !dbg !25
75+
%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25
76+
ret i32 0, !dbg !26
77+
}
78+
79+
declare i32 @printf(i8*, ...)
80+
81+
attributes #0 = { "use-sample-profile" }
82+
83+
!llvm.dbg.cu = !{!0}
84+
!llvm.module.flags = !{!3, !4}
85+
!llvm.ident = !{!5}
86+
87+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
88+
!1 = !DIFile(filename: "calls.cc", directory: ".")
89+
!2 = !{}
90+
!3 = !{i32 2, !"Dwarf Version", i32 4}
91+
!4 = !{i32 1, !"Debug Info Version", i32 3}
92+
!5 = !{!"clang version 3.5 "}
93+
!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
94+
!7 = !DISubroutineType(types: !2)
95+
!8 = !DILocation(line: 4, scope: !6)
96+
!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
97+
!10 = !DILocation(line: 20, scope: !9)
98+
!11 = !DILocation(line: 21, scope: !9)
99+
!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
100+
!13 = !DILocation(line: 8, scope: !12)
101+
!14 = !DILocation(line: 9, scope: !12)
102+
!15 = !DILocation(line: 9, scope: !16)
103+
!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
104+
!17 = !DILocation(line: 10, scope: !18)
105+
!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
106+
!19 = !DILocation(line: 10, scope: !20)
107+
!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
108+
!21 = !DILocation(line: 10, scope: !22)
109+
!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
110+
!23 = !DILocation(line: 10, scope: !24)
111+
!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
112+
!25 = !DILocation(line: 11, scope: !12)
113+
!26 = !DILocation(line: 12, scope: !12)
114+
115+
116+
; DEFAULT: _Z3sumii inlined into main
117+
; DEFAULT: _Z3subii inlined into _Z3sumii
118+
; DEFAULT-NOT: _Z3subii inlined into main
119+
120+
; REPLAY: _Z3sumii inlined into main
121+
; REPLAY: _Z3subii inlined into main
122+
; REPLAY-NOT: _Z3subii inlined into _Z3sumii

0 commit comments

Comments
 (0)