Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit a764eb9

Browse files
committed
[PGO] Context sensitive PGO (part 1)
Current PGO profile counts are not context sensitive. The branch probabilities for the inlined functions are kept the same for all call-sites, and they might be very different from the actual branch probabilities. These suboptimal profiles can greatly affect some downstream optimizations, in particular for the machine basic block placement optimization. In this patch, we propose to have a post-inline PGO instrumentation/use pass, which we called Context Sensitive PGO (CSPGO). For the users who want the best possible performance, they can perform a second round of PGO instrument/use on the top of the regular PGO. They will have two sets of profile counts. The first pass profile will be mainly for inline, indirect-call promotion, and CGSCC simplification pass optimizations. The second pass profile is for post-inline optimizations and code-gen optimizations. A typical usage: // Regular PGO instrumentation and generate pass1 profile. > clang -O2 -fprofile-generate source.c -o gen > ./gen > llvm-profdata merge default.*profraw -o pass1.profdata // CSPGO instrumentation. > clang -O2 -fprofile-use=pass1.profdata -fcs-profile-generate -o gen2 > ./gen2 // Merge two sets of profiles > llvm-profdata merge default.*profraw pass1.profdata -o profile.profdata // Use the combined profile. Pass manager will invoke two PGO use passes. > clang -O2 -fprofile-use=profile.profdata -o use This change touches many components in the compiler. The reviewed patch (D54175) will be committed in phases. Differential Revision: https://reviews.llvm.org/D54175 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354930 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 1a46c55 commit a764eb9

File tree

14 files changed

+309
-75
lines changed

14 files changed

+309
-75
lines changed

include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ void initializePEIPass(PassRegistry&);
299299
void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&);
300300
void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&);
301301
void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&);
302+
void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&);
302303
void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&);
303304
void initializePHIEliminationPass(PassRegistry&);
304305
void initializePartialInlinerLegacyPassPass(PassRegistry&);

include/llvm/LTO/Config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ struct Config {
5555
/// Disable entirely the optimizer, including importing for ThinLTO
5656
bool CodeGenOnly = false;
5757

58+
/// Run PGO context sensitive IR instrumentation.
59+
bool RunCSIRInstr = false;
60+
5861
/// If this field is set, the set of passes run in the middle-end optimizer
5962
/// will be the one specified by the string. Only works with the new pass
6063
/// manager as the old one doesn't have this ability.
@@ -73,6 +76,9 @@ struct Config {
7376
/// with this triple.
7477
std::string DefaultTriple;
7578

79+
/// Context Sensitive PGO profile path.
80+
std::string CSIRProfile;
81+
7682
/// Sample PGO profile path.
7783
std::string SampleProfile;
7884

include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ namespace {
102102
(void) llvm::createGCOVProfilerPass();
103103
(void) llvm::createPGOInstrumentationGenLegacyPass();
104104
(void) llvm::createPGOInstrumentationUseLegacyPass();
105+
(void) llvm::createPGOInstrumentationGenCreateVarLegacyPass();
105106
(void) llvm::createPGOIndirectCallPromotionLegacyPass();
106107
(void) llvm::createPGOMemOPSizeOptLegacyPass();
107108
(void) llvm::createInstrProfilingLegacyPass();

include/llvm/ProfileData/InstrProf.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,10 +767,20 @@ struct NamedInstrProfRecord : InstrProfRecord {
767767
StringRef Name;
768768
uint64_t Hash;
769769

770+
// We reserve this bit as the flag for context sensitive profile record.
771+
static const int CS_FLAG_IN_FUNC_HASH = 60;
772+
770773
NamedInstrProfRecord() = default;
771774
NamedInstrProfRecord(StringRef Name, uint64_t Hash,
772775
std::vector<uint64_t> Counts)
773776
: InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
777+
778+
static bool hasCSFlagInHash(uint64_t FuncHash) {
779+
return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1);
780+
}
781+
static void setCSFlagInHash(uint64_t &FuncHash) {
782+
FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH);
783+
}
774784
};
775785

776786
uint32_t InstrProfRecord::getNumValueKinds() const {
@@ -1004,6 +1014,8 @@ namespace RawInstrProf {
10041014
// from control data struct is changed from raw pointer to Name's MD5 value.
10051015
// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
10061016
// raw header.
1017+
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
1018+
// sensitive records.
10071019
const uint64_t Version = INSTR_PROF_RAW_VERSION;
10081020

10091021
template <class IntPtrT> inline uint64_t getMagic();
@@ -1040,6 +1052,10 @@ struct Header {
10401052
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
10411053
int64_t &RangeLast);
10421054

1055+
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
1056+
// aware this is an ir_level profile so it can set the version flag.
1057+
void createIRLevelProfileFlagVar(Module &M, bool IsCS);
1058+
10431059
// Create the variable for the profile file name.
10441060
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
10451061

include/llvm/ProfileData/InstrProfData.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,10 +635,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
635635
* version for other variants of profile. We set the lowest bit of the upper 8
636636
* bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
637637
* generated profile, and 0 if this is a Clang FE generated profile.
638+
* 1 in bit 57 indicates there are context-sensitive records in the profile.
638639
*/
639640
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
640641
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
641642
#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
643+
#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
642644
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
643645
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
644646

include/llvm/Transforms/Instrumentation.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,14 @@ struct GCOVOptions {
8787
ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
8888
GCOVOptions::getDefault());
8989

90-
// PGO Instrumention
91-
ModulePass *createPGOInstrumentationGenLegacyPass();
90+
// PGO Instrumentation. Parameter IsCS indicates if this is the context sensitive
91+
// instrumentation.
92+
ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false);
9293
ModulePass *
93-
createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""));
94+
createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""),
95+
bool IsCS = false);
96+
ModulePass *createPGOInstrumentationGenCreateVarLegacyPass(
97+
StringRef CSInstrName = StringRef(""));
9498
ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false,
9599
bool SamplePGO = false);
96100
FunctionPass *createPGOMemOPSizeOptLegacyPass();
@@ -132,15 +136,19 @@ struct InstrProfOptions {
132136
// Use atomic profile counter increments.
133137
bool Atomic = false;
134138

139+
// Use BFI to guide register promotion
140+
bool UseBFIInPromotion = false;
141+
135142
// Name of the profile file to use as output
136143
std::string InstrProfileOutput;
137144

138145
InstrProfOptions() = default;
139146
};
140147

141-
/// Insert frontend instrumentation based profiling.
148+
/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if
149+
/// this is the context sensitive instrumentation.
142150
ModulePass *createInstrProfilingLegacyPass(
143-
const InstrProfOptions &Options = InstrProfOptions());
151+
const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false);
144152

145153
FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false,
146154
bool Recover = false);

include/llvm/Transforms/Instrumentation/InstrProfiling.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ using LoadStorePair = std::pair<Instruction *, Instruction *>;
3535
class InstrProfiling : public PassInfoMixin<InstrProfiling> {
3636
public:
3737
InstrProfiling() = default;
38-
InstrProfiling(const InstrProfOptions &Options) : Options(Options) {}
38+
InstrProfiling(const InstrProfOptions &Options, bool IsCS)
39+
: Options(Options), IsCS(IsCS) {}
3940

4041
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
4142
bool run(Module &M, const TargetLibraryInfo &TLI);
@@ -60,6 +61,9 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
6061
GlobalVariable *NamesVar;
6162
size_t NamesSize;
6263

64+
// Is this lowering for the context-sensitive instrumentation.
65+
bool IsCS;
66+
6367
// vector of counter load/store pairs to be register promoted.
6468
std::vector<LoadStorePair> PromotionCandidates;
6569

include/llvm/Transforms/Instrumentation/PGOInstrumentation.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "llvm/ADT/ArrayRef.h"
1919
#include "llvm/IR/PassManager.h"
20+
#include "llvm/ProfileData/InstrProf.h"
2021
#include <cstdint>
2122
#include <string>
2223

@@ -26,23 +27,51 @@ class Function;
2627
class Instruction;
2728
class Module;
2829

30+
/// The instrumentation (profile-instr-gen) pass for IR based PGO.
31+
// We use this pass to create COMDAT profile variables for context
32+
// sensitive PGO (CSPGO). The reason to have a pass for this is CSPGO
33+
// can be run after LTO/ThinLTO linking. Lld linker needs to see
34+
// all the COMDAT variables before linking. So we have this pass
35+
// always run before linking for CSPGO.
36+
class PGOInstrumentationGenCreateVar
37+
: public PassInfoMixin<PGOInstrumentationGenCreateVar> {
38+
public:
39+
PGOInstrumentationGenCreateVar(std::string CSInstrName = "")
40+
: CSInstrName(CSInstrName) {}
41+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
42+
createProfileFileNameVar(M, CSInstrName);
43+
createIRLevelProfileFlagVar(M, /* IsCS */ true);
44+
return PreservedAnalyses::all();
45+
}
46+
47+
private:
48+
std::string CSInstrName;
49+
};
50+
2951
/// The instrumentation (profile-instr-gen) pass for IR based PGO.
3052
class PGOInstrumentationGen : public PassInfoMixin<PGOInstrumentationGen> {
3153
public:
54+
PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {}
3255
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
56+
57+
private:
58+
// If this is a context sensitive instrumentation.
59+
bool IsCS;
3360
};
3461

3562
/// The profile annotation (profile-instr-use) pass for IR based PGO.
3663
class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> {
3764
public:
3865
PGOInstrumentationUse(std::string Filename = "",
39-
std::string RemappingFilename = "");
66+
std::string RemappingFilename = "", bool IsCS = false);
4067

4168
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
4269

4370
private:
4471
std::string ProfileFileName;
4572
std::string ProfileRemappingFileName;
73+
// If this is a context sensitive instrumentation.
74+
bool IsCS;
4675
};
4776

4877
/// The indirect function call promotion pass.

lib/Passes/PassBuilder.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
569569
if (!ProfileGenFile.empty())
570570
Options.InstrProfileOutput = ProfileGenFile;
571571
Options.DoCounterPromotion = true;
572-
MPM.addPass(InstrProfiling(Options));
572+
Options.UseBFIInPromotion = false;
573+
MPM.addPass(InstrProfiling(Options, false));
573574
}
574575

575576
if (!ProfileUseFile.empty())

lib/ProfileData/InstrProf.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,25 @@ void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart,
10111011
assert(RangeLast >= RangeStart);
10121012
}
10131013

1014+
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
1015+
// aware this is an ir_level profile so it can set the version flag.
1016+
void createIRLevelProfileFlagVar(Module &M, bool IsCS) {
1017+
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1018+
Type *IntTy64 = Type::getInt64Ty(M.getContext());
1019+
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
1020+
if (IsCS)
1021+
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
1022+
auto IRLevelVersionVariable = new GlobalVariable(
1023+
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
1024+
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
1025+
IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
1026+
Triple TT(M.getTargetTriple());
1027+
if (TT.supportsCOMDAT()) {
1028+
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
1029+
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
1030+
}
1031+
}
1032+
10141033
// Create the variable for the profile file name.
10151034
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
10161035
if (InstrProfileOutput.empty())

lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include "llvm/ADT/StringRef.h"
1919
#include "llvm/ADT/Triple.h"
2020
#include "llvm/ADT/Twine.h"
21+
#include "llvm/Analysis/BlockFrequencyInfo.h"
22+
#include "llvm/Analysis/BranchProbabilityInfo.h"
2123
#include "llvm/Analysis/LoopInfo.h"
2224
#include "llvm/Analysis/TargetLibraryInfo.h"
2325
#include "llvm/IR/Attributes.h"
@@ -147,8 +149,8 @@ class InstrProfilingLegacyPass : public ModulePass {
147149
static char ID;
148150

149151
InstrProfilingLegacyPass() : ModulePass(ID) {}
150-
InstrProfilingLegacyPass(const InstrProfOptions &Options)
151-
: ModulePass(ID), InstrProf(Options) {}
152+
InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS)
153+
: ModulePass(ID), InstrProf(Options, IsCS) {}
152154

153155
StringRef getPassName() const override {
154156
return "Frontend instrumentation-based coverage lowering";
@@ -232,9 +234,9 @@ class PGOCounterPromoter {
232234
public:
233235
PGOCounterPromoter(
234236
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
235-
Loop &CurLoop, LoopInfo &LI)
237+
Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
236238
: LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
237-
LI(LI) {
239+
LI(LI), BFI(BFI) {
238240

239241
SmallVector<BasicBlock *, 8> LoopExitBlocks;
240242
SmallPtrSet<BasicBlock *, 8> BlockSet;
@@ -263,6 +265,20 @@ class PGOCounterPromoter {
263265
SSAUpdater SSA(&NewPHIs);
264266
Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
265267

268+
// If BFI is set, we will use it to guide the promotions.
269+
if (BFI) {
270+
auto *BB = Cand.first->getParent();
271+
auto InstrCount = BFI->getBlockProfileCount(BB);
272+
if (!InstrCount)
273+
continue;
274+
auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
275+
// If the average loop trip count is not greater than 1.5, we skip
276+
// promotion.
277+
if (PreheaderCount &&
278+
(PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
279+
continue;
280+
}
281+
266282
PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
267283
L.getLoopPreheader(), ExitBlocks,
268284
InsertPts, LoopToCandidates, LI);
@@ -312,6 +328,11 @@ class PGOCounterPromoter {
312328

313329
SmallVector<BasicBlock *, 8> ExitingBlocks;
314330
LP->getExitingBlocks(ExitingBlocks);
331+
332+
// If BFI is set, we do more aggressive promotions based on BFI.
333+
if (BFI)
334+
return (unsigned)-1;
335+
315336
// Not considered speculative.
316337
if (ExitingBlocks.size() == 1)
317338
return MaxNumOfPromotionsPerLoop;
@@ -343,6 +364,7 @@ class PGOCounterPromoter {
343364
SmallVector<Instruction *, 8> InsertPts;
344365
Loop &L;
345366
LoopInfo &LI;
367+
BlockFrequencyInfo *BFI;
346368
};
347369

348370
} // end anonymous namespace
@@ -365,8 +387,9 @@ INITIALIZE_PASS_END(
365387
"Frontend instrumentation-based coverage lowering.", false, false)
366388

367389
ModulePass *
368-
llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
369-
return new InstrProfilingLegacyPass(Options);
390+
llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
391+
bool IsCS) {
392+
return new InstrProfilingLegacyPass(Options, IsCS);
370393
}
371394

372395
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
@@ -415,6 +438,13 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
415438
LoopInfo LI(DT);
416439
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
417440

441+
std::unique_ptr<BlockFrequencyInfo> BFI;
442+
if (Options.UseBFIInPromotion) {
443+
std::unique_ptr<BranchProbabilityInfo> BPI;
444+
BPI.reset(new BranchProbabilityInfo(*F, LI, TLI));
445+
BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
446+
}
447+
418448
for (const auto &LoadStore : PromotionCandidates) {
419449
auto *CounterLoad = LoadStore.first;
420450
auto *CounterStore = LoadStore.second;
@@ -430,7 +460,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
430460
// Do a post-order traversal of the loops so that counter updates can be
431461
// iteratively hoisted outside the loop nest.
432462
for (auto *Loop : llvm::reverse(Loops)) {
433-
PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
463+
PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
434464
Promoter.run(&TotalCountersPromoted);
435465
}
436466
}
@@ -681,7 +711,6 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
681711
// Don't do this for Darwin. compiler-rt uses linker magic.
682712
if (TT.isOSDarwin())
683713
return false;
684-
685714
// Use linker script magic to get data/cnts/name start/end.
686715
if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
687716
TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows())
@@ -985,8 +1014,12 @@ void InstrProfiling::emitUses() {
9851014
}
9861015

9871016
void InstrProfiling::emitInitialization() {
988-
// Create variable for profile name.
989-
createProfileFileNameVar(*M, Options.InstrProfileOutput);
1017+
// Create ProfileFileName variable. Don't do this for the
1018+
// context-sensitive instrumentation lowering: This lowering is after
1019+
// LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
1020+
// have already created the variable before LTO/ThinLTO linking.
1021+
if (!IsCS)
1022+
createProfileFileNameVar(*M, Options.InstrProfileOutput);
9901023
Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
9911024
if (!RegisterF)
9921025
return;

0 commit comments

Comments
 (0)