Skip to content

Commit 1fbb719

Browse files
committed
[LPM] Port CGProfilePass from NPM to LPM
Reviewers: hans, chandlerc!, asbirlea, nikic
Reviewed By: hans, nikic
Subscribers: steven_wu, dexonsmith, nikic, echristo, void, zhizhouy, cfe-commits, aeubanks, MaskRay, jvesely, nhaehnle, hiraditya, kerbowa, llvm-commits
Tags: #llvm, #clang
Differential Revision: https://reviews.llvm.org/D83013
1 parent 1d542f0 commit 1fbb719

File tree

17 files changed

+139
-50
lines changed

17 files changed

+139
-50
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,6 @@ CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables.
254254
CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
255255
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
256256
CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
257-
CODEGENOPT(CallGraphProfile , 1, 0) ///< Run call graph profile.
258257

259258
/// Attempt to use register sized accesses to bit-fields in structures, when
260259
/// possible.

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -620,6 +620,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
620620
PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
621621
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
622622
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
623+
// Only enable CGProfilePass when using integrated assembler, since
624+
// non-integrated assemblers don't recognize .cgprofile section.
625+
PMBuilder.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
623626

624627
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
625628
// Loop interleaving in the loop vectorizer has historically been set to be
@@ -1144,7 +1147,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
11441147
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
11451148
PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
11461149
PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
1147-
PTO.CallGraphProfile = CodeGenOpts.CallGraphProfile;
1150+
// Only enable CGProfilePass when using integrated assembler, since
1151+
// non-integrated assemblers don't recognize .cgprofile section.
1152+
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
11481153
PTO.Coroutines = LangOpts.Coroutines;
11491154

11501155
PassInstrumentationCallbacks PIC;
@@ -1562,7 +1567,9 @@ static void runThinLTOBackend(
15621567
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
15631568
Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
15641569
Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP;
1565-
Conf.PTO.CallGraphProfile = CGOpts.CallGraphProfile;
1570+
// Only enable CGProfilePass when using integrated assembler, since
1571+
// non-integrated assemblers don't recognize .cgprofile section.
1572+
Conf.PTO.CallGraphProfile = !CGOpts.DisableIntegratedAS;
15661573

15671574
// Context sensitive profile.
15681575
if (CGOpts.hasProfileCSIRInstr()) {

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -860,7 +860,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
860860
Opts.RerollLoops = Args.hasArg(OPT_freroll_loops);
861861

862862
Opts.DisableIntegratedAS = Args.hasArg(OPT_fno_integrated_as);
863-
Opts.CallGraphProfile = !Opts.DisableIntegratedAS;
864863
Opts.Autolink = !Args.hasArg(OPT_fno_autolink);
865864
Opts.SampleProfileFile =
866865
std::string(Args.getLastArgValue(OPT_fprofile_sample_use_EQ));

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ void initializeCFGViewerLegacyPassPass(PassRegistry&);
103103
void initializeCFIInstrInserterPass(PassRegistry&);
104104
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
105105
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
106+
void initializeCGProfileLegacyPassPass(PassRegistry &);
106107
void initializeCallGraphDOTPrinterPass(PassRegistry&);
107108
void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
108109
void initializeCallGraphViewerPass(PassRegistry&);

llvm/include/llvm/Transforms/IPO.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -282,6 +282,8 @@ ModulePass *createSampleProfileLoaderPass(StringRef Name);
282282
ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
283283
raw_ostream *ThinLinkOS = nullptr);
284284

285+
ModulePass *createCGProfileLegacyPass();
286+
285287
} // End llvm namespace
286288

287289
#endif

llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,7 @@ class PassManagerBuilder {
156156

157157
bool DisableTailCalls;
158158
bool DisableUnrollLoops;
159+
bool CallGraphProfile;
159160
bool SLPVectorize;
160161
bool LoopVectorize;
161162
bool LoopsInterleaved;

llvm/include/llvm/Transforms/Instrumentation/CGProfile.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,6 @@ namespace llvm {
1919
class CGProfilePass : public PassInfoMixin<CGProfilePass> {
2020
public:
2121
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
22-
23-
private:
24-
void addModuleFlags(
25-
Module &M,
26-
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const;
2722
};
2823
} // end namespace llvm
2924

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -248,10 +248,6 @@ static cl::opt<bool>
248248
EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
249249
cl::desc("Enable control height reduction optimization (CHR)"));
250250

251-
static cl::opt<bool> EnableCallGraphProfile(
252-
"enable-npm-call-graph-profile", cl::init(true), cl::Hidden,
253-
cl::desc("Enable call graph profile pass for the new PM (default = on)"));
254-
255251
/// Flag to enable inline deferral during PGO.
256252
static cl::opt<bool>
257253
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
@@ -267,7 +263,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
267263
Coroutines = false;
268264
LicmMssaOptCap = SetLicmMssaOptCap;
269265
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
270-
CallGraphProfile = EnableCallGraphProfile;
266+
CallGraphProfile = true;
271267
}
272268

273269
extern cl::opt<bool> EnableHotColdSplit;

llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() {
195195
PrepareForThinLTO = EnablePrepareForThinLTO;
196196
PerformThinLTO = EnablePerformThinLTO;
197197
DivergentTarget = false;
198+
CallGraphProfile = true;
198199
}
199200

200201
PassManagerBuilder::~PassManagerBuilder() {
@@ -834,6 +835,10 @@ void PassManagerBuilder::populateModulePassManager(
834835
if (MergeFunctions)
835836
MPM.add(createMergeFunctionsPass());
836837

838+
// Add Module flag "CG Profile" based on Branch Frequency Information.
839+
if (CallGraphProfile)
840+
MPM.add(createCGProfileLegacyPass());
841+
837842
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
838843
// canonicalization pass that enables other optimizations. As a result,
839844
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM

llvm/lib/Transforms/Instrumentation/CGProfile.cpp

Lines changed: 82 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -10,22 +10,47 @@
1010

1111
#include "llvm/ADT/MapVector.h"
1212
#include "llvm/Analysis/BlockFrequencyInfo.h"
13+
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
1314
#include "llvm/Analysis/TargetTransformInfo.h"
1415
#include "llvm/IR/Constants.h"
1516
#include "llvm/IR/Instructions.h"
1617
#include "llvm/IR/MDBuilder.h"
1718
#include "llvm/IR/PassManager.h"
19+
#include "llvm/InitializePasses.h"
1820
#include "llvm/ProfileData/InstrProf.h"
21+
#include "llvm/Transforms/IPO.h"
1922
#include "llvm/Transforms/Instrumentation.h"
2023

2124
#include <array>
2225

2326
using namespace llvm;
2427

25-
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
28+
static bool
29+
addModuleFlags(Module &M,
30+
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) {
31+
if (Counts.empty())
32+
return false;
33+
34+
LLVMContext &Context = M.getContext();
35+
MDBuilder MDB(Context);
36+
std::vector<Metadata *> Nodes;
37+
38+
for (auto E : Counts) {
39+
Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
40+
ValueAsMetadata::get(E.first.second),
41+
MDB.createConstant(ConstantInt::get(
42+
Type::getInt64Ty(Context), E.second))};
43+
Nodes.push_back(MDNode::get(Context, Vals));
44+
}
45+
46+
M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
47+
return true;
48+
}
49+
50+
static bool runCGProfilePass(
51+
Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
52+
function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) {
2653
MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
27-
FunctionAnalysisManager &FAM =
28-
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2954
InstrProfSymtab Symtab;
3055
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
3156
Function *CalledF, uint64_t NewCount) {
@@ -35,14 +60,18 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
3560
Count = SaturatingAdd(Count, NewCount);
3661
};
3762
// Ignore error here. Indirect calls are ignored if this fails.
38-
(void)(bool)Symtab.create(M);
63+
(void)(bool) Symtab.create(M);
3964
for (auto &F : M) {
40-
if (F.isDeclaration())
65+
// Avoid extra cost of running passes for BFI when the function doesn't have
66+
// entry count. Since LazyBlockFrequencyInfoPass only exists in LPM, check
67+
// if using LazyBlockFrequencyInfoPass.
68+
// TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM.
69+
if (F.isDeclaration() || (LazyBFI && !F.getEntryCount()))
4170
continue;
42-
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
71+
auto &BFI = GetBFI(F);
4372
if (BFI.getEntryFreq() == 0)
4473
continue;
45-
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
74+
TargetTransformInfo &TTI = GetTTI(F);
4675
for (auto &BB : F) {
4776
Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
4877
if (!BBCount)
@@ -69,28 +98,56 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
6998
}
7099
}
71100

72-
addModuleFlags(M, Counts);
73-
74-
return PreservedAnalyses::all();
101+
return addModuleFlags(M, Counts);
75102
}
76103

77-
void CGProfilePass::addModuleFlags(
78-
Module &M,
79-
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const {
80-
if (Counts.empty())
81-
return;
104+
namespace {
105+
struct CGProfileLegacyPass final : public ModulePass {
106+
static char ID;
107+
CGProfileLegacyPass() : ModulePass(ID) {
108+
initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
109+
}
82110

83-
LLVMContext &Context = M.getContext();
84-
MDBuilder MDB(Context);
85-
std::vector<Metadata *> Nodes;
111+
void getAnalysisUsage(AnalysisUsage &AU) const override {
112+
AU.setPreservesCFG();
113+
AU.addRequired<LazyBlockFrequencyInfoPass>();
114+
AU.addRequired<TargetTransformInfoWrapperPass>();
115+
}
86116

87-
for (auto E : Counts) {
88-
Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
89-
ValueAsMetadata::get(E.first.second),
90-
MDB.createConstant(ConstantInt::get(
91-
Type::getInt64Ty(Context), E.second))};
92-
Nodes.push_back(MDNode::get(Context, Vals));
117+
bool runOnModule(Module &M) override {
118+
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
119+
return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI();
120+
};
121+
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
122+
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
123+
};
124+
125+
return runCGProfilePass(M, GetBFI, GetTTI, true);
93126
}
127+
};
94128

95-
M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
129+
} // namespace
130+
131+
char CGProfileLegacyPass::ID = 0;
132+
133+
INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
134+
false)
135+
136+
ModulePass *llvm::createCGProfileLegacyPass() {
137+
return new CGProfileLegacyPass();
138+
}
139+
140+
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
141+
FunctionAnalysisManager &FAM =
142+
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
143+
auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
144+
return FAM.getResult<BlockFrequencyAnalysis>(F);
145+
};
146+
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
147+
return FAM.getResult<TargetIRAnalysis>(F);
148+
};
149+
150+
runCGProfilePass(M, GetBFI, GetTTI, false);
151+
152+
return PreservedAnalyses::all();
96153
}

llvm/lib/Transforms/Instrumentation/Instrumentation.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
112112
initializePGOInstrumentationUseLegacyPassPass(Registry);
113113
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
114114
initializePGOMemOPSizeOptLegacyPassPass(Registry);
115+
initializeCGProfileLegacyPassPass(Registry);
115116
initializeInstrOrderFileLegacyPassPass(Registry);
116117
initializeInstrProfilingLegacyPassPass(Registry);
117118
initializeMemorySanitizerLegacyPassPass(Registry);

llvm/test/CodeGen/AMDGPU/opt-pipeline.ll

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,6 +276,12 @@
276276
; GCN-O1-NEXT: Warn about non-applied transformations
277277
; GCN-O1-NEXT: Alignment from assumptions
278278
; GCN-O1-NEXT: Strip Unused Function Prototypes
279+
; GCN-O1-NEXT: Call Graph Profile
280+
; GCN-O1-NEXT: FunctionPass Manager
281+
; GCN-O1-NEXT: Dominator Tree Construction
282+
; GCN-O1-NEXT: Natural Loop Information
283+
; GCN-O1-NEXT: Lazy Branch Probability Analysis
284+
; GCN-O1-NEXT: Lazy Block Frequency Analysis
279285
; GCN-O1-NEXT: FunctionPass Manager
280286
; GCN-O1-NEXT: Dominator Tree Construction
281287
; GCN-O1-NEXT: Natural Loop Information
@@ -623,6 +629,12 @@
623629
; GCN-O2-NEXT: Strip Unused Function Prototypes
624630
; GCN-O2-NEXT: Dead Global Elimination
625631
; GCN-O2-NEXT: Merge Duplicate Global Constants
632+
; GCN-O2-NEXT: Call Graph Profile
633+
; GCN-O2-NEXT: FunctionPass Manager
634+
; GCN-O2-NEXT: Dominator Tree Construction
635+
; GCN-O2-NEXT: Natural Loop Information
636+
; GCN-O2-NEXT: Lazy Branch Probability Analysis
637+
; GCN-O2-NEXT: Lazy Block Frequency Analysis
626638
; GCN-O2-NEXT: FunctionPass Manager
627639
; GCN-O2-NEXT: Dominator Tree Construction
628640
; GCN-O2-NEXT: Natural Loop Information
@@ -975,6 +987,12 @@
975987
; GCN-O3-NEXT: Strip Unused Function Prototypes
976988
; GCN-O3-NEXT: Dead Global Elimination
977989
; GCN-O3-NEXT: Merge Duplicate Global Constants
990+
; GCN-O3-NEXT: Call Graph Profile
991+
; GCN-O3-NEXT: FunctionPass Manager
992+
; GCN-O3-NEXT: Dominator Tree Construction
993+
; GCN-O3-NEXT: Natural Loop Information
994+
; GCN-O3-NEXT: Lazy Branch Probability Analysis
995+
; GCN-O3-NEXT: Lazy Block Frequency Analysis
978996
; GCN-O3-NEXT: FunctionPass Manager
979997
; GCN-O3-NEXT: Dominator Tree Construction
980998
; GCN-O3-NEXT: Natural Loop Information

llvm/test/Instrumentation/cgprofile.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
; RUN: opt < %s -passes cg-profile -S | FileCheck %s
2+
; RUN: opt < %s -cg-profile -S | FileCheck %s
23

34
declare void @b()
45

llvm/test/Other/new-pm-cgprofile.ll

Lines changed: 0 additions & 11 deletions
This file was deleted.

llvm/test/Other/opt-O2-pipeline.ll

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -280,6 +280,12 @@
280280
; CHECK-NEXT: Strip Unused Function Prototypes
281281
; CHECK-NEXT: Dead Global Elimination
282282
; CHECK-NEXT: Merge Duplicate Global Constants
283+
; CHECK-NEXT: Call Graph Profile
284+
; CHECK-NEXT: FunctionPass Manager
285+
; CHECK-NEXT: Dominator Tree Construction
286+
; CHECK-NEXT: Natural Loop Information
287+
; CHECK-NEXT: Lazy Branch Probability Analysis
288+
; CHECK-NEXT: Lazy Block Frequency Analysis
283289
; CHECK-NEXT: FunctionPass Manager
284290
; CHECK-NEXT: Dominator Tree Construction
285291
; CHECK-NEXT: Natural Loop Information

llvm/test/Other/opt-O3-pipeline.ll

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,12 @@
285285
; CHECK-NEXT: Strip Unused Function Prototypes
286286
; CHECK-NEXT: Dead Global Elimination
287287
; CHECK-NEXT: Merge Duplicate Global Constants
288+
; CHECK-NEXT: Call Graph Profile
289+
; CHECK-NEXT: FunctionPass Manager
290+
; CHECK-NEXT: Dominator Tree Construction
291+
; CHECK-NEXT: Natural Loop Information
292+
; CHECK-NEXT: Lazy Branch Probability Analysis
293+
; CHECK-NEXT: Lazy Block Frequency Analysis
288294
; CHECK-NEXT: FunctionPass Manager
289295
; CHECK-NEXT: Dominator Tree Construction
290296
; CHECK-NEXT: Natural Loop Information

llvm/test/Other/opt-Os-pipeline.ll

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -266,6 +266,12 @@
266266
; CHECK-NEXT: Strip Unused Function Prototypes
267267
; CHECK-NEXT: Dead Global Elimination
268268
; CHECK-NEXT: Merge Duplicate Global Constants
269+
; CHECK-NEXT: Call Graph Profile
270+
; CHECK-NEXT: FunctionPass Manager
271+
; CHECK-NEXT: Dominator Tree Construction
272+
; CHECK-NEXT: Natural Loop Information
273+
; CHECK-NEXT: Lazy Branch Probability Analysis
274+
; CHECK-NEXT: Lazy Block Frequency Analysis
269275
; CHECK-NEXT: FunctionPass Manager
270276
; CHECK-NEXT: Dominator Tree Construction
271277
; CHECK-NEXT: Natural Loop Information

0 commit comments

Comments
 (0)