Skip to content

Commit d080c74

Browse files
committed
Introduce UnpredictableProfileLoader for PMU branch-miss profiles
This pass reads IP-based profiles of branch-miss PMU events and uses them to add !unpredictable metadata. This can be thought of as automatically adding __builtin_unpredictable() hints to branch conditions based on PMU feedback.

On Linux, such a profile may be created with something like:

    perf record -b -e branch-misses:uppp ...
    llvm-profgen --leading-ip-only --perfdata perf.data ...

This branch mispredict profile should be accompanied by an SPGO execution frequency profile.
1 parent e6ec7c8 commit d080c74

17 files changed

+641
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//===-- UnpredictableProfileLoader.h - Unpredictable Profile Loader -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H
10+
#define LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H
11+
12+
#include "llvm/IR/PassManager.h"
13+
#include "llvm/ProfileData/SampleProfReader.h"
14+
15+
namespace llvm {
16+
17+
class Module;
18+
19+
struct UnpredictableProfileLoaderPass
20+
: PassInfoMixin<UnpredictableProfileLoaderPass> {
21+
UnpredictableProfileLoaderPass(StringRef FrequencyProfileFile);
22+
UnpredictableProfileLoaderPass();
23+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
24+
std::unique_ptr<SampleProfileReader> FreqReader, MispReader;
25+
bool loadSampleProfile(Module &M);
26+
bool addUpredictableMetadata(Module &F);
27+
bool addUpredictableMetadata(Function &F);
28+
ErrorOr<double> getMispredictRatio(const FunctionSamples *FreqSamples,
29+
const FunctionSamples *MispSamples,
30+
const Instruction *I);
31+
const std::string FrequencyProfileFile;
32+
};
33+
34+
} // end namespace llvm
35+
36+
#endif // LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@
176176
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
177177
#include "llvm/Transforms/IPO/StripSymbols.h"
178178
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
179+
#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
179180
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
180181
#include "llvm/Transforms/InstCombine/InstCombine.h"
181182
#include "llvm/Transforms/Instrumentation.h"

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include "llvm/Transforms/IPO/SampleProfile.h"
6868
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
6969
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
70+
#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
7071
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
7172
#include "llvm/Transforms/InstCombine/InstCombine.h"
7273
#include "llvm/Transforms/Instrumentation/CGProfile.h"
@@ -1092,6 +1093,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
10921093
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
10931094
// RequireAnalysisPass for PSI before subsequent non-module passes.
10941095
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1096+
// Run after inlining decisions made by SampleProfileLoader. This can apply
1097+
// mispredict metadata to specific inlined callees.
1098+
MPM.addPass(UnpredictableProfileLoaderPass(PGOOpt->ProfileFile));
10951099
// Do not invoke ICP in the LTOPrelink phase as it makes it hard
10961100
// for the profile annotation to be accurate in the LTO backend.
10971101
if (!isLTOPreLink(Phase))

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass())
140140
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
141141
MODULE_PASS("trigger-crash-module", TriggerCrashModulePass())
142142
MODULE_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
143+
MODULE_PASS("unpredictable-profile-loader", UnpredictableProfileLoaderPass())
143144
MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
144145
MODULE_PASS("verify", VerifierPass())
145146
MODULE_PASS("view-callgraph", CallGraphViewerPass())

llvm/lib/Transforms/IPO/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ add_llvm_component_library(LLVMipo
4343
StripSymbols.cpp
4444
SyntheticCountsPropagation.cpp
4545
ThinLTOBitcodeWriter.cpp
46+
UnpredictableProfileLoader.cpp
4647
WholeProgramDevirt.cpp
4748

4849
ADDITIONAL_HEADER_DIRS
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
//=== UnpredictableProfileLoader.cpp - Unpredictable Profile Loader -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass reads a sample profile containing mispredict counts and a sample
10+
// profile containing execution counts and computes branch mispredict ratios for
11+
// each conditional instruction. If a sufficiently high mispredict ratio is
12+
// found !unpredictable metadata is added.
13+
//
14+
// Note that this requires that the mispredict and frequency profiles have
15+
// comparable magnitudes.
16+
//
17+
//===----------------------------------------------------------------------===//
18+
19+
#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
20+
#include "llvm/IR/DebugInfoMetadata.h"
21+
#include "llvm/IR/Function.h"
22+
#include "llvm/IR/Instructions.h"
23+
#include "llvm/IR/Module.h"
24+
#include "llvm/ProfileData/SampleProf.h"
25+
#include "llvm/ProfileData/SampleProfReader.h"
26+
#include "llvm/Support/CommandLine.h"
27+
#include "llvm/Support/VirtualFileSystem.h"
28+
#include "llvm/Transforms/IPO.h"
29+
30+
using namespace llvm;
31+
32+
#define DEBUG_TYPE "unpredictable-profile-loader"
33+
34+
// Path to the sample profile holding branch-mispredict counts. It is read into
// MispReader by loadSampleProfile(); when this option is empty the profile
// load reports failure and the pass leaves the module untouched.
static cl::opt<std::string> UnpredictableHintsFile(
35+
"unpredictable-hints-file",
36+
cl::desc("Path to the unpredictability hints profile"), cl::Hidden);
37+
38+
// Typically this file will be provided via PGOOpt. This option is provided
39+
// primarily for debugging and testing.
40+
static cl::opt<std::string>
41+
FrequencyProfileOption("unpredictable-hints-frequency-profile",
42+
cl::desc("Path to an execution frequency profile to "
43+
"use as a baseline for unpredictability"),
44+
cl::Hidden);
45+
46+
// This determines the minimum apparent mispredict ratio which should earn a
47+
// mispredict metadata annotation.
48+
static cl::opt<double> MinimumRatio(
49+
"unpredictable-hints-min-ratio",
50+
cl::desc(
51+
"Absolute minimum branch miss ratio to apply MD_unpredictable from"),
52+
cl::init(0.2), cl::Hidden);
53+
54+
// This option is useful for dealing with two different sampling frequencies.
55+
static cl::opt<double>
56+
RatioFactor("unpredictable-hints-factor",
57+
cl::desc("Multiply all ratios by this factor"), cl::init(1.0),
58+
cl::ReallyHidden);
59+
60+
// Lookup samples for an Instruction's corresponding location in a
61+
// FunctionSamples profile. The count returned is directly from the profile
62+
// representing the number of samples seen.
63+
ErrorOr<double> UnpredictableProfileLoaderPass::getMispredictRatio(
64+
const FunctionSamples *FuncFreqSamples,
65+
const FunctionSamples *FuncMispSamples, const Instruction *I) {
66+
67+
const auto &Loc = I->getDebugLoc();
68+
if (!Loc)
69+
return std::error_code();
70+
71+
const FunctionSamples *FreqSamples =
72+
FuncFreqSamples->findFunctionSamples(Loc, FreqReader->getRemapper());
73+
if (!FreqSamples)
74+
return std::error_code();
75+
const ErrorOr<uint64_t> FreqCount = FreqSamples->findSamplesAt(
76+
FunctionSamples::getOffset(Loc), Loc->getBaseDiscriminator());
77+
if (!FreqCount)
78+
return std::error_code();
79+
80+
const FunctionSamples *MispSamples =
81+
FuncMispSamples->findFunctionSamples(Loc, MispReader->getRemapper());
82+
if (!MispSamples)
83+
return std::error_code();
84+
const ErrorOr<uint64_t> MispCount = MispSamples->findSamplesAt(
85+
FunctionSamples::getOffset(Loc), Loc->getBaseDiscriminator());
86+
if (!MispCount)
87+
return std::error_code();
88+
89+
const double Freq = FreqCount.get();
90+
if (!Freq)
91+
return std::error_code();
92+
93+
const double Misp = MispCount.get();
94+
const double MissRatio = (Misp * RatioFactor) / Freq;
95+
96+
LLVM_DEBUG(dbgs() << "Computing mispredict ratio of " << format("%0.2f", Misp)
97+
<< "/" << format("%0.2f", Freq) << " * "
98+
<< format("%0.2f", RatioFactor.getValue()) << " = "
99+
<< format("%0.2f", MissRatio) << " for instruction\n"
100+
<< *I << "\n");
101+
return MissRatio;
102+
}
103+
104+
// Examine all Select and BranchInsts in a function, adding !unpredictable
105+
// metadata if they appear in the mispredict profile with sufficient weight.
106+
bool UnpredictableProfileLoaderPass::addUpredictableMetadata(Function &F) {
107+
108+
const FunctionSamples *FreqSamples = FreqReader->getSamplesFor(F);
109+
if (!FreqSamples)
110+
return false;
111+
112+
const FunctionSamples *MispSamples = MispReader->getSamplesFor(F);
113+
if (!MispSamples)
114+
return false;
115+
116+
bool MadeChange = false;
117+
for (BasicBlock &BB : F) {
118+
for (Instruction &I : BB) {
119+
if (!isa<BranchInst>(&I) && !isa<SelectInst>(&I) && !isa<SwitchInst>(&I))
120+
continue;
121+
if (I.hasMetadata(LLVMContext::MD_unpredictable))
122+
continue;
123+
124+
const ErrorOr<double> RatioOrError =
125+
getMispredictRatio(FreqSamples, MispSamples, &I);
126+
if (!RatioOrError)
127+
continue;
128+
const double MissRatio = RatioOrError.get();
129+
130+
if (MissRatio < MinimumRatio) {
131+
LLVM_DEBUG(dbgs() << "\tRatio " << format("%0.2f", MissRatio)
132+
<< " is below threshold of "
133+
<< format("%0.2f", MinimumRatio.getValue())
134+
<< "; ignoring.\n");
135+
continue;
136+
}
137+
138+
// In the future we probably want to attach more information here, such as
139+
// the mispredict count or ratio.
140+
MDNode *MD = MDNode::get(I.getContext(), std::nullopt);
141+
I.setMetadata(LLVMContext::MD_unpredictable, MD);
142+
MadeChange = true;
143+
}
144+
}
145+
146+
return MadeChange;
147+
}
148+
149+
// Run the per-function annotation over every function in M. Every function is
// visited even after one has reported a change. Returns true if at least one
// function was modified.
bool UnpredictableProfileLoaderPass::addUpredictableMetadata(Module &M) {
  bool AnyAnnotated = false;

  for (Function &Fn : M) {
    if (addUpredictableMetadata(Fn))
      AnyAnnotated = true;
  }

  return AnyAnnotated;
}
158+
159+
// Make sure both the mispredict profile (UnpredictableHintsFile) and the
// execution-frequency profile (FrequencyProfileFile) are loaded.
//
// Returns true when both readers are available, either because an earlier
// call loaded them or because this call did. Returns false when either path is
// empty, or when a profile cannot be opened or read; the latter two cases emit
// a warning diagnostic. The readers are only stored once both profiles have
// loaded successfully, so a failure never leaves the pass with exactly one
// reader set.
bool UnpredictableProfileLoaderPass::loadSampleProfile(Module &M) {
  if (MispReader && FreqReader)
    return true;

  assert(!MispReader && !FreqReader &&
         "Expected both or neither profile readers");

  LLVMContext &Ctx = M.getContext();
  auto FS = vfs::getRealFileSystem();

  // Open and fully read one profile. Returns null on any failure.
  auto ReadProfile = [&Ctx, &FS](const std::string &ProfileFile)
      -> std::unique_ptr<SampleProfileReader> {
    if (ProfileFile.empty())
      return nullptr;

    ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
        SampleProfileReader::create(ProfileFile, Ctx, *FS);
    if (std::error_code EC = ReaderOrErr.getError()) {
      std::string Msg = "Could not open profile: " + EC.message();
      Ctx.diagnose(DiagnosticInfoSampleProfile(ProfileFile, Msg,
                                               DiagnosticSeverity::DS_Warning));
      return nullptr;
    }

    std::unique_ptr<SampleProfileReader> Reader = std::move(ReaderOrErr.get());
    // A reader whose read() failed must not be used for sample lookups.
    if (std::error_code EC = Reader->read()) {
      std::string Msg = "profile reading failed: " + EC.message();
      Ctx.diagnose(DiagnosticInfoSampleProfile(ProfileFile, Msg,
                                               DiagnosticSeverity::DS_Warning));
      return nullptr;
    }

    return Reader;
  };

  std::unique_ptr<SampleProfileReader> NewMispReader =
      ReadProfile(UnpredictableHintsFile);
  if (!NewMispReader)
    return false;

  std::unique_ptr<SampleProfileReader> NewFreqReader =
      ReadProfile(FrequencyProfileFile);
  if (!NewFreqReader)
    return false;

  // Commit both readers together to keep the both-or-neither invariant.
  MispReader = std::move(NewMispReader);
  FreqReader = std::move(NewFreqReader);
  return true;
}
198+
199+
// Default construction: the frequency profile path is whatever the
// -unpredictable-hints-frequency-profile option holds, possibly empty.
UnpredictableProfileLoaderPass::UnpredictableProfileLoaderPass()
    : FrequencyProfileFile(FrequencyProfileOption.getValue()) {}
201+
202+
// Construct with a frequency profile path supplied by the caller. A non-empty
// -unpredictable-hints-frequency-profile option takes precedence over it.
UnpredictableProfileLoaderPass::UnpredictableProfileLoaderPass(
    StringRef PGOProfileFile)
    : FrequencyProfileFile(!FrequencyProfileOption.empty()
                               ? StringRef(FrequencyProfileOption.getValue())
                               : PGOProfileFile) {}
207+
208+
// Pass entry point. Loads the profiles and annotates the module. If the
// profiles are unavailable or nothing was annotated, every analysis is
// preserved; otherwise only the CFG analyses set is preserved.
PreservedAnalyses UnpredictableProfileLoaderPass::run(Module &M,
                                                      ModuleAnalysisManager &) {
  // Annotation is only attempted once the profiles have loaded.
  const bool Annotated = loadSampleProfile(M) && addUpredictableMetadata(M);
  if (!Annotated)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}

llvm/test/Other/new-pm-pgo.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
; SAMPLE_USE_PRE_LINK: Running pass: SROAPass
2626
; SAMPLE_USE_PRE_LINK: Running pass: EarlyCSEPass
2727
; SAMPLE_USE: Running pass: SampleProfileLoaderPass
28+
; SAMPLE_USE: Running pass: UnpredictableProfileLoaderPass
2829
; SAMPLE_USE_O: Running pass: PGOIndirectCallPromotion
2930
; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass
3031
; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
3636
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
3737
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
38+
; CHECK-O-NEXT: Running pass: UnpredictableProfileLoaderPass
3839
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
3940
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
4041
; CHECK-O-NEXT: Running pass: OpenMPOptPass

llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
4848
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
4949
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
50+
; CHECK-O-NEXT: Running pass: UnpredictableProfileLoaderPass
5051
; CHECK-O-NEXT: Running pass: OpenMPOptPass
5152
; CHECK-O-NEXT: Running pass: IPSCCPPass
5253
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# This is a standard SPGO profile indicating basic block execution frequency.
2+
sel_arr:1:0
3+
11: 4000
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
caller:1:0
2+
1: callee:1
3+
3: 997
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
caller:1:0
2+
1: callee:1
3+
3: 400
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# This profile indicates 1000 mispredict samples for instructions 11 source
2+
# lines into the sel_arr function.
3+
sel_arr:1:0
4+
11: 1000

0 commit comments

Comments
 (0)