Skip to content

Introduce UnpredictableProfileLoader for PMU branch-miss profiles #99027

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions llvm/include/llvm/Transforms/IPO/UnpredictableProfileLoader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===-- UnpredictableProfileLoader.h - Unpredictable Profile Loader -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H
#define LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H

#include "llvm/IR/PassManager.h"
#include "llvm/ProfileData/SampleProfReader.h"

namespace llvm {

class Module;

/// Module pass that combines two sample profiles -- one holding execution
/// counts and one holding branch-mispredict counts -- and attaches
/// !unpredictable metadata to instructions whose computed mispredict ratio is
/// high enough.
struct UnpredictableProfileLoaderPass
    : PassInfoMixin<UnpredictableProfileLoaderPass> {
  /// Construct the pass with the path of the execution-frequency profile.
  /// Marked explicit so that a path is never silently converted into a pass
  /// object.
  explicit UnpredictableProfileLoaderPass(StringRef FrequencyProfileFile);

  /// Construct the pass taking the frequency profile path from the command
  /// line only.
  UnpredictableProfileLoaderPass();

  /// Load both profiles (if not already loaded) and annotate \p M. Returns
  /// PreservedAnalyses::all() when nothing was changed.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

  /// Readers for the execution-frequency and mispredict profiles. Both are
  /// expected to be either set or unset together.
  std::unique_ptr<SampleProfileReader> FreqReader, MispReader;

  /// Create and read both profile readers. Returns true when both profiles
  /// are available.
  bool loadSampleProfile(Module &M);

  /// Annotate every function in the module; returns true if any instruction
  /// was changed.
  bool addUpredictableMetadata(Module &F);

  /// Annotate the instructions of a single function; returns true if any
  /// instruction was changed.
  bool addUpredictableMetadata(Function &F);

  /// Compute the mispredict ratio for \p I from the samples recorded at its
  /// debug location in both profiles. Yields an error state when no ratio can
  /// be computed.
  ErrorOr<double> getMispredictRatio(const FunctionSamples *FreqSamples,
                                     const FunctionSamples *MispSamples,
                                     const Instruction *I);

  /// Path of the execution-frequency profile, fixed at construction time.
  const std::string FrequencyProfileFile;
};

} // end namespace llvm

#endif // LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation.h"
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
#include "llvm/Transforms/IPO/SampleProfile.h"
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation/CGProfile.h"
Expand Down Expand Up @@ -1092,6 +1093,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Run after inlining decisions made by SampleProfileLoader. This can apply
// mispredict metadata to specific inlined callees.
MPM.addPass(UnpredictableProfileLoaderPass(PGOOpt->ProfileFile));
// Do not invoke ICP in the LTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the LTO backend.
if (!isLTOPreLink(Phase))
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("trigger-crash-module", TriggerCrashModulePass())
MODULE_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
MODULE_PASS("unpredictable-profile-loader", UnpredictableProfileLoaderPass())
MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
MODULE_PASS("verify", VerifierPass())
MODULE_PASS("view-callgraph", CallGraphViewerPass())
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/IPO/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ add_llvm_component_library(LLVMipo
StripSymbols.cpp
SyntheticCountsPropagation.cpp
ThinLTOBitcodeWriter.cpp
UnpredictableProfileLoader.cpp
WholeProgramDevirt.cpp

ADDITIONAL_HEADER_DIRS
Expand Down
226 changes: 226 additions & 0 deletions llvm/lib/Transforms/IPO/UnpredictableProfileLoader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
//=== UnpredictableProfileLoader.cpp - Unpredictable Profile Loader -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass reads a sample profile containing mispredict counts and a sample
// profile containing execution counts and computes branch mispredict ratios for
// each conditional instruction. If a sufficiently high mispredict ratio is
// found, !unpredictable metadata is added.
//
// Note that this requires that the mispredict and frequency profiles have
// comparable magnitudes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/IPO.h"

using namespace llvm;

#define DEBUG_TYPE "unpredictable-profile-loader"

// Path to the sample profile holding mispredict counts. When this is left
// empty loadSampleProfile() fails and the pass makes no changes, so the pass
// is effectively disabled unless this option is given.
static cl::opt<std::string> UnpredictableHintsFile(
"unpredictable-hints-file",
cl::desc("Path to the unpredictability hints profile"), cl::Hidden);

// Typically this file will be provided via PGOOpt. This option is provided
// primarily for debugging and testing. When non-empty it takes precedence over
// the path handed to the pass constructor.
static cl::opt<std::string>
FrequencyProfileOption("unpredictable-hints-frequency-profile",
cl::desc("Path to an execution frequency profile to "
"use as a baseline for unpredictability"),
cl::Hidden);

// This determines the minimum apparent mispredict ratio which should earn a
// mispredict metadata annotation. Ratios strictly below this value are
// ignored; the default is 0.2.
static cl::opt<double> MinimumRatio(
"unpredictable-hints-min-ratio",
cl::desc(
"Absolute minimum branch miss ratio to apply MD_unpredictable from"),
cl::init(0.2), cl::Hidden);

// This option is useful for dealing with two different sampling frequencies.
// The mispredict count is multiplied by this factor before being divided by
// the execution count; the default of 1.0 leaves the ratio unscaled.
static cl::opt<double>
RatioFactor("unpredictable-hints-factor",
cl::desc("Multiply all ratios by this factor"), cl::init(1.0),
cl::ReallyHidden);

// Look up the execution-frequency and mispredict sample counts recorded for an
// Instruction's debug location in the two FunctionSamples profiles and turn
// them into an effective branch mispredict ratio. The counts are uint64s read
// directly from the profile files. An error state is returned when the
// instruction has no debug location, when either profile lacks a sample for
// that location, or when the execution count is zero.
ErrorOr<double> UnpredictableProfileLoaderPass::getMispredictRatio(
    const FunctionSamples *FuncFreqSamples,
    const FunctionSamples *FuncMispSamples, const Instruction *I) {

  const auto &Loc = I->getDebugLoc();
  if (!Loc)
    return std::error_code();

  // Shared lookup: resolve the (possibly inlined) samples covering Loc, then
  // fetch the count stored at its line offset and base discriminator.
  auto CountAtLoc = [&Loc](const FunctionSamples *TopLevel,
                           SampleProfileReader &Reader) -> ErrorOr<uint64_t> {
    const FunctionSamples *Covering =
        TopLevel->findFunctionSamples(Loc, Reader.getRemapper());
    if (!Covering)
      return std::error_code();
    return Covering->findSamplesAt(FunctionSamples::getOffset(Loc),
                                   Loc->getBaseDiscriminator());
  };

  const ErrorOr<uint64_t> ExecCount = CountAtLoc(FuncFreqSamples, *FreqReader);
  if (!ExecCount)
    return std::error_code();

  const ErrorOr<uint64_t> MissCount = CountAtLoc(FuncMispSamples, *MispReader);
  if (!MissCount)
    return std::error_code();

  // A zero execution count cannot serve as a denominator.
  const double Freq = ExecCount.get();
  if (Freq == 0.0)
    return std::error_code();

  const double Misp = MissCount.get();
  const double MissRatio = (Misp * RatioFactor) / Freq;

  LLVM_DEBUG(dbgs() << "Computing mispredict ratio of " << format("%0.2f", Misp)
                    << "/" << format("%0.2f", Freq) << " * "
                    << format("%0.2f", RatioFactor.getValue()) << " = "
                    << format("%0.2f", MissRatio) << " for instruction\n"
                    << *I << "\n");
  return MissRatio;
}

// Examine all conditional BranchInsts, SelectInsts, and SwitchInsts in a
// function, adding !unpredictable metadata if they appear in the mispredict
// profile with sufficient weight.
//
// Unconditional branches are skipped: they have no condition to mispredict,
// but they can share a debug location with a conditional instruction and would
// otherwise pick up that instruction's samples.
//
// Returns true if metadata was added to at least one instruction. Functions
// without samples in either profile are left untouched.
bool UnpredictableProfileLoaderPass::addUpredictableMetadata(Function &F) {

  const FunctionSamples *FreqSamples = FreqReader->getSamplesFor(F);
  if (!FreqSamples)
    return false;

  const FunctionSamples *MispSamples = MispReader->getSamplesFor(F);
  if (!MispSamples)
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (const auto *BI = dyn_cast<BranchInst>(&I)) {
        // Only conditional branches can be mispredicted.
        if (BI->isUnconditional())
          continue;
      } else if (!isa<SelectInst>(&I) && !isa<SwitchInst>(&I)) {
        continue;
      }

      // Keep any annotation that is already present.
      if (I.hasMetadata(LLVMContext::MD_unpredictable))
        continue;

      const ErrorOr<double> RatioOrError =
          getMispredictRatio(FreqSamples, MispSamples, &I);
      if (!RatioOrError)
        continue;
      const double MissRatio = RatioOrError.get();

      if (MissRatio < MinimumRatio) {
        LLVM_DEBUG(dbgs() << "\tRatio " << format("%0.2f", MissRatio)
                          << " is below threshold of "
                          << format("%0.2f", MinimumRatio.getValue())
                          << "; ignoring.\n");
        continue;
      }

      // In the future we probably want to attach more information here, such as
      // the mispredict count or ratio.
      MDNode *MD = MDNode::get(I.getContext(), std::nullopt);
      I.setMetadata(LLVMContext::MD_unpredictable, MD);
      MadeChange = true;
    }
  }

  return MadeChange;
}

// Run the per-function annotation over every function in the module. Every
// function is visited even after a change has been recorded. Returns true if
// any function was modified.
bool UnpredictableProfileLoaderPass::addUpredictableMetadata(Module &M) {
  bool AnyFunctionChanged = false;

  for (Function &Fn : M) {
    if (addUpredictableMetadata(Fn))
      AnyFunctionChanged = true;
  }

  return AnyFunctionChanged;
}

// Create and read the mispredict and execution-frequency profile readers.
//
// Returns true when both readers are available, either because an earlier call
// already loaded them or because both were loaded successfully now. Returns
// false when either path is empty or either profile cannot be opened or read;
// open/read failures are reported through the LLVMContext's diagnostic
// handler.
//
// The member readers are only assigned once BOTH profiles have been read
// successfully. This keeps the "both or neither" invariant intact after a
// failure, so a later run can neither trip the assertion below nor mistake a
// reader whose read() failed for a loaded profile.
bool UnpredictableProfileLoaderPass::loadSampleProfile(Module &M) {
  if (MispReader && FreqReader)
    return true;

  assert(!MispReader && !FreqReader &&
         "Expected both or neither profile readers");

  LLVMContext &Ctx = M.getContext();
  auto FS = vfs::getRealFileSystem();

  // Open and fully read one profile. Yields a null pointer on any failure.
  auto ReadProfile =
      [&Ctx,
       &FS](const std::string &ProfileFile)
      -> std::unique_ptr<SampleProfileReader> {
    if (ProfileFile.empty())
      return nullptr;

    ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
        SampleProfileReader::create(ProfileFile, Ctx, *FS);
    if (std::error_code EC = ReaderOrErr.getError()) {
      std::string Msg = "Could not open profile: " + EC.message();
      Ctx.diagnose(DiagnosticInfoSampleProfile(ProfileFile, Msg,
                                               DiagnosticSeverity::DS_Warning));
      return nullptr;
    }

    std::unique_ptr<SampleProfileReader> Reader = std::move(ReaderOrErr.get());
    if (std::error_code EC = Reader->read()) {
      std::string Msg = "Profile reading failed: " + EC.message();
      Ctx.diagnose(DiagnosticInfoSampleProfile(ProfileFile, Msg));
      return nullptr;
    }

    return Reader;
  };

  std::unique_ptr<SampleProfileReader> NewMispReader =
      ReadProfile(UnpredictableHintsFile);
  if (!NewMispReader)
    return false;

  std::unique_ptr<SampleProfileReader> NewFreqReader =
      ReadProfile(FrequencyProfileFile);
  if (!NewFreqReader)
    return false;

  // Commit both readers together.
  MispReader = std::move(NewMispReader);
  FreqReader = std::move(NewFreqReader);
  return true;
}

// Default construction: the frequency profile path comes solely from the
// -unpredictable-hints-frequency-profile option (which may be empty).
UnpredictableProfileLoaderPass::UnpredictableProfileLoaderPass()
    : FrequencyProfileFile(FrequencyProfileOption) {}

// Construction with a PGO profile path: the command-line option, when set,
// overrides the supplied path; otherwise the supplied path is used.
UnpredictableProfileLoaderPass::UnpredictableProfileLoaderPass(
    StringRef PGOProfileFile)
    : FrequencyProfileFile(!FrequencyProfileOption.empty()
                               ? FrequencyProfileOption
                               : PGOProfileFile) {}

// Pass entry point. Loads the profiles and annotates the module. If the
// profiles are unavailable or no metadata was added, every analysis is
// preserved; otherwise only the CFG analyses set is marked preserved.
PreservedAnalyses UnpredictableProfileLoaderPass::run(Module &M,
                                                      ModuleAnalysisManager &) {
  // Annotation is only attempted once both profiles are loaded.
  const bool ModuleChanged = loadSampleProfile(M) && addUpredictableMetadata(M);
  if (!ModuleChanged)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-pgo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
; SAMPLE_USE_PRE_LINK: Running pass: SROAPass
; SAMPLE_USE_PRE_LINK: Running pass: EarlyCSEPass
; SAMPLE_USE: Running pass: SampleProfileLoaderPass
; SAMPLE_USE: Running pass: UnpredictableProfileLoaderPass
; SAMPLE_USE_O: Running pass: PGOIndirectCallPromotion
; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass
; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: UnpredictableProfileLoaderPass
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running pass: OpenMPOptPass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: UnpredictableProfileLoaderPass
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This is a standard SPGO profile indicating basic block execution frequency.
sel_arr:1:0
11: 4000
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
caller:1:0
1: callee:1
3: 997
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
caller:1:0
1: callee:1
3: 400
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# This profile indicates 1000 mispredict samples for instructions 11 source
# lines into the sel_arr function.
sel_arr:1:0
11: 1000
Loading