Skip to content

[BOLT] Drop parsing sample PC when processing LBR perf data #123420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ class DataAggregator : public DataReader {
private:
/// One branch (LBR) sample as produced by parseBranchSample().
struct PerfBranchSample {
/// Branch records attached to this sample. parseLBRSample() walks them in
/// stored order and treats that order as reverse execution order.
SmallVector<LBREntry, 32> LBR;
/// Event PC that parseBranchSample() reads from a hex field of the perf
/// script output.
uint64_t PC;
};

struct PerfBasicSample {
Expand Down Expand Up @@ -334,9 +333,6 @@ class DataAggregator : public DataReader {
/// Process all branch events.
void processBranchEvents();

/// This member function supports generating data for AutoFDO LLVM tools.
std::error_code writeAutoFDOData(StringRef OutputFilename);

/// Parse the full output generated by perf script to report non-LBR samples.
std::error_code parseBasicEvents();

Expand Down
109 changes: 9 additions & 100 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,6 @@ TimeAggregator("time-aggr",
cl::ZeroOrMore,
cl::cat(AggregatorCategory));

// Boolean flag "use-event-pc". When set, parseLBRSample() seeds its
// fall-through tracking with the sample's event PC instead of 0.
static cl::opt<bool>
UseEventPC("use-event-pc",
cl::desc("use event PC in combination with LBR sampling"),
cl::cat(AggregatorCategory));

// Boolean flag "autofdo". When set, preprocessProfile() writes the AutoFDO
// textual profile via writeAutoFDOData(), deletes temp files and exits, and
// parseBranchEvents() additionally counts each sample PC in BasicSamples.
static cl::opt<bool> WriteAutoFDOData(
"autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
cl::cat(AggregatorCategory));

} // namespace opts

namespace {
Expand Down Expand Up @@ -187,15 +178,13 @@ void DataAggregator::start() {
/*Wait = */false);
} else if (!opts::ITraceAggregation.empty()) {
std::string ItracePerfScriptArgs = llvm::formatv(
"script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
"script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
launchPerfProcess("branch events with itrace", MainEventsPPI,
ItracePerfScriptArgs.c_str(),
/*Wait = */ false);
} else {
launchPerfProcess("branch events",
MainEventsPPI,
"script -F pid,ip,brstack",
/*Wait = */false);
launchPerfProcess("branch events", MainEventsPPI, "script -F pid,brstack",
/*Wait = */ false);
}

// Note: we launch script for mem events regardless of the option, as the
Expand Down Expand Up @@ -381,67 +370,6 @@ void DataAggregator::parsePreAggregated() {
}
}

/// Emit the aggregated branch profile as text for the AutoFDO LLVM tools.
///
/// The file at \p OutputFilename receives three sections, each introduced by
/// a line holding the number of entries that follow:
///   1. fall-through traces    "from-to:count"
///   2. sampled addresses      "addr:count"
///   3. taken branches (LBR)   "src->dst:count"
/// Addresses are printed in hex, rebased against the first allocated loadable
/// program segment; anything below that base is written as 0.
///
/// \returns the error reported when opening the output file, or a
/// default-constructed (success) error code otherwise.
std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
  outs() << "PERF2BOLT: writing data for autofdo tools...\n";
  NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  std::error_code OpenError;
  raw_fd_ostream OS(OutputFilename, OpenError, sys::fs::OpenFlags::OF_None);
  if (OpenError)
    return OpenError;

  // AutoFDO expects addresses relative to the first allocated loadable
  // program segment, so rebase everything against it.
  const uint64_t Base = this->BC->FirstAllocAddress;
  auto rebase = [Base](uint64_t Address) -> uint64_t {
    return Address < Base ? 0 : Address - Base;
  };

  // Section 1: unique fall-through traces.
  OS << FallthroughLBRs.size() << "\n";
  for (const auto &[FT, Counts] : FallthroughLBRs)
    OS << formatv("{0:x-}-{1:x-}:{2}\n", rebase(FT.From), rebase(FT.To),
                  Counts.InternCount + Counts.ExternCount);

  // Section 2: unique sample addresses.
  OS << BasicSamples.size() << "\n";
  for (const auto &[Address, Hits] : BasicSamples)
    OS << formatv("{0:x-}:{1}\n", rebase(Address), Hits);

  // Section 3: unique LBR entries (taken branches).
  OS << BranchLBRs.size() << "\n";
  for (const auto &[Branch, Counts] : BranchLBRs)
    OS << formatv("{0:x-}->{1:x-}:{2}\n", rebase(Branch.From),
                  rebase(Branch.To), Counts.TakenCount);

  outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
         << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
         << " unique branches to " << OutputFilename << "\n";

  return std::error_code();
}

void DataAggregator::filterBinaryMMapInfo() {
if (opts::FilterPID) {
auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
Expand Down Expand Up @@ -583,15 +511,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";

// We can finish early if the goal is just to generate data for autofdo
if (opts::WriteAutoFDOData) {
if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
errs() << "Error writing autofdo data to file: " << EC.message() << "\n";

deleteTempFiles();
exit(0);
}

// Special handling for memory events
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
return Error::success();
Expand Down Expand Up @@ -1158,14 +1077,6 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
return make_error_code(errc::no_such_process);
}

while (checkAndConsumeFS()) {
}

ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
if (std::error_code EC = PCRes.getError())
return EC;
Res.PC = PCRes.get();

if (checkAndConsumeNewLine())
return Res;

Expand Down Expand Up @@ -1472,9 +1383,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
bool NeedsSkylakeFix) {
uint64_t NumTraces{0};
// LBRs are stored in reverse execution order. NextPC refers to the next
// recorded executed PC.
uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR = nullptr;
uint32_t NumEntry = 0;
for (const LBREntry &LBR : Sample.LBR) {
++NumEntry;
Expand All @@ -1486,10 +1397,10 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
// chronological order)
if (NeedsSkylakeFix && NumEntry <= 2)
continue;
if (NextPC) {
if (NextLBR) {
// Record fall-through trace.
const uint64_t TraceFrom = LBR.To;
const uint64_t TraceTo = NextPC;
const uint64_t TraceTo = NextLBR->From;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
Expand Down Expand Up @@ -1524,7 +1435,7 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
}
++NumTraces;
}
NextPC = LBR.From;
NextLBR = &LBR;

uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
Expand Down Expand Up @@ -1561,8 +1472,6 @@ std::error_code DataAggregator::parseBranchEvents() {
++NumSamples;

PerfBranchSample &Sample = SampleRes.get();
if (opts::WriteAutoFDOData)
++BasicSamples[Sample.PC];

if (Sample.LBR.empty()) {
++NumSamplesNoLBR;
Expand Down
Loading