-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[BOLT][AArch64] Introduce SPE mode in BasicAggregation #120741
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,6 +49,13 @@ static cl::opt<bool> | |
cl::desc("aggregate basic samples (without LBR info)"), | ||
cl::cat(AggregatorCategory)); | ||
|
||
/// Command-line switch `--spe`: turns on Arm SPE mode. Per its own help text
/// it is meant to be passed together with the no-LBR switch (`--spe --nl`).
cl::opt<bool> ArmSPE(
    "spe",
    cl::desc(
        "Enable Arm SPE mode. Used in conjunction with no-lbr mode, i.e. "
        "`--spe --nl`"),
    cl::cat(AggregatorCategory));
|
||
static cl::opt<std::string> | ||
ITraceAggregation("itrace", | ||
cl::desc("Generate LBR info with perf itrace argument"), | ||
|
@@ -171,11 +178,19 @@ void DataAggregator::start() { | |
|
||
findPerfExecutable(); | ||
|
||
if (opts::BasicAggregation) { | ||
launchPerfProcess("events without LBR", | ||
MainEventsPPI, | ||
if (opts::ArmSPE) { | ||
if (!opts::BasicAggregation) { | ||
errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with " | ||
"BasicAggregation.\n"; | ||
exit(1); | ||
} | ||
launchPerfProcess("branch events with SPE", MainEventsPPI, | ||
"script -F pid,event,ip,addr --itrace=i1i", | ||
/*Wait = */ false); | ||
} else if (opts::BasicAggregation) { | ||
launchPerfProcess("events without LBR", MainEventsPPI, | ||
"script -F pid,event,ip", | ||
/*Wait = */false); | ||
/*Wait = */ false); | ||
} else if (!opts::ITraceAggregation.empty()) { | ||
std::string ItracePerfScriptArgs = llvm::formatv( | ||
"script -F pid,brstack --itrace={0}", opts::ITraceAggregation); | ||
|
@@ -459,14 +474,20 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { | |
"not read one from input binary\n"; | ||
} | ||
|
||
auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { | ||
const Regex NoData("Samples for '.*' event do not have ADDR attribute set. " | ||
"Cannot print 'addr' field."); | ||
|
||
auto ErrorCallback = [&NoData](int ReturnCode, StringRef ErrBuf) { | ||
if (opts::ArmSPE && NoData.match(ErrBuf)) { | ||
errs() << "PERF2BOLT-ERROR: perf data are incompatible for Arm SPE mode " | ||
"consumption. ADDR attribute is unset.\n"; | ||
exit(1); | ||
} | ||
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf; | ||
exit(1); | ||
}; | ||
|
||
auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) { | ||
Regex NoData("Samples for '.*' event do not have ADDR attribute set. " | ||
"Cannot print 'addr' field."); | ||
if (!NoData.match(ErrBuf)) | ||
ErrorCallback(ReturnCode, ErrBuf); | ||
}; | ||
|
@@ -507,7 +528,8 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { | |
exit(0); | ||
} | ||
|
||
if ((!opts::BasicAggregation && parseBranchEvents()) || | ||
if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) || | ||
(opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) || | ||
(opts::BasicAggregation && parseBasicEvents())) | ||
errs() << "PERF2BOLT: failed to parse samples\n"; | ||
|
||
|
@@ -1138,6 +1160,68 @@ ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { | |
return PerfBasicSample{Event.get(), Address}; | ||
} | ||
|
||
/// Parse one SPE record and convert it into a pair of basic samples.
///
/// The record is consumed as: PID, event name, then two hexadecimal address
/// fields. The first address is treated as the branch target and the second
/// as the branch source. The returned pair is ordered {source, target}.
///
/// A sample is returned empty (no event name, PC == 0) when its address is
/// zero, and both samples are empty when the PID has no entry in
/// BinaryMMapInfo. Field parse errors are propagated unchanged; a record that
/// is not terminated by a newline is reported and yields io_error.
ErrorOr<
    std::pair<DataAggregator::PerfBasicSample, DataAggregator::PerfBasicSample>>
DataAggregator::parseSpeAsBasicSamples() {
  // Drop separators in front of the PID column.
  while (checkAndConsumeFS()) {
  }

  ErrorOr<int64_t> Pid = parseNumberField(FieldSeparator, true);
  if (!Pid)
    return Pid.getError();

  constexpr PerfBasicSample NoSample = PerfBasicSample{StringRef(), 0};

  // Records whose PID has no entry in BinaryMMapInfo are skipped wholesale.
  auto MMapIt = BinaryMMapInfo.find(*Pid);
  if (MMapIt == BinaryMMapInfo.end()) {
    consumeRestOfLine();
    return std::make_pair(NoSample, NoSample);
  }

  // Drop separators in front of the event name.
  while (checkAndConsumeFS()) {
  }

  ErrorOr<StringRef> Event = parseString(FieldSeparator);
  if (!Event)
    return Event.getError();

  // Drop separators in front of the first address.
  while (checkAndConsumeFS()) {
  }

  ErrorOr<uint64_t> TargetAddr = parseHexField(FieldSeparator);
  if (!TargetAddr)
    return TargetAddr.getError();

  consumeAllRemainingFS();

  ErrorOr<uint64_t> SourceAddr = parseHexField(FieldSeparator, true);
  if (!SourceAddr)
    return SourceAddr.getError();

  if (!checkAndConsumeNewLine()) {
    reportError("expected end of line");
    return make_error_code(llvm::errc::io_error);
  }

  // Show more meaningful event names on boltdata.
  const bool HasTarget = *TargetAddr != 0x0;
  if (*Event == "instructions:")
    Event = HasTarget ? "branches-spe:" : "instructions-spe:";

  // Build a sample for one address of the record.
  auto makeSample = [&](uint64_t Address) -> PerfBasicSample {
    // When fed with non SPE branch events the target address will be null.
    // This is expected and ignored.
    if (Address != 0x0) {
      if (!BC->HasFixedLoadAddress)
        adjustAddress(Address, MMapIt->second);
      return PerfBasicSample{Event.get(), Address};
    }
    return NoSample;
  };

  const PerfBasicSample SourceSample = makeSample(*SourceAddr);
  const PerfBasicSample TargetSample = makeSample(*TargetAddr);
  return std::make_pair(SourceSample, TargetSample);
}
|
||
ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { | ||
PerfMemSample Res{0, 0}; | ||
|
||
|
@@ -1643,6 +1727,46 @@ std::error_code DataAggregator::parseBasicEvents() { | |
return std::error_code(); | ||
} | ||
|
||
/// Consume all remaining input as SPE records and register them as basic
/// (no-LBR) samples.
///
/// Each record is parsed by parseSpeAsBasicSamples() into a pair of samples.
/// Every sample of the pair with a non-zero PC is counted in BasicSamples and
/// its event name is added to EventNames. Pairs in which both PCs are
/// non-zero are counted and reported at the end.
///
/// \returns the first parse error encountered, or a default-constructed
/// std::error_code once the input is exhausted.
std::error_code DataAggregator::parseSpeAsBasicEvents() { | ||
outs() << "PERF2BOLT: parsing SPE data as basic events (no LBR)...\n"; | ||
NamedRegionTimer T("parseSPEBasic", "Parsing SPE as basic events", | ||
TimerGroupName, TimerGroupDesc, opts::TimeAggregator); | ||
// Number of records for which both samples of the pair carry a non-zero PC.
uint64_t NumSpeBranchSamples = 0; | ||
|
||
// Convert entries to one or two basic samples, depending on whether there is | ||
// branch target information. | ||
while (hasData()) { | ||
auto SamplePair = parseSpeAsBasicSamples(); | ||
if (std::error_code EC = SamplePair.getError()) | ||
return EC; | ||
|
||
// Record a single sample; samples with PC == 0 are ignored.
auto registerSample = [this](const PerfBasicSample *Sample) { | ||
if (!Sample->PC) | ||
return; | ||
|
||
// Flag the containing function, if one is found, as having profile data.
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) | ||
BF->setHasProfileAvailable(); | ||
|
||
++BasicSamples[Sample->PC]; | ||
EventNames.insert(Sample->EventName); | ||
}; | ||
|
||
if (SamplePair->first.PC != 0x0 && SamplePair->second.PC != 0x0) | ||
++NumSpeBranchSamples; | ||
|
||
// Both samples of the pair are registered independently of each other.
registerSample(&SamplePair->first); | ||
registerSample(&SamplePair->second); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Am I correct in understanding that it is the case when we have sample for branch SRC -> TGT which was or was not be taken. However we increase hotness of SRC and TGT nodes in any case registering samples always for both nodes and not taking into account ratio of samples with this branch taken and not taken? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hey Pavel, Reading this back, you are concerned whether storing samples on TGT branches that are not NOT-TAKEN might increase hotness in a block that it shouldn't have. Correct? That should not be a concern, as regardless of whether a branch is taken or not, the reported So, for fall-through SPE packets, the
For taken branches, the
In my previous examples I was using mock addresses for PC/TGT, so I've updated any relevant examples to avoid confusion. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, thank you @paschalis-mpeis for clarifying about taken/not taken information and updating examples. @aaupov @maksfb would you like any additional explanations regarding SPE packets? Generally speaking SPE is providing event based sampling for branches and doesn't have enough information to create trace of N>1 branches and inferring fall throughs. We are aiming to add BRBE (Branch Record Buffer Extension) support for this in BOLT and provide branch stack trace like LBR with it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, thanks Paschalis for your example. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct, thanks Adam. This is irrelevant to any unconditional branching (including call/ret). |
||
} | ||
|
||
// Summarize how many records produced a full pair of samples.
if (NumSpeBranchSamples == 0) | ||
errs() << "PERF2BOLT-WARNING: no SPE branches found\n"; | ||
else | ||
outs() << "PERF2BOLT: found " << NumSpeBranchSamples | ||
<< " SPE branch sample pairs.\n"; | ||
|
||
return std::error_code(); | ||
} | ||
|
||
void DataAggregator::processBasicEvents() { | ||
outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; | ||
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
## Check that Arm SPE mode is available on AArch64 with BasicAggregation. | ||
|
||
REQUIRES: system-linux,perf,target=aarch64{{.*}} | ||
|
||
## Case 1: build the test binary and run perf2bolt with both --nl and --spe on
## an empty profile file; the aggregation job is expected to start.
RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe | ||
RUN: touch %t.empty.perf.data | ||
RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR | ||
|
||
CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job | ||
|
||
## Case 2: record a cycles profile and run perf2bolt with --spe but without
## --nl; the run is expected to fail (`not`) with the error checked below.
RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe | ||
paschalis-mpeis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR | ||
|
||
CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
## Check that Arm SPE mode is unavailable on X86. | ||
|
||
REQUIRES: system-linux,x86_64-linux | ||
|
||
## Build the test binary and run perf2bolt with --nl --spe on an empty profile
## file; the run is expected to fail (`not`) with the message checked below.
RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe | ||
RUN: touch %t.empty.perf.data | ||
RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s | ||
|
||
CHECK: perf2bolt: -spe is available only on AArch64. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,25 @@ | ||
# LLVM components the unit-test target is linked against, including every
# entry of LLVM_TARGETS_TO_BUILD.
set(LLVM_LINK_COMPONENTS | ||
DebugInfoDWARF | ||
Object | ||
${LLVM_TARGETS_TO_BUILD} | ||
) | ||
|
||
# Unit-test executable ProfileTests and its source files.
add_bolt_unittest(ProfileTests | ||
DataAggregator.cpp | ||
PerfSpeEvents.cpp | ||
|
||
DISABLE_LLVM_LINK_LLVM_DYLIB | ||
) | ||
|
||
# Libraries linked privately into ProfileTests.
target_link_libraries(ProfileTests | ||
PRIVATE | ||
LLVMBOLTCore | ||
LLVMBOLTProfile | ||
LLVMTargetParser | ||
LLVMTestingSupport | ||
) | ||
|
||
# For each entry of BOLT_TARGETS_TO_BUILD, define the upper-cased
# <TARGET>_AVAILABLE macro when compiling ProfileTests.
foreach (tgt ${BOLT_TARGETS_TO_BUILD}) | ||
string(TOUPPER "${tgt}" upper) | ||
target_compile_definitions(ProfileTests PRIVATE "${upper}_AVAILABLE") | ||
endforeach() |
Uh oh!
There was an error while loading. Please reload this page.