-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[BOLT] Support pre-aggregated basic sample profile #140196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT] Support pre-aggregated basic sample profile #140196
Conversation
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
@llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) ChangesDefine a pre-aggregated basic sample format:
Test Plan: update pre-aggregated-perf.test Full diff: https://github.com/llvm/llvm-project/pull/140196.diff 5 Files Affected:
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6d918134137d5..7d60b4689fb04 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -370,26 +370,25 @@ class DataAggregator : public DataReader {
/// memory.
///
/// File format syntax:
- /// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
- /// <count> [<mispred_count>]
+ /// {S|B|F|f|T} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
///
- /// B - indicates an aggregated branch
- /// F - an aggregated fall-through
+ /// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
+ ///
+ /// S - indicates an aggregated basic sample at <start>
+ /// B - indicates an aggregated branch from <start> to <end>
+ /// F - an aggregated fall-through from <start> to <end>
/// f - an aggregated fall-through with external origin - used to disambiguate
/// between a return hitting a basic block head and a regular internal
/// jump to the block
- /// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
- ///
- /// <start_id> - build id of the object containing the start address. We can
- /// skip it for the main binary and use "X" for an unknown object. This will
- /// save some space and facilitate human parsing.
- ///
- /// <start_offset> - hex offset from the object base load address (0 for the
- /// main executable unless it's PIE) to the start address.
+ /// T - an aggregated trace: branch from <start> to <end> with a fall-through
+ /// to <ft_end>
///
- /// <end_id>, <end_offset> - same for the end address.
+ /// <id> - build id of the object containing the address. We can skip it for
+ /// the main binary and use "X" for an unknown object. This will save some
+ /// space and facilitate human parsing.
///
- /// <ft_end> - same for the fallthrough_end address.
+ /// <offset> - hex offset from the object base load address (0 for the
+ /// main executable unless it's PIE) to the address.
///
/// <count> - total aggregated count of the branch or a fall-through.
///
@@ -397,10 +396,17 @@ class DataAggregator : public DataReader {
/// Omitted for fall-throughs.
///
/// Example:
+ /// Basic samples profile:
+ /// S 41be50 3
+ ///
+ /// Soft-deprecated branch profile with separate branches and fall-throughs:
/// F 41be50 41be50 3
/// F 41be90 41be90 4
/// B 4b1942 39b57f0 3 0
/// B 4b196f 4b19e0 2 0
+ ///
+ /// Recommended branch profile with pre-aggregated traces:
+ /// T 4b196f 4b19e0 4b19ef 2
void parsePreAggregated();
/// Parse the full output of pre-aggregated LBR samples generated by
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index aa681e633c0d8..6e889908003da 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1216,54 +1216,54 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
- enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
+ enum AggregatedLBREntry {
+ TRACE,
+ SAMPLE,
+ BRANCH,
+ FT,
+ FT_EXTERNAL_ORIGIN,
+ INVALID
+ };
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
.Case("T", TRACE)
+ .Case("S", SAMPLE)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Default(INVALID);
if (Type == INVALID) {
- reportError("expected T, B, F or f");
+ reportError("expected T, S, B, F or f");
return make_error_code(llvm::errc::io_error);
}
- while (checkAndConsumeFS()) {
- }
- ErrorOr<Location> From = parseLocationOrOffset();
- if (std::error_code EC = From.getError())
- return EC;
+ std::optional<Location> Addrs[3];
+ int AddrNum = 2;
+ if (Type == TRACE)
+ AddrNum = 3;
+ else if (Type == SAMPLE)
+ AddrNum = 1;
- while (checkAndConsumeFS()) {
- }
- ErrorOr<Location> To = parseLocationOrOffset();
- if (std::error_code EC = To.getError())
- return EC;
+ int64_t Counters[2];
+ int CounterNum = 1;
+ if (Type == BRANCH)
+ CounterNum = 2;
- ErrorOr<Location> TraceFtEnd = std::error_code();
- if (Type == AggregatedLBREntry::TRACE) {
+ for (int I = 0; I < AddrNum; ++I) {
while (checkAndConsumeFS()) {
}
- TraceFtEnd = parseLocationOrOffset();
- if (std::error_code EC = TraceFtEnd.getError())
- return EC;
- }
-
- while (checkAndConsumeFS()) {
+ if (ErrorOr<Location> Addr = parseLocationOrOffset())
+ Addrs[I] = Addr.get();
+ else
+ return Addr.getError();
}
- ErrorOr<int64_t> Frequency =
- parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
- if (std::error_code EC = Frequency.getError())
- return EC;
- uint64_t Mispreds = 0;
- if (Type == AggregatedLBREntry::BRANCH) {
+ for (int I = 0; I < CounterNum; ++I) {
while (checkAndConsumeFS()) {
}
- ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
- if (std::error_code EC = MispredsOrErr.getError())
- return EC;
- Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
+ if (ErrorOr<int64_t> Count = parseNumberField(FieldSeparator, I + 1 == CounterNum))
+ Counters[I] = Count.get();
+ else
+ return Count.getError();
}
if (!checkAndConsumeNewLine()) {
@@ -1271,16 +1271,25 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
return make_error_code(llvm::errc::io_error);
}
- BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
- BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
+ const uint64_t FromOffset = Addrs[0]->Offset;
+ BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
+ if (FromFunc)
+ FromFunc->setHasProfileAvailable();
- for (BinaryFunction *BF : {FromFunc, ToFunc})
- if (BF)
- BF->setHasProfileAvailable();
+ if (Type == SAMPLE) {
+ BasicSamples[FromOffset] += Counters[0];
+ return std::error_code();
+ }
+
+ const uint64_t ToOffset = Addrs[1]->Offset;
+ BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
+ if (ToFunc)
+ ToFunc->setHasProfileAvailable();
- uint64_t Count = static_cast<uint64_t>(Frequency.get());
+ int64_t Count = Counters[0];
+ int64_t Mispreds = Counters[1];
- Trace Trace(From->Offset, To->Offset);
+ Trace Trace(FromOffset, ToOffset);
// Taken trace
if (Type == TRACE || Type == BRANCH) {
TakenBranchInfo &Info = BranchLBRs[Trace];
@@ -1291,8 +1300,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
}
// Construct fallthrough part of the trace
if (Type == TRACE) {
- Trace.From = To->Offset;
- Trace.To = TraceFtEnd->Offset;
+ const uint64_t TraceFtEndOffset = Addrs[2]->Offset;
+ Trace.From = ToOffset;
+ Trace.To = TraceFtEndOffset;
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
}
// Add fallthrough trace
diff --git a/bolt/test/X86/Inputs/pre-aggregated-basic.txt b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
new file mode 100644
index 0000000000000..28bcacca70ee1
--- /dev/null
+++ b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
@@ -0,0 +1,18 @@
+S 4005f0 1
+S 4005f0 1
+S 400610 1
+S 400ad1 2
+S 400b10 1
+S 400bb7 1
+S 400bbc 2
+S 400d90 1
+S 400dae 1
+S 400e00 2
+S 401170 22
+S 401180 58
+S 4011a0 33
+S 4011a9 33
+S 4011ad 58
+S 4011b2 22
+S X:7f36d18d60c0 2
+S X:7f36d18f2ce0 1
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index c05a06bf74945..63488a9062bdd 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -57,6 +57,14 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
RUN: --aggregate-only --profile-format=yaml --profile-use-dfs
RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
+## Test pre-aggregated basic profile
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
+BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
+BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1
diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py
index bcf9a777922d5..6a391c10b9481 100755
--- a/bolt/test/link_fdata.py
+++ b/bolt/test/link_fdata.py
@@ -36,9 +36,9 @@
fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
# Pre-aggregated profile:
-# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
-# <count> [<mispred_count>]
-preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
+# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
+# <loc>: [<id>:]<offset>
+preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
@@ -370,33 +370,46 @@ class DataAggregator : public DataReader { | |||
/// memory. | |||
/// | |||
/// File format syntax: | |||
/// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>] | |||
/// <count> [<mispred_count>] | |||
/// E <event> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you specify multiple events per file?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. It is possible to group samples by event name, but currently we don't have any logic based on the event names other than passing them through to fdata and YAML.
Created using spr 1.3.4 [skip ci]
Define a pre-aggregated basic sample format: ``` E <event name> S <location> <count> ``` `-nl` flag is required to use parsed basic samples. Test Plan: update pre-aggregated-perf.test
#140196 introduced UB by using uninitialized misprediction count for pre-aggregated traces. Fix by zero initializing both counters. Test Plan: updated entry-point-fallthru.s
Define a pre-aggregated basic sample format: ``` E <event name> S <location> <count> ``` `-nl` flag is required to use parsed basic samples. Test Plan: update pre-aggregated-perf.test
llvm#140196 introduced UB by using uninitialized misprediction count for pre-aggregated traces. Fix by zero initializing both counters. Test Plan: updated entry-point-fallthru.s
Define a pre-aggregated basic sample format: ``` E <event name> S <location> <count> ``` `-nl` flag is required to use parsed basic samples. Test Plan: update pre-aggregated-perf.test
llvm#140196 introduced UB by using uninitialized misprediction count for pre-aggregated traces. Fix by zero initializing both counters. Test Plan: updated entry-point-fallthru.s
Define a pre-aggregated basic sample format:
-nl
flag is required to use parsed basic samples.Test Plan: update pre-aggregated-perf.test