Skip to content

Commit 3380dae

Browse files
authored
[lld][InstrProf] Refactor BPSectionOrderer.cpp (#107347)
Refactor some code in `BPSectionOrderer.cpp` in preparation for #107348. * Rename `constructNodesForCompression()` -> `getUnsForCompression()` and return a `SmallVector` directly rather than populating a vector alias * Pass `duplicateSectionIdxs` as a pointer to make it possible to skip finding (nearly) duplicate sections * Combine `duplicate{Function,Data}SectionIdxs` into one variable * Compute all `BPFunctionNode` vectors at the end (like `nodesForStartup`) There should be no functional change.
1 parent 50be455 commit 3380dae

File tree

1 file changed

+66
-58
lines changed

1 file changed

+66
-58
lines changed

lld/MachO/BPSectionOrderer.cpp

Lines changed: 66 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
using namespace llvm;
2222
using namespace lld::macho;
2323

24+
using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
25+
2426
/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
2527
/// "yyyy" are numbers that could change between builds. We need to use the root
2628
/// symbol name before this suffix so these symbols can be matched with profiles
@@ -60,12 +62,15 @@ getRelocHash(const Reloc &reloc,
6062
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
6163
}
6264

63-
static void constructNodesForCompression(
64-
const SmallVector<const InputSection *> &sections,
65+
/// Given \p sectionIdxs, a list of section indexes, return a list of utility
66+
/// nodes for each section index. If \p duplicateSectionIdxs is provided,
67+
/// populate it with nearly identical sections. Increment \p maxUN to be the
68+
/// largest utility node we have used so far.
69+
static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
70+
ArrayRef<const InputSection *> sections,
6571
const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
66-
const SmallVector<unsigned> &sectionIdxs,
67-
std::vector<BPFunctionNode> &nodes,
68-
DenseMap<unsigned, SmallVector<unsigned>> &duplicateSectionIdxs,
72+
ArrayRef<unsigned> sectionIdxs,
73+
DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
6974
BPFunctionNode::UtilityNodeT &maxUN) {
7075
TimeTraceScope timeScope("Build nodes for compression");
7176

@@ -103,49 +108,52 @@ static void constructNodesForCompression(
103108
for (auto hash : hashes)
104109
++hashFrequency[hash];
105110

106-
// Merge sections that are nearly identical
107-
SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
108-
DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
109-
for (auto &[sectionIdx, hashes] : sectionHashes) {
110-
uint64_t wholeHash = 0;
111-
for (auto hash : hashes)
112-
if (hashFrequency[hash] > 5)
113-
wholeHash ^= hash;
114-
auto [it, wasInserted] =
115-
wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
116-
if (wasInserted) {
117-
newSectionHashes.emplace_back(sectionIdx, hashes);
118-
} else {
119-
duplicateSectionIdxs[it->getSecond()].push_back(sectionIdx);
111+
if (duplicateSectionIdxs) {
112+
// Merge sections that are nearly identical
113+
SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
114+
DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
115+
for (auto &[sectionIdx, hashes] : sectionHashes) {
116+
uint64_t wholeHash = 0;
117+
for (auto hash : hashes)
118+
if (hashFrequency[hash] > 5)
119+
wholeHash ^= hash;
120+
auto [it, wasInserted] =
121+
wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
122+
if (wasInserted) {
123+
newSectionHashes.emplace_back(sectionIdx, hashes);
124+
} else {
125+
(*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
126+
}
120127
}
121-
}
122-
sectionHashes = newSectionHashes;
128+
sectionHashes = newSectionHashes;
123129

124-
// Recompute hash frequencies
125-
hashFrequency.clear();
126-
for (auto &[sectionIdx, hashes] : sectionHashes)
127-
for (auto hash : hashes)
128-
++hashFrequency[hash];
130+
// Recompute hash frequencies
131+
hashFrequency.clear();
132+
for (auto &[sectionIdx, hashes] : sectionHashes)
133+
for (auto hash : hashes)
134+
++hashFrequency[hash];
135+
}
129136

130137
// Filter rare and common hashes and assign each a unique utility node that
131138
// doesn't conflict with the trace utility nodes
132139
DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
133140
for (auto &[hash, frequency] : hashFrequency) {
134-
if (frequency <= 1 || frequency * 2 > wholeHashToSectionIdx.size())
141+
if (frequency <= 1 || frequency * 2 > sectionHashes.size())
135142
continue;
136143
hashToUN[hash] = ++maxUN;
137144
}
138145

139-
std::vector<BPFunctionNode::UtilityNodeT> uns;
146+
SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
140147
for (auto &[sectionIdx, hashes] : sectionHashes) {
148+
UtilityNodes uns;
141149
for (auto &hash : hashes) {
142150
auto it = hashToUN.find(hash);
143151
if (it != hashToUN.end())
144152
uns.push_back(it->second);
145153
}
146-
nodes.emplace_back(sectionIdx, uns);
147-
uns.clear();
154+
sectionUns.emplace_back(sectionIdx, uns);
148155
}
156+
return sectionUns;
149157
}
150158

151159
DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
@@ -185,10 +193,10 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
185193
sectionIdxs.end());
186194
}
187195

188-
std::vector<BPFunctionNode> nodesForStartup;
189196
BPFunctionNode::UtilityNodeT maxUN = 0;
190-
DenseMap<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>
191-
startupSectionIdxUNs;
197+
DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
198+
// Used to define the initial order for startup functions.
199+
DenseMap<unsigned, size_t> sectionIdxToTimestamp;
192200
std::unique_ptr<InstrProfReader> reader;
193201
if (!profilePath.empty()) {
194202
auto fs = vfs::getRealFileSystem();
@@ -202,8 +210,6 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
202210
}
203211
auto &traces = reader->getTemporalProfTraces();
204212

205-
// Used to define the initial order for startup functions.
206-
DenseMap<unsigned, size_t> sectionIdxToTimestamp;
207213
DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
208214
for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
209215
uint64_t currentSize = 0, cutoffSize = 1;
@@ -245,15 +251,6 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
245251
++maxUN;
246252
sectionIdxToFirstUN.clear();
247253
}
248-
249-
// These uns should already be sorted without duplicates.
250-
for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
251-
nodesForStartup.emplace_back(sectionIdx, uns);
252-
253-
llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
254-
return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
255-
std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
256-
});
257254
}
258255

259256
SmallVector<unsigned> sectionIdxsForFunctionCompression,
@@ -271,21 +268,32 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
271268
}
272269
}
273270

274-
std::vector<BPFunctionNode> nodesForFunctionCompression,
275-
nodesForDataCompression;
276271
// Map a section index (to be ordered for compression) to a list of duplicate
277272
// section indices (not ordered for compression).
278-
DenseMap<unsigned, SmallVector<unsigned>> duplicateFunctionSectionIdxs,
279-
duplicateDataSectionIdxs;
280-
constructNodesForCompression(
273+
DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
274+
auto unsForFunctionCompression = getUnsForCompression(
281275
sections, sectionToIdx, sectionIdxsForFunctionCompression,
282-
nodesForFunctionCompression, duplicateFunctionSectionIdxs, maxUN);
283-
constructNodesForCompression(
276+
&duplicateSectionIdxs, maxUN);
277+
auto unsForDataCompression = getUnsForCompression(
284278
sections, sectionToIdx, sectionIdxsForDataCompression,
285-
nodesForDataCompression, duplicateDataSectionIdxs, maxUN);
279+
&duplicateSectionIdxs, maxUN);
286280

287-
// Sort nodes by their Id (which is the section index) because the input
288-
// linker order tends to be not bad
281+
std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
282+
nodesForDataCompression;
283+
for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
284+
nodesForStartup.emplace_back(sectionIdx, uns);
285+
for (auto &[sectionIdx, uns] : unsForFunctionCompression)
286+
nodesForFunctionCompression.emplace_back(sectionIdx, uns);
287+
for (auto &[sectionIdx, uns] : unsForDataCompression)
288+
nodesForDataCompression.emplace_back(sectionIdx, uns);
289+
290+
// Use the first timestamp to define the initial order for startup nodes.
291+
llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
292+
return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
293+
std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
294+
});
295+
// Sort compression nodes by their Id (which is the section index) because the
296+
// input linker order tends to be not bad.
289297
llvm::sort(nodesForFunctionCompression,
290298
[](auto &L, auto &R) { return L.Id < R.Id; });
291299
llvm::sort(nodesForDataCompression,
@@ -318,8 +326,8 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
318326
if (orderedSections.insert(isec))
319327
++numCodeCompressionSections;
320328

321-
auto It = duplicateFunctionSectionIdxs.find(node.Id);
322-
if (It == duplicateFunctionSectionIdxs.end())
329+
auto It = duplicateSectionIdxs.find(node.Id);
330+
if (It == duplicateSectionIdxs.end())
323331
continue;
324332
for (auto dupSecIdx : It->getSecond()) {
325333
const auto *dupIsec = sections[dupSecIdx];
@@ -332,8 +340,8 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
332340
const auto *isec = sections[node.Id];
333341
if (orderedSections.insert(isec))
334342
++numDataCompressionSections;
335-
auto It = duplicateDataSectionIdxs.find(node.Id);
336-
if (It == duplicateDataSectionIdxs.end())
343+
auto It = duplicateSectionIdxs.find(node.Id);
344+
if (It == duplicateSectionIdxs.end())
337345
continue;
338346
for (auto dupSecIdx : It->getSecond()) {
339347
const auto *dupIsec = sections[dupSecIdx];

0 commit comments

Comments
 (0)