Skip to content

Commit 79e859e

Browse files
authored
[lld] Move BPSectionOrderer from MachO to Common for reuse in ELF (#117514)
Add lld/Common/BPSectionOrdererBase from MachO for reuse in ELF
1 parent 0fca76d commit 79e859e

File tree

6 files changed

+595
-403
lines changed

6 files changed

+595
-403
lines changed

lld/Common/BPSectionOrdererBase.cpp

Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
//===- BPSectionOrdererBase.cpp -------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "lld/Common/BPSectionOrdererBase.h"
10+
#include "lld/Common/ErrorHandler.h"
11+
#include "llvm/ADT/DenseSet.h"
12+
#include "llvm/ADT/SetVector.h"
13+
#include "llvm/ADT/SmallSet.h"
14+
#include "llvm/ADT/StringMap.h"
15+
#include "llvm/ProfileData/InstrProfReader.h"
16+
#include "llvm/Support/BalancedPartitioning.h"
17+
#include "llvm/Support/TimeProfiler.h"
18+
#include "llvm/Support/VirtualFileSystem.h"
19+
20+
#define DEBUG_TYPE "bp-section-orderer"
21+
22+
using namespace llvm;
23+
using namespace lld;
24+
25+
using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
26+
27+
/// Build the compression-oriented utility nodes for \p sectionIdxs.
///
/// Each section is hashed (via BPSectionBase::getSectionHashes) into a list of
/// content hashes; hashes shared by several sections become utility nodes, so
/// BalancedPartitioning will place sections with similar content close
/// together (improving compressed-size locality).
///
/// \param sections           all candidate sections, indexed by section index.
/// \param sectionToIdx       map from a section's underlying object to its
///                           index in \p sections (consumed by
///                           getSectionHashes).
/// \param sectionIdxs        the subset of section indices to build nodes for.
/// \param duplicateSectionIdxs if non-null, nearly-identical sections are
///                           merged: only one representative is kept in the
///                           result and the others are recorded here, keyed by
///                           the representative's index.
/// \param maxUN              highest utility node id used so far; incremented
///                           for every new utility node so ids never collide
///                           with previously assigned (e.g. trace) nodes.
/// \returns one (sectionIdx, utility-node list) pair per surviving section.
static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
    ArrayRef<const BPSectionBase *> sections,
    const DenseMap<const void *, uint64_t> &sectionToIdx,
    ArrayRef<unsigned> sectionIdxs,
    DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
    BPFunctionNode::UtilityNodeT &maxUN) {
  TimeTraceScope timeScope("Build nodes for compression");

  // Collect the content hashes of every requested section.
  SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
  sectionHashes.reserve(sectionIdxs.size());
  SmallVector<uint64_t> hashes;

  for (unsigned sectionIdx : sectionIdxs) {
    const auto *isec = sections[sectionIdx];
    isec->getSectionHashes(hashes, sectionToIdx);
    sectionHashes.emplace_back(sectionIdx, std::move(hashes));
    // `hashes` is moved-from; clear() restores it to a known-empty state for
    // reuse on the next iteration.
    hashes.clear();
  }

  // Count how many times each hash occurs across all collected sections.
  DenseMap<uint64_t, unsigned> hashFrequency;
  for (auto &[sectionIdx, hashes] : sectionHashes)
    for (auto hash : hashes)
      ++hashFrequency[hash];

  if (duplicateSectionIdxs) {
    // Merge sections that are nearly identical: XOR together each section's
    // "frequent" hashes (frequency > 5) into a whole-section fingerprint.
    // Sections with the same fingerprint keep only the first occurrence; the
    // rest are recorded as duplicates of it so they can be emitted adjacently
    // later without being partitioned independently.
    SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
    DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
    for (auto &[sectionIdx, hashes] : sectionHashes) {
      uint64_t wholeHash = 0;
      for (auto hash : hashes)
        if (hashFrequency[hash] > 5)
          wholeHash ^= hash;
      auto [it, wasInserted] =
          wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
      if (wasInserted) {
        newSectionHashes.emplace_back(sectionIdx, hashes);
      } else {
        (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
      }
    }
    sectionHashes = newSectionHashes;

    // Recompute hash frequencies, since duplicate sections were dropped.
    hashFrequency.clear();
    for (auto &[sectionIdx, hashes] : sectionHashes)
      for (auto hash : hashes)
        ++hashFrequency[hash];
  }

  // Filter rare and common hashes and assign each a unique utility node that
  // doesn't conflict with the trace utility nodes. A hash seen only once
  // carries no grouping signal; a hash present in more than half the sections
  // is too common to discriminate between them.
  DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
  for (auto &[hash, frequency] : hashFrequency) {
    if (frequency <= 1 || frequency * 2 > sectionHashes.size())
      continue;
    hashToUN[hash] = ++maxUN;
  }

  // Translate each section's hash list into its utility-node list.
  SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
  for (auto &[sectionIdx, hashes] : sectionHashes) {
    UtilityNodes uns;
    for (auto &hash : hashes) {
      auto it = hashToUN.find(hash);
      if (it != hashToUN.end())
        uns.push_back(it->second);
    }
    sectionUns.emplace_back(sectionIdx, uns);
  }
  return sectionUns;
}
98+
99+
/// Reorder \p inputSections using BalancedPartitioning and return a priority
/// for each ordered section.
///
/// The pipeline:
///   1. Index the valid input sections and map (root) symbol names to the
///      sections that define them.
///   2. If \p profilePath is set, read temporal profile traces and assign
///      "startup" utility nodes so functions executed close in time are
///      placed close in the output.
///   3. Build "compression" utility nodes (content-hash based) for the
///      remaining code and/or data sections, per the two flags.
///   4. Run BalancedPartitioning on the startup, function-compression, and
///      data-compression node sets.
///   5. Emit sections in that order (startup first, then code, then data,
///      with near-duplicates placed next to their representative) and assign
///      each a decreasing priority starting below
///      \p highestAvailablePriority.
///
/// \param highestAvailablePriority in/out: decremented once per ordered
///        section; each section receives the pre-decremented value.
/// \param profilePath path to an instrumentation profile with temporal
///        traces; empty disables startup ordering.
/// \param forFunctionCompression order code sections for compressed-size
///        locality.
/// \param forDataCompression order data sections for compressed-size
///        locality.
/// \param compressionSortStartupFunctions also add compression utility nodes
///        to the startup sections.
/// \param verbose print ordering statistics (and, with a profile, an
///        estimated page-fault-curve area) to dbgs().
/// \returns map from section to its assigned priority; sections not ordered
///        by this pass are absent.
llvm::DenseMap<const BPSectionBase *, size_t>
BPSectionBase::reorderSectionsByBalancedPartitioning(
    size_t &highestAvailablePriority, llvm::StringRef profilePath,
    bool forFunctionCompression, bool forDataCompression,
    bool compressionSortStartupFunctions, bool verbose,
    SmallVector<std::unique_ptr<BPSectionBase>> &inputSections) {
  TimeTraceScope timeScope("Setup Balanced Partitioning");
  SmallVector<const BPSectionBase *> sections;
  DenseMap<const void *, uint64_t> sectionToIdx;
  StringMap<DenseSet<unsigned>> symbolToSectionIdxs;

  // Process input sections: keep only those with valid data, give each a
  // dense index, and record which sections each symbol name appears in.
  for (const auto &isec : inputSections) {
    if (!isec->hasValidData())
      continue;

    unsigned sectionIdx = sections.size();
    sectionToIdx.try_emplace(isec->getSection(), sectionIdx);
    sections.emplace_back(isec.get());
    for (auto &sym : isec->getSymbols())
      symbolToSectionIdxs[sym->getName()].insert(sectionIdx);
  }
  // Re-key by root symbol so profile names match despite suffixes; also index
  // under the resolved linkage name when one exists.
  StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
  for (auto &entry : symbolToSectionIdxs) {
    StringRef name = entry.getKey();
    auto &sectionIdxs = entry.getValue();
    name = BPSectionBase::getRootSymbol(name);
    rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
                                         sectionIdxs.end());
    if (auto resolvedLinkageName =
            sections[*sectionIdxs.begin()]->getResolvedLinkageName(name))
      rootSymbolToSectionIdxs[resolvedLinkageName.value()].insert(
          sectionIdxs.begin(), sectionIdxs.end());
  }

  BPFunctionNode::UtilityNodeT maxUN = 0;
  DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
  // Used to define the initial order for startup functions.
  DenseMap<unsigned, size_t> sectionIdxToTimestamp;
  std::unique_ptr<InstrProfReader> reader;
  if (!profilePath.empty()) {
    auto fs = vfs::getRealFileSystem();
    auto readerOrErr = InstrProfReader::create(profilePath, *fs);
    lld::checkError(readerOrErr.takeError());

    reader = std::move(readerOrErr.get());
    for (auto &entry : *reader) {
      // Read all entries. NOTE(review): iterating presumably populates the
      // reader's symbol table / trace data as a side effect — confirm against
      // InstrProfReader's contract.
      (void)entry;
    }
    auto &traces = reader->getTemporalProfTraces();

    // Walk each trace, assigning a shared "startup" utility node to runs of
    // functions executed close together. A new utility node starts whenever
    // the accumulated size or timestamp crosses a doubling cutoff, so node
    // granularity grows geometrically along the trace.
    DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
    for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
      uint64_t currentSize = 0, cutoffSize = 1;
      size_t cutoffTimestamp = 1;
      auto &trace = traces[traceIdx].FunctionNameRefs;
      for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
        auto [Filename, ParsedFuncName] = getParsedIRPGOName(
            reader->getSymtab().getFuncOrVarName(trace[timestamp]));
        ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);

        auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName);
        if (sectionIdxsIt == rootSymbolToSectionIdxs.end())
          continue;
        auto &sectionIdxs = sectionIdxsIt->getValue();
        // If the same symbol is found in multiple sections, they might be
        // identical, so we arbitrarily use the size from the first section.
        currentSize += sections[*sectionIdxs.begin()]->getSize();

        // Since BalancedPartitioning is sensitive to the initial order, we need
        // to explicitly define it to be ordered by earliest timestamp.
        for (unsigned sectionIdx : sectionIdxs) {
          auto [it, wasInserted] =
              sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp);
          if (!wasInserted)
            it->getSecond() = std::min<size_t>(it->getSecond(), timestamp);
        }

        if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
          ++maxUN;
          cutoffSize = 2 * currentSize;
          cutoffTimestamp = 2 * cutoffTimestamp;
        }
        for (unsigned sectionIdx : sectionIdxs)
          sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN);
      }
      // A section gets every utility node from its first appearance through
      // the end of this trace, so earlier sections share more nodes.
      for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
        for (auto un = firstUN; un <= maxUN; ++un)
          startupSectionIdxUNs[sectionIdx].push_back(un);
      ++maxUN;
      sectionIdxToFirstUN.clear();
    }
  }

  // Partition the non-startup sections into the code and data compression
  // candidate sets, honoring the two feature flags.
  SmallVector<unsigned> sectionIdxsForFunctionCompression,
      sectionIdxsForDataCompression;
  for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
    if (startupSectionIdxUNs.count(sectionIdx))
      continue;
    const auto *isec = sections[sectionIdx];
    if (isec->isCodeSection()) {
      if (forFunctionCompression)
        sectionIdxsForFunctionCompression.push_back(sectionIdx);
    } else {
      if (forDataCompression)
        sectionIdxsForDataCompression.push_back(sectionIdx);
    }
  }

  if (compressionSortStartupFunctions) {
    // Also give startup sections content-based utility nodes, merged (sorted,
    // deduplicated) into their trace-derived nodes.
    SmallVector<unsigned> startupIdxs;
    for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
      startupIdxs.push_back(sectionIdx);
    auto unsForStartupFunctionCompression =
        getUnsForCompression(sections, sectionToIdx, startupIdxs,
                             /*duplicateSectionIdxs=*/nullptr, maxUN);
    for (auto &[sectionIdx, compressionUns] :
         unsForStartupFunctionCompression) {
      auto &uns = startupSectionIdxUNs[sectionIdx];
      uns.append(compressionUns);
      llvm::sort(uns);
      uns.erase(std::unique(uns.begin(), uns.end()), uns.end());
    }
  }

  // Map a section index (order directly) to a list of duplicate section indices
  // (not ordered directly).
  DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
  auto unsForFunctionCompression = getUnsForCompression(
      sections, sectionToIdx, sectionIdxsForFunctionCompression,
      &duplicateSectionIdxs, maxUN);
  auto unsForDataCompression = getUnsForCompression(
      sections, sectionToIdx, sectionIdxsForDataCompression,
      &duplicateSectionIdxs, maxUN);

  // Wrap each (sectionIdx, utility nodes) pair into a BPFunctionNode for the
  // three independent partitioning runs.
  std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
      nodesForDataCompression;
  for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
    nodesForStartup.emplace_back(sectionIdx, uns);
  for (auto &[sectionIdx, uns] : unsForFunctionCompression)
    nodesForFunctionCompression.emplace_back(sectionIdx, uns);
  for (auto &[sectionIdx, uns] : unsForDataCompression)
    nodesForDataCompression.emplace_back(sectionIdx, uns);

  // Use the first timestamp to define the initial order for startup nodes.
  llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
    return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
           std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
  });
  // Sort compression nodes by their Id (which is the section index) because the
  // input linker order tends to be not bad.
  llvm::sort(nodesForFunctionCompression,
             [](auto &L, auto &R) { return L.Id < R.Id; });
  llvm::sort(nodesForDataCompression,
             [](auto &L, auto &R) { return L.Id < R.Id; });

  {
    TimeTraceScope timeScope("Balanced Partitioning");
    BalancedPartitioningConfig config;
    BalancedPartitioning bp(config);
    bp.run(nodesForStartup);
    bp.run(nodesForFunctionCompression);
    bp.run(nodesForDataCompression);
  }

  unsigned numStartupSections = 0;
  unsigned numCodeCompressionSections = 0;
  unsigned numDuplicateCodeSections = 0;
  unsigned numDataCompressionSections = 0;
  unsigned numDuplicateDataSections = 0;
  // SetVector preserves insertion order while rejecting re-insertions, so a
  // section ordered in an earlier group is not ordered again.
  SetVector<const BPSectionBase *> orderedSections;
  // Order startup functions,
  for (auto &node : nodesForStartup) {
    const auto *isec = sections[node.Id];
    if (orderedSections.insert(isec))
      ++numStartupSections;
  }
  // then functions for compression,
  for (auto &node : nodesForFunctionCompression) {
    const auto *isec = sections[node.Id];
    if (orderedSections.insert(isec))
      ++numCodeCompressionSections;

    // Place near-duplicate sections immediately after their representative.
    auto It = duplicateSectionIdxs.find(node.Id);
    if (It == duplicateSectionIdxs.end())
      continue;
    for (auto dupSecIdx : It->getSecond()) {
      const auto *dupIsec = sections[dupSecIdx];
      if (orderedSections.insert(dupIsec))
        ++numDuplicateCodeSections;
    }
  }
  // then data for compression.
  for (auto &node : nodesForDataCompression) {
    const auto *isec = sections[node.Id];
    if (orderedSections.insert(isec))
      ++numDataCompressionSections;
    auto It = duplicateSectionIdxs.find(node.Id);
    if (It == duplicateSectionIdxs.end())
      continue;
    for (auto dupSecIdx : It->getSecond()) {
      const auto *dupIsec = sections[dupSecIdx];
      if (orderedSections.insert(dupIsec))
        ++numDuplicateDataSections;
    }
  }

  if (verbose) {
    unsigned numTotalOrderedSections =
        numStartupSections + numCodeCompressionSections +
        numDuplicateCodeSections + numDataCompressionSections +
        numDuplicateDataSections;
    dbgs()
        << "Ordered " << numTotalOrderedSections
        << " sections using balanced partitioning:\n  Functions for startup: "
        << numStartupSections
        << "\n  Functions for compression: " << numCodeCompressionSections
        << "\n  Duplicate functions: " << numDuplicateCodeSections
        << "\n  Data for compression: " << numDataCompressionSections
        << "\n  Duplicate data: " << numDuplicateDataSections << "\n";

    if (!profilePath.empty()) {
      // Evaluate this function order for startup: lay the ordered sections
      // out at page granularity and estimate page-fault behavior against the
      // recorded traces. (`reader` is non-null here because it was created
      // under the same !profilePath.empty() condition above.)
      StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
      const uint64_t pageSize = (1 << 14);
      uint64_t currentAddress = 0;
      for (const auto *isec : orderedSections) {
        for (auto &sym : isec->getSymbols()) {
          uint64_t startAddress = currentAddress + sym->getValue().value_or(0);
          uint64_t endAddress = startAddress + sym->getSize().value_or(0);
          uint64_t firstPage = startAddress / pageSize;
          // NOTE: the kernel might pull in a few pages when one is touched,
          // so it might be more accurate to force lastPage to be aligned by
          // 4?
          uint64_t lastPage = endAddress / pageSize;
          StringRef rootSymbol = sym->getName();
          rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
          symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
          if (auto resolvedLinkageName =
                  isec->getResolvedLinkageName(rootSymbol))
            symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
                                            firstPage, lastPage);
        }
        currentAddress += isec->getSize();
      }

      // The area under the curve F where F(t) is the total number of page
      // faults at step t.
      unsigned area = 0;
      for (auto &trace : reader->getTemporalProfTraces()) {
        SmallSet<uint64_t, 0> touchedPages;
        for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
          auto traceId = trace.FunctionNameRefs[step];
          auto [Filename, ParsedFuncName] =
              getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
          ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);
          auto it = symbolToPageNumbers.find(ParsedFuncName);
          if (it != symbolToPageNumbers.end()) {
            auto &[firstPage, lastPage] = it->getValue();
            for (uint64_t i = firstPage; i <= lastPage; i++)
              touchedPages.insert(i);
          }
          area += touchedPages.size();
        }
      }
      dbgs() << "Total area under the page fault curve: " << (float)area
             << "\n";
    }
  }

  // Hand out decreasing priorities in emission order; the caller applies them
  // to produce the final section layout.
  DenseMap<const BPSectionBase *, size_t> sectionPriorities;
  for (const auto *isec : orderedSections)
    sectionPriorities[isec] = --highestAvailablePriority;
  return sectionPriorities;
}

lld/Common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ set_source_files_properties("${version_inc}"
2424

2525
add_lld_library(lldCommon
2626
Args.cpp
27+
BPSectionOrdererBase.cpp
2728
CommonLinkerContext.cpp
2829
DriverDispatcher.cpp
2930
DWARF.cpp
@@ -47,6 +48,7 @@ add_lld_library(lldCommon
4748
Demangle
4849
MC
4950
Option
51+
ProfileData
5052
Support
5153
Target
5254
TargetParser

0 commit comments

Comments
 (0)