1
+ // ===- BPSectionOrdererBase.cpp -------------------------------------------===//
2
+ //
3
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
+ // See https://llvm.org/LICENSE.txt for license information.
5
+ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
+ //
7
+ // ===----------------------------------------------------------------------===//
8
+
9
+ #include " lld/Common/BPSectionOrdererBase.h"
10
+ #include " lld/Common/ErrorHandler.h"
11
+ #include " llvm/ADT/DenseSet.h"
12
+ #include " llvm/ADT/SetVector.h"
13
+ #include " llvm/ADT/SmallSet.h"
14
+ #include " llvm/ADT/StringMap.h"
15
+ #include " llvm/ProfileData/InstrProfReader.h"
16
+ #include " llvm/Support/BalancedPartitioning.h"
17
+ #include " llvm/Support/TimeProfiler.h"
18
+ #include " llvm/Support/VirtualFileSystem.h"
19
+
20
+ #define DEBUG_TYPE " bp-section-orderer"
21
+
22
+ using namespace llvm ;
23
+ using namespace lld ;
24
+
25
+ using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
26
+
27
+ static SmallVector<std::pair<unsigned , UtilityNodes>> getUnsForCompression (
28
+ ArrayRef<const BPSectionBase *> sections,
29
+ const DenseMap<const void *, uint64_t > §ionToIdx,
30
+ ArrayRef<unsigned > sectionIdxs,
31
+ DenseMap<unsigned , SmallVector<unsigned >> *duplicateSectionIdxs,
32
+ BPFunctionNode::UtilityNodeT &maxUN) {
33
+ TimeTraceScope timeScope (" Build nodes for compression" );
34
+
35
+ SmallVector<std::pair<unsigned , SmallVector<uint64_t >>> sectionHashes;
36
+ sectionHashes.reserve (sectionIdxs.size ());
37
+ SmallVector<uint64_t > hashes;
38
+
39
+ for (unsigned sectionIdx : sectionIdxs) {
40
+ const auto *isec = sections[sectionIdx];
41
+ isec->getSectionHashes (hashes, sectionToIdx);
42
+ sectionHashes.emplace_back (sectionIdx, std::move (hashes));
43
+ hashes.clear ();
44
+ }
45
+
46
+ DenseMap<uint64_t , unsigned > hashFrequency;
47
+ for (auto &[sectionIdx, hashes] : sectionHashes)
48
+ for (auto hash : hashes)
49
+ ++hashFrequency[hash];
50
+
51
+ if (duplicateSectionIdxs) {
52
+ // Merge sections that are nearly identical
53
+ SmallVector<std::pair<unsigned , SmallVector<uint64_t >>> newSectionHashes;
54
+ DenseMap<uint64_t , unsigned > wholeHashToSectionIdx;
55
+ for (auto &[sectionIdx, hashes] : sectionHashes) {
56
+ uint64_t wholeHash = 0 ;
57
+ for (auto hash : hashes)
58
+ if (hashFrequency[hash] > 5 )
59
+ wholeHash ^= hash;
60
+ auto [it, wasInserted] =
61
+ wholeHashToSectionIdx.insert (std::make_pair (wholeHash, sectionIdx));
62
+ if (wasInserted) {
63
+ newSectionHashes.emplace_back (sectionIdx, hashes);
64
+ } else {
65
+ (*duplicateSectionIdxs)[it->getSecond ()].push_back (sectionIdx);
66
+ }
67
+ }
68
+ sectionHashes = newSectionHashes;
69
+
70
+ // Recompute hash frequencies
71
+ hashFrequency.clear ();
72
+ for (auto &[sectionIdx, hashes] : sectionHashes)
73
+ for (auto hash : hashes)
74
+ ++hashFrequency[hash];
75
+ }
76
+
77
+ // Filter rare and common hashes and assign each a unique utility node that
78
+ // doesn't conflict with the trace utility nodes
79
+ DenseMap<uint64_t , BPFunctionNode::UtilityNodeT> hashToUN;
80
+ for (auto &[hash, frequency] : hashFrequency) {
81
+ if (frequency <= 1 || frequency * 2 > sectionHashes.size ())
82
+ continue ;
83
+ hashToUN[hash] = ++maxUN;
84
+ }
85
+
86
+ SmallVector<std::pair<unsigned , UtilityNodes>> sectionUns;
87
+ for (auto &[sectionIdx, hashes] : sectionHashes) {
88
+ UtilityNodes uns;
89
+ for (auto &hash : hashes) {
90
+ auto it = hashToUN.find (hash);
91
+ if (it != hashToUN.end ())
92
+ uns.push_back (it->second );
93
+ }
94
+ sectionUns.emplace_back (sectionIdx, uns);
95
+ }
96
+ return sectionUns;
97
+ }
98
+
99
+ llvm::DenseMap<const BPSectionBase *, size_t >
100
+ BPSectionBase::reorderSectionsByBalancedPartitioning (
101
+ size_t &highestAvailablePriority, llvm::StringRef profilePath,
102
+ bool forFunctionCompression, bool forDataCompression,
103
+ bool compressionSortStartupFunctions, bool verbose,
104
+ SmallVector<std::unique_ptr<BPSectionBase>> &inputSections) {
105
+ TimeTraceScope timeScope (" Setup Balanced Partitioning" );
106
+ SmallVector<const BPSectionBase *> sections;
107
+ DenseMap<const void *, uint64_t > sectionToIdx;
108
+ StringMap<DenseSet<unsigned >> symbolToSectionIdxs;
109
+
110
+ // Process input sections
111
+ for (const auto &isec : inputSections) {
112
+ if (!isec->hasValidData ())
113
+ continue ;
114
+
115
+ unsigned sectionIdx = sections.size ();
116
+ sectionToIdx.try_emplace (isec->getSection (), sectionIdx);
117
+ sections.emplace_back (isec.get ());
118
+ for (auto &sym : isec->getSymbols ())
119
+ symbolToSectionIdxs[sym->getName ()].insert (sectionIdx);
120
+ }
121
+ StringMap<DenseSet<unsigned >> rootSymbolToSectionIdxs;
122
+ for (auto &entry : symbolToSectionIdxs) {
123
+ StringRef name = entry.getKey ();
124
+ auto §ionIdxs = entry.getValue ();
125
+ name = BPSectionBase::getRootSymbol (name);
126
+ rootSymbolToSectionIdxs[name].insert (sectionIdxs.begin (),
127
+ sectionIdxs.end ());
128
+ if (auto resolvedLinkageName =
129
+ sections[*sectionIdxs.begin ()]->getResolvedLinkageName (name))
130
+ rootSymbolToSectionIdxs[resolvedLinkageName.value ()].insert (
131
+ sectionIdxs.begin (), sectionIdxs.end ());
132
+ }
133
+
134
+ BPFunctionNode::UtilityNodeT maxUN = 0 ;
135
+ DenseMap<unsigned , UtilityNodes> startupSectionIdxUNs;
136
+ // Used to define the initial order for startup functions.
137
+ DenseMap<unsigned , size_t > sectionIdxToTimestamp;
138
+ std::unique_ptr<InstrProfReader> reader;
139
+ if (!profilePath.empty ()) {
140
+ auto fs = vfs::getRealFileSystem ();
141
+ auto readerOrErr = InstrProfReader::create (profilePath, *fs);
142
+ lld::checkError (readerOrErr.takeError ());
143
+
144
+ reader = std::move (readerOrErr.get ());
145
+ for (auto &entry : *reader) {
146
+ // Read all entries
147
+ (void )entry;
148
+ }
149
+ auto &traces = reader->getTemporalProfTraces ();
150
+
151
+ DenseMap<unsigned , BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
152
+ for (size_t traceIdx = 0 ; traceIdx < traces.size (); traceIdx++) {
153
+ uint64_t currentSize = 0 , cutoffSize = 1 ;
154
+ size_t cutoffTimestamp = 1 ;
155
+ auto &trace = traces[traceIdx].FunctionNameRefs ;
156
+ for (size_t timestamp = 0 ; timestamp < trace.size (); timestamp++) {
157
+ auto [Filename, ParsedFuncName] = getParsedIRPGOName (
158
+ reader->getSymtab ().getFuncOrVarName (trace[timestamp]));
159
+ ParsedFuncName = BPSectionBase::getRootSymbol (ParsedFuncName);
160
+
161
+ auto sectionIdxsIt = rootSymbolToSectionIdxs.find (ParsedFuncName);
162
+ if (sectionIdxsIt == rootSymbolToSectionIdxs.end ())
163
+ continue ;
164
+ auto §ionIdxs = sectionIdxsIt->getValue ();
165
+ // If the same symbol is found in multiple sections, they might be
166
+ // identical, so we arbitrarily use the size from the first section.
167
+ currentSize += sections[*sectionIdxs.begin ()]->getSize ();
168
+
169
+ // Since BalancedPartitioning is sensitive to the initial order, we need
170
+ // to explicitly define it to be ordered by earliest timestamp.
171
+ for (unsigned sectionIdx : sectionIdxs) {
172
+ auto [it, wasInserted] =
173
+ sectionIdxToTimestamp.try_emplace (sectionIdx, timestamp);
174
+ if (!wasInserted)
175
+ it->getSecond () = std::min<size_t >(it->getSecond (), timestamp);
176
+ }
177
+
178
+ if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
179
+ ++maxUN;
180
+ cutoffSize = 2 * currentSize;
181
+ cutoffTimestamp = 2 * cutoffTimestamp;
182
+ }
183
+ for (unsigned sectionIdx : sectionIdxs)
184
+ sectionIdxToFirstUN.try_emplace (sectionIdx, maxUN);
185
+ }
186
+ for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
187
+ for (auto un = firstUN; un <= maxUN; ++un)
188
+ startupSectionIdxUNs[sectionIdx].push_back (un);
189
+ ++maxUN;
190
+ sectionIdxToFirstUN.clear ();
191
+ }
192
+ }
193
+
194
+ SmallVector<unsigned > sectionIdxsForFunctionCompression,
195
+ sectionIdxsForDataCompression;
196
+ for (unsigned sectionIdx = 0 ; sectionIdx < sections.size (); sectionIdx++) {
197
+ if (startupSectionIdxUNs.count (sectionIdx))
198
+ continue ;
199
+ const auto *isec = sections[sectionIdx];
200
+ if (isec->isCodeSection ()) {
201
+ if (forFunctionCompression)
202
+ sectionIdxsForFunctionCompression.push_back (sectionIdx);
203
+ } else {
204
+ if (forDataCompression)
205
+ sectionIdxsForDataCompression.push_back (sectionIdx);
206
+ }
207
+ }
208
+
209
+ if (compressionSortStartupFunctions) {
210
+ SmallVector<unsigned > startupIdxs;
211
+ for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
212
+ startupIdxs.push_back (sectionIdx);
213
+ auto unsForStartupFunctionCompression =
214
+ getUnsForCompression (sections, sectionToIdx, startupIdxs,
215
+ /* duplicateSectionIdxs=*/ nullptr , maxUN);
216
+ for (auto &[sectionIdx, compressionUns] :
217
+ unsForStartupFunctionCompression) {
218
+ auto &uns = startupSectionIdxUNs[sectionIdx];
219
+ uns.append (compressionUns);
220
+ llvm::sort (uns);
221
+ uns.erase (std::unique (uns.begin (), uns.end ()), uns.end ());
222
+ }
223
+ }
224
+
225
+ // Map a section index (order directly) to a list of duplicate section indices
226
+ // (not ordered directly).
227
+ DenseMap<unsigned , SmallVector<unsigned >> duplicateSectionIdxs;
228
+ auto unsForFunctionCompression = getUnsForCompression (
229
+ sections, sectionToIdx, sectionIdxsForFunctionCompression,
230
+ &duplicateSectionIdxs, maxUN);
231
+ auto unsForDataCompression = getUnsForCompression (
232
+ sections, sectionToIdx, sectionIdxsForDataCompression,
233
+ &duplicateSectionIdxs, maxUN);
234
+
235
+ std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
236
+ nodesForDataCompression;
237
+ for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
238
+ nodesForStartup.emplace_back (sectionIdx, uns);
239
+ for (auto &[sectionIdx, uns] : unsForFunctionCompression)
240
+ nodesForFunctionCompression.emplace_back (sectionIdx, uns);
241
+ for (auto &[sectionIdx, uns] : unsForDataCompression)
242
+ nodesForDataCompression.emplace_back (sectionIdx, uns);
243
+
244
+ // Use the first timestamp to define the initial order for startup nodes.
245
+ llvm::sort (nodesForStartup, [§ionIdxToTimestamp](auto &L, auto &R) {
246
+ return std::make_pair (sectionIdxToTimestamp[L.Id ], L.Id ) <
247
+ std::make_pair (sectionIdxToTimestamp[R.Id ], R.Id );
248
+ });
249
+ // Sort compression nodes by their Id (which is the section index) because the
250
+ // input linker order tends to be not bad.
251
+ llvm::sort (nodesForFunctionCompression,
252
+ [](auto &L, auto &R) { return L.Id < R.Id ; });
253
+ llvm::sort (nodesForDataCompression,
254
+ [](auto &L, auto &R) { return L.Id < R.Id ; });
255
+
256
+ {
257
+ TimeTraceScope timeScope (" Balanced Partitioning" );
258
+ BalancedPartitioningConfig config;
259
+ BalancedPartitioning bp (config);
260
+ bp.run (nodesForStartup);
261
+ bp.run (nodesForFunctionCompression);
262
+ bp.run (nodesForDataCompression);
263
+ }
264
+
265
+ unsigned numStartupSections = 0 ;
266
+ unsigned numCodeCompressionSections = 0 ;
267
+ unsigned numDuplicateCodeSections = 0 ;
268
+ unsigned numDataCompressionSections = 0 ;
269
+ unsigned numDuplicateDataSections = 0 ;
270
+ SetVector<const BPSectionBase *> orderedSections;
271
+ // Order startup functions,
272
+ for (auto &node : nodesForStartup) {
273
+ const auto *isec = sections[node.Id ];
274
+ if (orderedSections.insert (isec))
275
+ ++numStartupSections;
276
+ }
277
+ // then functions for compression,
278
+ for (auto &node : nodesForFunctionCompression) {
279
+ const auto *isec = sections[node.Id ];
280
+ if (orderedSections.insert (isec))
281
+ ++numCodeCompressionSections;
282
+
283
+ auto It = duplicateSectionIdxs.find (node.Id );
284
+ if (It == duplicateSectionIdxs.end ())
285
+ continue ;
286
+ for (auto dupSecIdx : It->getSecond ()) {
287
+ const auto *dupIsec = sections[dupSecIdx];
288
+ if (orderedSections.insert (dupIsec))
289
+ ++numDuplicateCodeSections;
290
+ }
291
+ }
292
+ // then data for compression.
293
+ for (auto &node : nodesForDataCompression) {
294
+ const auto *isec = sections[node.Id ];
295
+ if (orderedSections.insert (isec))
296
+ ++numDataCompressionSections;
297
+ auto It = duplicateSectionIdxs.find (node.Id );
298
+ if (It == duplicateSectionIdxs.end ())
299
+ continue ;
300
+ for (auto dupSecIdx : It->getSecond ()) {
301
+ const auto *dupIsec = sections[dupSecIdx];
302
+ if (orderedSections.insert (dupIsec))
303
+ ++numDuplicateDataSections;
304
+ }
305
+ }
306
+
307
+ if (verbose) {
308
+ unsigned numTotalOrderedSections =
309
+ numStartupSections + numCodeCompressionSections +
310
+ numDuplicateCodeSections + numDataCompressionSections +
311
+ numDuplicateDataSections;
312
+ dbgs ()
313
+ << " Ordered " << numTotalOrderedSections
314
+ << " sections using balanced partitioning:\n Functions for startup: "
315
+ << numStartupSections
316
+ << " \n Functions for compression: " << numCodeCompressionSections
317
+ << " \n Duplicate functions: " << numDuplicateCodeSections
318
+ << " \n Data for compression: " << numDataCompressionSections
319
+ << " \n Duplicate data: " << numDuplicateDataSections << " \n " ;
320
+
321
+ if (!profilePath.empty ()) {
322
+ // Evaluate this function order for startup
323
+ StringMap<std::pair<uint64_t , uint64_t >> symbolToPageNumbers;
324
+ const uint64_t pageSize = (1 << 14 );
325
+ uint64_t currentAddress = 0 ;
326
+ for (const auto *isec : orderedSections) {
327
+ for (auto &sym : isec->getSymbols ()) {
328
+ uint64_t startAddress = currentAddress + sym->getValue ().value_or (0 );
329
+ uint64_t endAddress = startAddress + sym->getSize ().value_or (0 );
330
+ uint64_t firstPage = startAddress / pageSize;
331
+ // I think the kernel might pull in a few pages when one it touched,
332
+ // so it might be more accurate to force lastPage to be aligned by
333
+ // 4?
334
+ uint64_t lastPage = endAddress / pageSize;
335
+ StringRef rootSymbol = sym->getName ();
336
+ rootSymbol = BPSectionBase::getRootSymbol (rootSymbol);
337
+ symbolToPageNumbers.try_emplace (rootSymbol, firstPage, lastPage);
338
+ if (auto resolvedLinkageName =
339
+ isec->getResolvedLinkageName (rootSymbol))
340
+ symbolToPageNumbers.try_emplace (resolvedLinkageName.value (),
341
+ firstPage, lastPage);
342
+ }
343
+ currentAddress += isec->getSize ();
344
+ }
345
+
346
+ // The area under the curve F where F(t) is the total number of page
347
+ // faults at step t.
348
+ unsigned area = 0 ;
349
+ for (auto &trace : reader->getTemporalProfTraces ()) {
350
+ SmallSet<uint64_t , 0 > touchedPages;
351
+ for (unsigned step = 0 ; step < trace.FunctionNameRefs .size (); step++) {
352
+ auto traceId = trace.FunctionNameRefs [step];
353
+ auto [Filename, ParsedFuncName] =
354
+ getParsedIRPGOName (reader->getSymtab ().getFuncOrVarName (traceId));
355
+ ParsedFuncName = BPSectionBase::getRootSymbol (ParsedFuncName);
356
+ auto it = symbolToPageNumbers.find (ParsedFuncName);
357
+ if (it != symbolToPageNumbers.end ()) {
358
+ auto &[firstPage, lastPage] = it->getValue ();
359
+ for (uint64_t i = firstPage; i <= lastPage; i++)
360
+ touchedPages.insert (i);
361
+ }
362
+ area += touchedPages.size ();
363
+ }
364
+ }
365
+ dbgs () << " Total area under the page fault curve: " << (float )area
366
+ << " \n " ;
367
+ }
368
+ }
369
+
370
+ DenseMap<const BPSectionBase *, size_t > sectionPriorities;
371
+ for (const auto *isec : orderedSections)
372
+ sectionPriorities[isec] = --highestAvailablePriority;
373
+ return sectionPriorities;
374
+ }
0 commit comments