Skip to content

Commit b402487

Browse files
author
spupyrev
committed
[BOLT] A new code layout algorithm for function reordering [3b/3]
This is a new algorithm for function layout (reordering) based on the call graph extracted from profile data; see diffs down the stack for more details. This layout is very similar to the existing hfsort+, but perhaps a little better on some benchmarks. The goals of the change are as follows: (i) rename and replace hfsort+ with a newer (hopefully better) implementation. I'd prefer to keep both algorithms together for some time to simplify evaluation and transition, but do want to remove hfsort+ once we're confident that there are no regressions. (ii) unify the implementation of code layout algorithms across LLVM. Currently Passes/HfsortPlus.cpp and Utils/CodeLayout.cpp share many implementation-specific details; this diff unifies the code. Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D153039
1 parent 087c136 commit b402487

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

bolt/include/bolt/Passes/ReorderFunctions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class ReorderFunctions : public BinaryFunctionPass {
3232
RT_EXEC_COUNT,
3333
RT_HFSORT,
3434
RT_HFSORT_PLUS,
35+
RT_CDS,
3536
RT_PETTIS_HANSEN,
3637
RT_RANDOM,
3738
RT_USER

bolt/lib/Passes/ReorderFunctions.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Utils/Utils.h"
1616
#include "llvm/ADT/STLExtras.h"
1717
#include "llvm/Support/CommandLine.h"
18+
#include "llvm/Transforms/Utils/CodeLayout.h"
1819
#include <fstream>
1920

2021
#define DEBUG_TYPE "hfsort"
@@ -41,6 +42,8 @@ cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
4142
"use hfsort algorithm"),
4243
clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
4344
"use hfsort+ algorithm"),
45+
clEnumValN(bolt::ReorderFunctions::RT_CDS, "cds",
46+
"use cache-directed sort"),
4447
clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
4548
"pettis-hansen", "use Pettis-Hansen algorithm"),
4649
clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
@@ -309,6 +312,36 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
309312
case RT_HFSORT_PLUS:
310313
Clusters = hfsortPlus(Cg);
311314
break;
315+
case RT_CDS: {
316+
// It is required that the sum of incoming arc weights is not greater
317+
// than the number of samples for every function. Ensuring the call graph
318+
// obeys the property before running the algorithm.
319+
Cg.adjustArcWeights();
320+
321+
// Initialize call graph nodes and their data
322+
std::vector<uint64_t> FuncSizes;
323+
std::vector<uint64_t> FuncCounts;
324+
using JumpT = std::pair<uint64_t, uint64_t>;
325+
std::vector<std::pair<JumpT, uint64_t>> CallCounts;
326+
std::vector<uint64_t> CallOffsets;
327+
for (NodeId F = 0; F < Cg.numNodes(); ++F) {
328+
FuncSizes.push_back(Cg.size(F));
329+
FuncCounts.push_back(Cg.samples(F));
330+
for (NodeId Succ : Cg.successors(F)) {
331+
const Arc &Arc = *Cg.findArc(F, Succ);
332+
auto It = std::make_pair(F, Succ);
333+
CallCounts.push_back(std::make_pair(It, Arc.weight()));
334+
CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
335+
}
336+
}
337+
338+
// Run the layout algorithm.
339+
std::vector<uint64_t> Result =
340+
applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets);
341+
342+
// Create a single cluster from the computed order of hot functions.
343+
Clusters.emplace_back(Cluster(Result, Cg));
344+
} break;
312345
case RT_PETTIS_HANSEN:
313346
Clusters = pettisAndHansen(Cg);
314347
break;

0 commit comments

Comments
 (0)