Skip to content

Commit bcdc047

Browse files
author
spupyrev
committed
speeding up ext-tsp for huge instances
Differential Revision: https://reviews.llvm.org/D120780
1 parent 59814a8 commit bcdc047

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

llvm/lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ static cl::opt<unsigned> TriangleChainCount(
201201
cl::Hidden);
202202

203203
extern cl::opt<bool> EnableExtTspBlockPlacement;
204+
extern cl::opt<bool> ApplyExtTspWithoutProfile;
204205

205206
namespace llvm {
206207
extern cl::opt<unsigned> StaticLikelyProb;
@@ -3419,7 +3420,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
34193420
}
34203421

34213422
// Apply a post-processing optimizing block placement.
3422-
if (MF.size() >= 3 && EnableExtTspBlockPlacement) {
3423+
if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
3424+
(ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
34233425
// Find a new placement and modify the layout of the blocks in the function.
34243426
applyExtTsp();
34253427

llvm/lib/Transforms/Utils/CodeLayout.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ cl::opt<bool> EnableExtTspBlockPlacement(
4949
cl::desc("Enable machine block placement based on the ext-tsp model, "
5050
"optimizing I-cache utilization."));
5151

52+
cl::opt<bool> ApplyExtTspWithoutProfile(
53+
"ext-tsp-apply-without-profile",
54+
cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
55+
cl::init(true), cl::Hidden, cl::ZeroOrMore);
56+
5257
// Algorithm-specific constants. The values are tuned for the best performance
5358
// of large-scale front-end bound binaries.
5459
static cl::opt<double>
@@ -67,6 +72,12 @@ static cl::opt<unsigned> BackwardDistance(
6772
"ext-tsp-backward-distance", cl::Hidden, cl::init(640),
6873
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
6974

75+
// The maximum size of a chain created by the algorithm. The size is bounded
76+
// so that the algorithm can efficiently process extremely large instances.
77+
static cl::opt<unsigned>
78+
MaxChainSize("ext-tsp-max-chain-size", cl::Hidden, cl::init(4096),
79+
cl::desc("The maximum size of a chain to create."));
80+
7081
// The maximum size of a chain for splitting. Larger values of the threshold
7182
// may yield better quality at the cost of worse run-time.
7283
static cl::opt<unsigned> ChainSplitThreshold(
@@ -226,6 +237,8 @@ class Chain {
226237

227238
const std::vector<Block *> &blocks() const { return Blocks; }
228239

240+
// Returns the number of blocks currently stored in this chain (Blocks.size()).
size_t numBlocks() const { return Blocks.size(); }
241+
229242
const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const {
230243
return Edges;
231244
}
@@ -502,7 +515,7 @@ class ExtTSPImpl {
502515
AllEdges.reserve(AllJumps.size());
503516
for (auto &Block : AllBlocks) {
504517
for (auto &Jump : Block.OutJumps) {
505-
const auto SuccBlock = Jump->Target;
518+
auto SuccBlock = Jump->Target;
506519
auto CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain);
507520
// this edge is already present in the graph
508521
if (CurEdge != nullptr) {
@@ -592,6 +605,10 @@ class ExtTSPImpl {
592605
if (ChainPred == ChainSucc)
593606
continue;
594607

608+
// Skip this pair if the combined chain would reach the maximum allowed size
609+
if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
610+
continue;
611+
595612
// Compute the gain of merging the two chains
596613
auto CurGain = getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
597614
if (CurGain.score() <= EPS)

0 commit comments

Comments
 (0)