Skip to content

Commit 5bae81b

Browse files
authored
[CodeGen] Add an option to skip extTSP BB placement for huge functions. (#99310)
The extTSP-based basic block layout algorithm improves the performance of the generated code, but unfortunately it has a super-linear time complexity. This leads to extremely long compilation times for certain relatively rare kinds of autogenerated code. This patch adds an `-mllvm` flag to optionally restrict extTSP only to functions smaller than a specified threshold. While commit bcdc047 added a knob to limit the maximum chain size, it's still possible that for certain huge functions the number of chains is very large, leading to a quadratic behaviour in ExtTSPImpl::mergeChainPairs.
1 parent 0c03b4c commit 5bae81b

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

llvm/lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,12 @@ static cl::opt<bool> RenumberBlocksBeforeView(
213213
"into a dot graph. Only used when a function is being printed."),
214214
cl::init(false), cl::Hidden);
215215

216+
static cl::opt<unsigned> ExtTspBlockPlacementMaxBlocks(
217+
"ext-tsp-block-placement-max-blocks",
218+
cl::desc("Maximum number of basic blocks in a function to run ext-TSP "
219+
"block placement."),
220+
cl::init(UINT_MAX), cl::Hidden);
221+
216222
namespace llvm {
217223
extern cl::opt<bool> EnableExtTspBlockPlacement;
218224
extern cl::opt<bool> ApplyExtTspWithoutProfile;
@@ -3523,7 +3529,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
35233529

35243530
// Apply a post-processing optimizing block placement.
35253531
if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
3526-
(ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
3532+
(ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData()) &&
3533+
MF.size() <= ExtTspBlockPlacementMaxBlocks) {
35273534
// Find a new placement and modify the layout of the blocks in the function.
35283535
applyExtTsp();
35293536

llvm/test/CodeGen/X86/code_placement_ext_tsp_large.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -enable-ext-tsp-block-placement=1 -ext-tsp-chain-split-threshold=128 -debug-only=block-placement < %s 2>&1 | FileCheck %s
33
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -enable-ext-tsp-block-placement=1 -ext-tsp-chain-split-threshold=1 -debug-only=block-placement < %s 2>&1 | FileCheck %s -check-prefix=CHECK2
44
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -enable-ext-tsp-block-placement=0 -debug-only=block-placement < %s 2>&1 | FileCheck %s -check-prefix=CHECK3
5+
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -enable-ext-tsp-block-placement=1 -ext-tsp-block-placement-max-blocks=8 -debug-only=block-placement < %s 2>&1 | FileCheck %s -check-prefix=CHECK4
56

67
@yydebug = dso_local global i32 0, align 4
78

@@ -110,6 +111,20 @@ define void @func_large() !prof !0 {
110111
; CHECK3: b7
111112
; CHECK3: b8
112113
; CHECK3: b9
114+
;
115+
; An expected output with function size larger than the threshold -- the layout is not modified:
116+
;
117+
; CHECK4-LABEL: func_large:
118+
; CHECK4: b0
119+
; CHECK4: b1
120+
; CHECK4: b2
121+
; CHECK4: b3
122+
; CHECK4: b4
123+
; CHECK4: b5
124+
; CHECK4: b6
125+
; CHECK4: b7
126+
; CHECK4: b8
127+
; CHECK4: b9
113128

114129
b0:
115130
%0 = load i32, ptr @yydebug, align 4

0 commit comments

Comments
 (0)