Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 653861f

Browse files
author
Evandro Menezes
committed
Add support to optionally limit the size of jump tables.
Many high-performance processors have a dedicated branch predictor for indirect branches, commonly used with jump tables. As sophisticated as such branch predictors are, they tend to have well-defined limits beyond which their effectiveness is hampered or even nullified. One such limit is the number of possible destinations for a given indirect branch that such branch predictors can handle. This patch considers a limit that a target may set to the number of destination addresses in a jump table. Patch by: Evandro Menezes <[email protected]>, Aditya Kumar <[email protected]>, Sebastian Pop <[email protected]>. Differential revision: https://reviews.llvm.org/D21940 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282412 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 3455264 commit 653861f

File tree

7 files changed

+154
-18
lines changed

7 files changed

+154
-18
lines changed

include/llvm/Target/TargetLowering.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,12 +1022,15 @@ class TargetLoweringBase {
10221022
return UseUnderscoreLongJmp;
10231023
}
10241024

1025-
/// Return integer threshold on number of blocks to use jump tables rather
1026-
/// than if sequence.
1027-
int getMinimumJumpTableEntries() const {
1025+
/// Return lower limit for number of blocks in a jump table.
1026+
unsigned getMinimumJumpTableEntries() const {
10281027
return MinimumJumpTableEntries;
10291028
}
10301029

1030+
/// Return upper limit for number of entries in a jump table.
1031+
/// Zero if no limit.
1032+
unsigned getMaximumJumpTableSize() const;
1033+
10311034
/// If a physical register, this specifies the register that
10321035
/// llvm.savestack/llvm.restorestack should save and restore.
10331036
unsigned getStackPointerRegisterToSaveRestore() const {
@@ -1353,12 +1356,15 @@ class TargetLoweringBase {
13531356
UseUnderscoreLongJmp = Val;
13541357
}
13551358

1356-
/// Indicate the number of blocks to generate jump tables rather than if
1357-
/// sequence.
1358-
void setMinimumJumpTableEntries(int Val) {
1359+
/// Indicate the minimum number of blocks to generate jump tables.
1360+
void setMinimumJumpTableEntries(unsigned Val) {
13591361
MinimumJumpTableEntries = Val;
13601362
}
13611363

1364+
/// Indicate the maximum number of entries in jump tables.
1365+
/// Set to zero to generate unlimited jump tables.
1366+
void setMaximumJumpTableSize(unsigned);
1367+
13621368
/// If set to a physical register, this specifies the register that
13631369
/// llvm.savestack/llvm.restorestack should save and restore.
13641370
void setStackPointerRegisterToSaveRestore(unsigned R) {

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8454,12 +8454,19 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
84548454
if (!areJTsAllowed(TLI, SI))
84558455
return;
84568456

8457+
const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize();
8458+
84578459
const int64_t N = Clusters.size();
8458-
const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
8460+
const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
8461+
const unsigned MaxJumpTableSize =
8462+
OptForSize ? UINT_MAX : TLI.getMaximumJumpTableSize() ?
8463+
TLI.getMaximumJumpTableSize() : UINT_MAX;
8464+
8465+
if (N < 2 || N < MinJumpTableEntries)
8466+
return;
84598467

84608468
// TotalCases[i]: Total nbr of cases in Clusters[0..i].
84618469
SmallVector<unsigned, 8> TotalCases(N);
8462-
84638470
for (unsigned i = 0; i < N; ++i) {
84648471
const APInt &Hi = Clusters[i].High->getValue();
84658472
const APInt &Lo = Clusters[i].Low->getValue();
@@ -8468,12 +8475,16 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
84688475
TotalCases[i] += TotalCases[i - 1];
84698476
}
84708477

8471-
unsigned MinDensity = JumpTableDensity;
8472-
if (DefaultMBB->getParent()->getFunction()->optForSize())
8473-
MinDensity = OptsizeJumpTableDensity;
8474-
if (N >= MinJumpTableSize
8475-
&& isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
8476-
// Cheap case: the whole range might be suitable for jump table.
8478+
const unsigned MinDensity =
8479+
OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
8480+
8481+
// Cheap case: the whole range may be suitable for jump table.
8482+
unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
8483+
Clusters[0].Low->getValue())
8484+
.getLimitedValue(UINT_MAX - 1) + 1;
8485+
if (JumpTableSize <= MaxJumpTableSize &&
8486+
isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
8487+
84778488
CaseCluster JTCluster;
84788489
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
84798490
Clusters[0] = JTCluster;
@@ -8503,7 +8514,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
85038514
// Base case: There is only one way to partition Clusters[N-1].
85048515
MinPartitions[N - 1] = 1;
85058516
LastElement[N - 1] = N - 1;
8506-
assert(MinJumpTableSize > 1);
85078517
NumTables[N - 1] = 0;
85088518

85098519
// Note: loop indexes are signed to avoid underflow.
@@ -8517,9 +8527,13 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
85178527
// Search for a solution that results in fewer partitions.
85188528
for (int64_t j = N - 1; j > i; j--) {
85198529
// Try building a partition from Clusters[i..j].
8520-
if (isDense(Clusters, TotalCases, i, j, MinDensity)) {
8530+
JumpTableSize = (Clusters[j].High->getValue() -
8531+
Clusters[i].Low->getValue())
8532+
.getLimitedValue(UINT_MAX - 1) + 1;
8533+
if (JumpTableSize <= MaxJumpTableSize &&
8534+
isDense(Clusters, TotalCases, i, j, MinDensity)) {
85218535
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
8522-
bool IsTable = j - i + 1 >= MinJumpTableSize;
8536+
bool IsTable = j - i + 1 >= MinJumpTableEntries;
85238537
unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
85248538

85258539
// If this j leads to fewer partitions, or same number of partitions
@@ -8543,7 +8557,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
85438557
unsigned NumClusters = Last - First + 1;
85448558

85458559
CaseCluster JTCluster;
8546-
if (NumClusters >= MinJumpTableSize &&
8560+
if (NumClusters >= MinJumpTableEntries &&
85478561
buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
85488562
Clusters[DstIndex++] = JTCluster;
85498563
} else {

lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ static cl::opt<bool> JumpIsExpensiveOverride(
4444
cl::desc("Do not create extra branches to split comparison logic."),
4545
cl::Hidden);
4646

47+
static cl::opt<unsigned> MaximumJumpTableSize
48+
("max-jump-table", cl::init(0), cl::Hidden,
49+
cl::desc("Set maximum number of jump table entries; zero for no limit."));
50+
4751
// Although this default value is arbitrary, it is not random. It is assumed
4852
// that a condition that evaluates the same way by a higher percentage than this
4953
// is best represented as control flow. Therefore, the default value N should be
@@ -1831,3 +1835,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
18311835
Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
18321836
return nullptr;
18331837
}
1838+
1839+
unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
1840+
return MaximumJumpTableSize;
1841+
}
1842+
1843+
void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
1844+
MaximumJumpTableSize = Val;
1845+
}

lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
513513
setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
514514
setPrefLoopAlignment(STI.getPrefLoopAlignment());
515515

516+
// Only change the limit for entries in a jump table if specified by
517+
// the subtarget, but not at the command line.
518+
unsigned MaxJT = STI.getMaximumJumpTableSize();
519+
if (MaxJT && getMaximumJumpTableSize() == 0)
520+
setMaximumJumpTableSize(MaxJT);
521+
516522
setHasExtractBitsInsn(true);
517523

518524
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ void AArch64Subtarget::initializeProperties() {
6565
case ExynosM1:
6666
PrefFunctionAlignment = 4;
6767
PrefLoopAlignment = 3;
68+
MaxJumpTableSize = 12;
6869
break;
6970
case Kryo:
7071
MaxInterleaveFactor = 4;

lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
9090
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
9191
unsigned PrefFunctionAlignment = 0;
9292
unsigned PrefLoopAlignment = 0;
93+
unsigned MaxJumpTableSize = 0;
9394

9495
// ReserveX18 - X18 is not available as a general purpose register.
9596
bool ReserveX18;
@@ -203,6 +204,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
203204
unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
204205
unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; }
205206

207+
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
208+
206209
/// CPU has TBI (top byte of addresses is ignored during HW address
207210
/// translation) and OS enables it.
208211
bool supportsAddressTopByteIgnored() const;
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK0 <%t
2+
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=4 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK4 <%t
3+
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=8 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK8 <%t
4+
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -mcpu=exynos-m1 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECKM1 <%t
5+
6+
declare void @ext(i32)
7+
8+
define i32 @jt1(i32 %a, i32 %b) {
9+
entry:
10+
switch i32 %a, label %return [
11+
i32 1, label %bb1
12+
i32 2, label %bb2
13+
i32 3, label %bb3
14+
i32 4, label %bb4
15+
i32 5, label %bb5
16+
i32 6, label %bb6
17+
i32 7, label %bb7
18+
i32 8, label %bb8
19+
i32 9, label %bb9
20+
i32 10, label %bb10
21+
i32 11, label %bb11
22+
i32 12, label %bb12
23+
i32 13, label %bb13
24+
i32 14, label %bb14
25+
i32 15, label %bb15
26+
i32 16, label %bb16
27+
i32 17, label %bb17
28+
]
29+
; CHECK-LABEL: function jt1:
30+
; CHECK: Jump Tables:
31+
; CHECK0-NEXT: jt#0:
32+
; CHECK0-NOT: jt#1:
33+
; CHECK4-NEXT: jt#0:
34+
; CHECK4-SAME: jt#1:
35+
; CHECK4-SAME: jt#2:
36+
; CHECK4-SAME: jt#3:
37+
; CHECK4-NOT: jt#4:
38+
; CHECK8-NEXT: jt#0:
39+
; CHECK8-SAME: jt#1:
40+
; CHECK8-SAME: jt#2: BB#14 BB#15 BB#16 BB#17{{$}}
41+
; CHECK8-NOT: jt#3:
42+
; CHECKM1-NEXT: jt#0:
43+
; CHECKM1-SAME: jt#1: BB#13 BB#14 BB#15 BB#16 BB#17{{$}}
44+
; CHECKM1-NOT: jt#2:
45+
; CHEC-NEXT: Function Live Ins:
46+
47+
bb1: tail call void @ext(i32 0) br label %return
48+
bb2: tail call void @ext(i32 2) br label %return
49+
bb3: tail call void @ext(i32 4) br label %return
50+
bb4: tail call void @ext(i32 6) br label %return
51+
bb5: tail call void @ext(i32 8) br label %return
52+
bb6: tail call void @ext(i32 10) br label %return
53+
bb7: tail call void @ext(i32 12) br label %return
54+
bb8: tail call void @ext(i32 14) br label %return
55+
bb9: tail call void @ext(i32 16) br label %return
56+
bb10: tail call void @ext(i32 18) br label %return
57+
bb11: tail call void @ext(i32 20) br label %return
58+
bb12: tail call void @ext(i32 22) br label %return
59+
bb13: tail call void @ext(i32 24) br label %return
60+
bb14: tail call void @ext(i32 26) br label %return
61+
bb15: tail call void @ext(i32 28) br label %return
62+
bb16: tail call void @ext(i32 30) br label %return
63+
bb17: tail call void @ext(i32 32) br label %return
64+
65+
return: ret i32 %b
66+
}
67+
68+
define void @jt2(i32 %x) {
69+
entry:
70+
switch i32 %x, label %return [
71+
i32 1, label %bb1
72+
i32 2, label %bb2
73+
i32 3, label %bb3
74+
i32 4, label %bb4
75+
76+
i32 14, label %bb5
77+
i32 15, label %bb6
78+
]
79+
; CHECK-LABEL: function jt2:
80+
; CHECK: Jump Tables:
81+
; CHECK0-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#5 BB#6{{$}}
82+
; CHECK4-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
83+
; CHECK8-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
84+
; CHECKM1-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
85+
; CHEC-NEXT: Function Live Ins:
86+
87+
bb1: tail call void @ext(i32 1) br label %return
88+
bb2: tail call void @ext(i32 2) br label %return
89+
bb3: tail call void @ext(i32 3) br label %return
90+
bb4: tail call void @ext(i32 4) br label %return
91+
bb5: tail call void @ext(i32 5) br label %return
92+
bb6: tail call void @ext(i32 6) br label %return
93+
return: ret void
94+
}

0 commit comments

Comments
 (0)