Skip to content

Commit bdc0afc

Browse files
authored
[CodeGen][AArch64] Set min jump table entries to 13 for AArch64 targets (#71166)
There are some workloads that are negatively impacted by using jump tables when the number of entries is small. The SPEC2017 perlbench benchmark is one example of this, where increasing the threshold to around 13 gives a ~1.5% improvement on neoverse-v1. I chose the minimum threshold based on empirical evidence rather than science: I manually increased the threshold until I got the best performance without impacting other workloads. For neoverse-v1 I saw a ~0.2% improvement in the SPEC2017 integer geomean, and no overall change for neoverse-n1. If we find issues with this threshold later on, we can always revisit it. The most significant SPEC2017 score changes on neoverse-v1 were 500.perlbench_r (+1.6%) and 520.omnetpp_r (+0.6%); the rest saw changes of less than 0.5%. I updated CodeGen/AArch64/min-jump-table.ll to reflect the new threshold. For most of the affected tests I manually set the minimum number of entries back to 4 on the RUN line, because the tests seem to rely upon this behaviour.
1 parent 074e4ae commit bdc0afc

21 files changed

+219
-64
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26603,3 +26603,7 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
2660326603
}
2660426604
return true;
2660526605
}
26606+
26607+
/// Returns the minimum number of jump table entries for this target by
/// forwarding to the subtarget's getMinimumJumpTableEntries().
unsigned AArch64TargetLowering::getMinimumJumpTableEntries() const {
26608+
return Subtarget->getMinimumJumpTableEntries();
26609+
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,8 @@ class AArch64TargetLowering : public TargetLowering {
12521252
SDLoc DL, EVT VT) const;
12531253

12541254
bool preferScalarizeSplat(SDNode *N) const override;
1255+
1256+
unsigned getMinimumJumpTableEntries() const override;
12551257
};
12561258

12571259
namespace AArch64 {

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,19 @@ static cl::opt<AArch64PAuth::AuthCheckMethod>
7777
"to authenticated LR during tail call"),
7878
cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
7979

80+
// Hidden command-line option "aarch64-min-jump-table-entries", initialised to
// 13. Its value is copied into the subtarget's MinimumJumpTableEntries when
// the option is given explicitly or when the function is not minsize.
static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
81+
"aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
82+
cl::desc("Set minimum number of entries to use a jump table on AArch64"));
83+
8084
/// Returns the base cost for vector insert/extract. If
/// OverrideVectorInsertExtractBaseCost has at least one occurrence, its value
/// takes precedence; otherwise the subtarget's VectorInsertExtractBaseCost is
/// returned.
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
8185
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
8286
return OverrideVectorInsertExtractBaseCost;
8387
return VectorInsertExtractBaseCost;
8488
}
8589

8690
AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
87-
StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
91+
StringRef FS, StringRef CPUString, StringRef TuneCPUString,
92+
bool HasMinSize) {
8893
// Determine default and user-specified characteristics
8994

9095
if (CPUString.empty())
@@ -94,12 +99,12 @@ AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
9499
TuneCPUString = CPUString;
95100

96101
ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
97-
initializeProperties();
102+
initializeProperties(HasMinSize);
98103

99104
return *this;
100105
}
101106

102-
void AArch64Subtarget::initializeProperties() {
107+
void AArch64Subtarget::initializeProperties(bool HasMinSize) {
103108
// Initialize CPU specific properties. We should add a tablegen feature for
104109
// this in the future so we can specify it together with the subtarget
105110
// features.
@@ -292,6 +297,9 @@ void AArch64Subtarget::initializeProperties() {
292297
MaxInterleaveFactor = 4;
293298
break;
294299
}
300+
301+
if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
302+
MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
295303
}
296304

297305
AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
@@ -300,17 +308,17 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
300308
unsigned MinSVEVectorSizeInBitsOverride,
301309
unsigned MaxSVEVectorSizeInBitsOverride,
302310
bool StreamingSVEMode,
303-
bool StreamingCompatibleSVEMode)
311+
bool StreamingCompatibleSVEMode,
312+
bool HasMinSize)
304313
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
305314
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
306315
ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
307316
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
308-
IsLittle(LittleEndian),
309-
StreamingSVEMode(StreamingSVEMode),
317+
IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode),
310318
StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
311319
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
312320
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
313-
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
321+
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
314322
TLInfo(TM, *this) {
315323
if (AArch64::isX18ReservedByDefault(TT))
316324
ReserveXRegister.set(18);

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
112112
Align PrefFunctionAlignment;
113113
Align PrefLoopAlignment;
114114
unsigned MaxBytesForLoopAlignment = 0;
115+
unsigned MinimumJumpTableEntries = 4;
115116
unsigned MaxJumpTableSize = 0;
116117

117118
// ReserveXRegister[i] - X#i is not available as a general purpose register.
@@ -153,10 +154,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
153154
/// subtarget initialization.
154155
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
155156
StringRef CPUString,
156-
StringRef TuneCPUString);
157+
StringRef TuneCPUString,
158+
bool HasMinSize);
157159

158160
/// Initialize properties based on the selected processor family.
159-
void initializeProperties();
161+
void initializeProperties(bool HasMinSize);
160162

161163
public:
162164
/// This constructor initializes the data members to match that
@@ -166,7 +168,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
166168
unsigned MinSVEVectorSizeInBitsOverride = 0,
167169
unsigned MaxSVEVectorSizeInBitsOverride = 0,
168170
bool StreamingSVEMode = false,
169-
bool StreamingCompatibleSVEMode = false);
171+
bool StreamingCompatibleSVEMode = false,
172+
bool HasMinSize = false);
170173

171174
// Getters for SubtargetFeatures defined in tablegen
172175
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
@@ -274,6 +277,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
274277
}
275278

276279
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
280+
/// Returns the stored MinimumJumpTableEntries value for this subtarget.
unsigned getMinimumJumpTableEntries() const {
281+
return MinimumJumpTableEntries;
282+
}
277283

278284
/// CPU has TBI (top byte of addresses is ignored during HW address
279285
/// translation) and OS enables it.

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
397397
StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : TargetCPU;
398398
StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
399399
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
400+
bool HasMinSize = F.hasMinSize();
400401

401402
bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
402403
F.hasFnAttribute("aarch64_pstate_sm_body");
@@ -432,8 +433,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
432433
<< MaxSVEVectorSize
433434
<< "StreamingSVEMode=" << StreamingSVEMode
434435
<< "StreamingCompatibleSVEMode="
435-
<< StreamingCompatibleSVEMode << CPU << TuneCPU
436-
<< FS;
436+
<< StreamingCompatibleSVEMode << CPU << TuneCPU << FS
437+
<< "HasMinSize=" << HasMinSize;
437438

438439
auto &I = SubtargetMap[Key];
439440
if (!I) {
@@ -443,7 +444,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
443444
resetTargetOptions(F);
444445
I = std::make_unique<AArch64Subtarget>(
445446
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
446-
MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
447+
MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode,
448+
HasMinSize);
447449
}
448450

449451
assert((!StreamingSVEMode || I->hasSME()) &&

llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
; RUN: llc -global-isel -mtriple aarch64 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -simplify-mir -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
2+
; RUN: llc -global-isel -mtriple aarch64 -aarch64-min-jump-table-entries=4 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -simplify-mir -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
33

44
define i32 @switch(i32 %argc) {
55
; CHECK-LABEL: name: switch

llvm/test/CodeGen/AArch64/arm64-jumptable.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
2-
; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX
1+
; RUN: llc -mtriple=arm64-apple-ios -aarch64-min-jump-table-entries=4 < %s | FileCheck %s
2+
; RUN: llc -mtriple=arm64-linux-gnu -aarch64-min-jump-table-entries=4 < %s | FileCheck %s --check-prefix=CHECK-LINUX
33
; <rdar://11417675>
44

55
define void @sum(i32 %a, ptr %to, i32 %c) {

llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc %s -o - | FileCheck %s
1+
; RUN: llc %s -aarch64-min-jump-table-entries=4 -o - | FileCheck %s
22
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
33
target triple = "aarch64"
44

llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
2+
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
33

44
; Check there's no assert in spilling from implicit-def operands on an
55
; IMPLICIT_DEF.

llvm/test/CodeGen/AArch64/jump-table-32.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
1+
; RUN: llc -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
22

33
define i32 @test_jumptable(i32 %in) {
44
; CHECK: test_jumptable

llvm/test/CodeGen/AArch64/jump-table-exynos.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mattr=+force-32bit-jump-tables -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
2-
; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
1+
; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mattr=+force-32bit-jump-tables -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
2+
; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
33

44
; Exynos doesn't want jump tables to be compressed for now.
55

llvm/test/CodeGen/AArch64/jump-table.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
2-
; RUN: llc -no-integrated-as -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
3-
; RUN: llc -no-integrated-as -code-model=large -relocation-model=pic -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-PIC %s
4-
; RUN: llc -no-integrated-as -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
5-
; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-IOS %s
6-
; RUN: llc -no-integrated-as -code-model=tiny -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-TINY %s
1+
; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
2+
; RUN: llc -no-integrated-as -code-model=large -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
3+
; RUN: llc -no-integrated-as -code-model=large -relocation-model=pic -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-PIC %s
4+
; RUN: llc -no-integrated-as -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
5+
; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-IOS %s
6+
; RUN: llc -no-integrated-as -code-model=tiny -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-TINY %s
77

88
define i32 @test_jumptable(i32 %in) {
99
; CHECK: test_jumptable

llvm/test/CodeGen/AArch64/max-jump-table.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t
2-
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t
3-
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t
4-
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t
5-
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t
1+
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t
2+
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t
3+
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t
4+
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t
5+
; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t
66

77
declare void @ext(i32, i32)
88

0 commit comments

Comments
 (0)