Skip to content

Commit f6ddf77

Browse files
committed
[LowerTypeTests] Support generating Armv6-M jump tables.
The LowerTypeTests pass emits a jump table in the form of an `inlineasm` IR node containing a string representation of some assembly. It tests the target triple to see what architecture it should be generating assembly for.

But that's not good enough for `Triple::thumb`, because the 32-bit PC-relative `b.w` branch instruction isn't available in all supported architecture versions. In particular, Armv6-M doesn't support that instruction (although the similar Armv8-M Baseline does).

Most of this patch is concerned with working out whether the compilation target is Armv6-M or not, which I'm doing by going through all the functions in the module, retrieving a TargetTransformInfo for each one, and querying it via a new method I've added to check its SubtargetInfo. If any function's TTI indicates that it's targeting an architecture supporting B.W, then we assume we're also allowed to use B.W in the jump table.

The Armv6-M compatible jump table format requires a temporary register, and therefore also has to use the stack in order to restore that register.

Another consequence of this change is that jump tables on Arm/Thumb are no longer always the same size. In particular, on an architecture that supports Arm and Thumb-1 but not Thumb-2, the Arm and Thumb tables are different sizes from *each other*. As a consequence, `getJumpTableEntrySize` can no longer base its answer on the target triple's architecture: it has to take into account the decision that `selectJumpTableArmEncoding` made, which meant I had to move that function to an earlier point in the code and store its answer in the `LowerTypeTestsModule` class.

Reviewed By: lenary

Differential Revision: https://reviews.llvm.org/D143576
1 parent afde3f5 commit f6ddf77

File tree

8 files changed

+170
-43
lines changed

8 files changed

+170
-43
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1570,6 +1570,17 @@ class TargetTransformInfo {
15701570
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
15711571
/// @}
15721572

1573+
/// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1574+
/// state.
1575+
///
1576+
/// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1577+
/// node containing a jump table in a format suitable for the target, so it
1578+
/// needs to know what format of jump table it can legally use.
1579+
///
1580+
/// For non-Arm targets, this function isn't used. It defaults to returning
1581+
/// false, but it shouldn't matter what it returns anyway.
1582+
bool hasArmWideBranch(bool Thumb) const;
1583+
15731584
/// @}
15741585

15751586
private:
@@ -1927,6 +1938,7 @@ class TargetTransformInfo::Concept {
19271938
Align Alignment) const = 0;
19281939
virtual VPLegalization
19291940
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1941+
virtual bool hasArmWideBranch(bool Thumb) const = 0;
19301942
};
19311943

19321944
template <typename T>
@@ -2606,6 +2618,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
26062618
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
26072619
return Impl.getVPLegalizationStrategy(PI);
26082620
}
2621+
2622+
bool hasArmWideBranch(bool Thumb) const override {
2623+
return Impl.hasArmWideBranch(Thumb);
2624+
}
26092625
};
26102626

26112627
template <typename T>

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,8 @@ class TargetTransformInfoImplBase {
862862
/* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
863863
}
864864

865+
bool hasArmWideBranch(bool) const { return false; }
866+
865867
protected:
866868
// Obtain the minimum required size to hold the value (without the sign)
867869
// In case of a vector it returns the min required size for one element.

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,10 @@ TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
11701170
return TTIImpl->getVPLegalizationStrategy(VPI);
11711171
}
11721172

1173+
bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
1174+
return TTIImpl->hasArmWideBranch(Thumb);
1175+
}
1176+
11731177
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
11741178
return TTIImpl->shouldExpandReduction(II);
11751179
}

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2441,3 +2441,16 @@ InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
24412441
}
24422442
return -1;
24432443
}
2444+
2445+
bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const {
2446+
if (Thumb) {
2447+
// B.W is available in any Thumb2-supporting target, and also in every
2448+
// version of Armv8-M, even Baseline which does not include the rest of
2449+
// Thumb2.
2450+
return ST->isThumb2() || ST->hasV8MBaselineOps();
2451+
} else {
2452+
// B is available in all versions of the Arm ISA, so the only question is
2453+
// whether that ISA is available at all.
2454+
return ST->hasARMOps();
2455+
}
2456+
}

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
325325

326326
return true;
327327
}
328+
329+
bool hasArmWideBranch(bool Thumb) const;
330+
328331
/// @}
329332
};
330333

llvm/lib/Transforms/IPO/LowerTypeTests.cpp

Lines changed: 106 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/ADT/Statistic.h"
2525
#include "llvm/ADT/StringRef.h"
2626
#include "llvm/ADT/TinyPtrVector.h"
27+
#include "llvm/Analysis/TargetTransformInfo.h"
2728
#include "llvm/Analysis/TypeMetadataUtils.h"
2829
#include "llvm/Analysis/ValueTracking.h"
2930
#include "llvm/IR/Attributes.h"
@@ -406,6 +407,15 @@ class LowerTypeTestsModule {
406407
Triple::OSType OS;
407408
Triple::ObjectFormatType ObjectFormat;
408409

410+
// Determines which kind of Thumb jump table we generate. If arch is
411+
// either 'arm' or 'thumb' we need to find this out, because
412+
// selectJumpTableArmEncoding may decide to use Thumb in either case.
413+
bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;
414+
415+
// The jump table type we ended up deciding on. (Usually the same as
416+
// Arch, except that 'arm' and 'thumb' are often interchangeable.)
417+
Triple::ArchType JumpTableArch = Triple::UnknownArch;
418+
409419
IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
410420
IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
411421
PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
@@ -481,6 +491,8 @@ class LowerTypeTestsModule {
481491

482492
void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
483493
ArrayRef<GlobalTypeMember *> Globals);
494+
Triple::ArchType
495+
selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
484496
unsigned getJumpTableEntrySize();
485497
Type *getJumpTableEntryType();
486498
void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
@@ -518,15 +530,16 @@ class LowerTypeTestsModule {
518530
void replaceDirectCalls(Value *Old, Value *New);
519531

520532
public:
521-
LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
533+
LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
534+
ModuleSummaryIndex *ExportSummary,
522535
const ModuleSummaryIndex *ImportSummary,
523536
bool DropTypeTests);
524537

525538
bool lower();
526539

527540
// Lower the module using the action and summary passed as command line
528541
// arguments. For testing purposes only.
529-
static bool runForTesting(Module &M);
542+
static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
530543
};
531544
} // end anonymous namespace
532545

@@ -1182,31 +1195,36 @@ static const unsigned kX86JumpTableEntrySize = 8;
11821195
static const unsigned kX86IBTJumpTableEntrySize = 16;
11831196
static const unsigned kARMJumpTableEntrySize = 4;
11841197
static const unsigned kARMBTIJumpTableEntrySize = 8;
1198+
static const unsigned kARMv6MJumpTableEntrySize = 16;
11851199
static const unsigned kRISCVJumpTableEntrySize = 8;
11861200

11871201
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
1188-
switch (Arch) {
1189-
case Triple::x86:
1190-
case Triple::x86_64:
1191-
if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
1202+
switch (JumpTableArch) {
1203+
case Triple::x86:
1204+
case Triple::x86_64:
1205+
if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
11921206
M.getModuleFlag("cf-protection-branch")))
1193-
if (MD->getZExtValue())
1194-
return kX86IBTJumpTableEntrySize;
1195-
return kX86JumpTableEntrySize;
1196-
case Triple::arm:
1197-
case Triple::thumb:
1207+
if (MD->getZExtValue())
1208+
return kX86IBTJumpTableEntrySize;
1209+
return kX86JumpTableEntrySize;
1210+
case Triple::arm:
1211+
return kARMJumpTableEntrySize;
1212+
case Triple::thumb:
1213+
if (CanUseThumbBWJumpTable)
11981214
return kARMJumpTableEntrySize;
1199-
case Triple::aarch64:
1200-
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
1215+
else
1216+
return kARMv6MJumpTableEntrySize;
1217+
case Triple::aarch64:
1218+
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
12011219
M.getModuleFlag("branch-target-enforcement")))
1202-
if (BTE->getZExtValue())
1203-
return kARMBTIJumpTableEntrySize;
1204-
return kARMJumpTableEntrySize;
1205-
case Triple::riscv32:
1206-
case Triple::riscv64:
1207-
return kRISCVJumpTableEntrySize;
1208-
default:
1209-
report_fatal_error("Unsupported architecture for jump tables");
1220+
if (BTE->getZExtValue())
1221+
return kARMBTIJumpTableEntrySize;
1222+
return kARMJumpTableEntrySize;
1223+
case Triple::riscv32:
1224+
case Triple::riscv64:
1225+
return kRISCVJumpTableEntrySize;
1226+
default:
1227+
report_fatal_error("Unsupported architecture for jump tables");
12101228
}
12111229
}
12121230

@@ -1240,7 +1258,32 @@ void LowerTypeTestsModule::createJumpTableEntry(
12401258
AsmOS << "bti c\n";
12411259
AsmOS << "b $" << ArgIndex << "\n";
12421260
} else if (JumpTableArch == Triple::thumb) {
1243-
AsmOS << "b.w $" << ArgIndex << "\n";
1261+
if (!CanUseThumbBWJumpTable) {
1262+
// In Armv6-M, this sequence will generate a branch without corrupting
1263+
// any registers. We use two stack words; in the second, we construct the
1264+
// address we'll pop into pc, and the first is used to save and restore
1265+
// r0 which we use as a temporary register.
1266+
//
1267+
// To support position-independent use cases, the offset of the target
1268+
// function is stored as a relative offset (which will expand into an
1269+
// R_ARM_REL32 relocation in ELF, and presumably the equivalent in other
1270+
// object file types), and added to pc after we load it. (The alternative
1271+
// B.W is automatically pc-relative.)
1272+
//
1273+
// There are five 16-bit Thumb instructions here, so the .balign 4 adds a
1274+
// sixth halfword of padding, and then the offset consumes a further 4
1275+
// bytes, for a total of 16, which is very convenient since entries in
1276+
// this jump table need to have power-of-two size.
1277+
AsmOS << "push {r0,r1}\n"
1278+
<< "ldr r0, 1f\n"
1279+
<< "0: add r0, r0, pc\n"
1280+
<< "str r0, [sp, #4]\n"
1281+
<< "pop {r0,pc}\n"
1282+
<< ".balign 4\n"
1283+
<< "1: .word $" << ArgIndex << " - (0b + 4)\n";
1284+
} else {
1285+
AsmOS << "b.w $" << ArgIndex << "\n";
1286+
}
12441287
} else if (JumpTableArch == Triple::riscv32 ||
12451288
JumpTableArch == Triple::riscv64) {
12461289
AsmOS << "tail $" << ArgIndex << "@plt\n";
@@ -1352,12 +1395,19 @@ static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
13521395
// Each jump table must be either ARM or Thumb as a whole for the bit-test math
13531396
// to work. Pick one that matches the majority of members to minimize interop
13541397
// veneers inserted by the linker.
1355-
static Triple::ArchType
1356-
selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
1357-
Triple::ArchType ModuleArch) {
1358-
if (ModuleArch != Triple::arm && ModuleArch != Triple::thumb)
1359-
return ModuleArch;
1398+
Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding(
1399+
ArrayRef<GlobalTypeMember *> Functions) {
1400+
if (Arch != Triple::arm && Arch != Triple::thumb)
1401+
return Arch;
1402+
1403+
if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) {
1404+
// In architectures that provide Arm and Thumb-1 but not Thumb-2,
1405+
// we should always prefer the Arm jump table format, because the
1406+
// Thumb-1 one is larger and slower.
1407+
return Triple::arm;
1408+
}
13601409

1410+
// Otherwise, go with majority vote.
13611411
unsigned ArmCount = 0, ThumbCount = 0;
13621412
for (const auto GTM : Functions) {
13631413
if (!GTM->isJumpTableCanonical()) {
@@ -1368,7 +1418,7 @@ selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
13681418
}
13691419

13701420
Function *F = cast<Function>(GTM->getGlobal());
1371-
++(isThumbFunction(F, ModuleArch) ? ThumbCount : ArmCount);
1421+
++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount);
13721422
}
13731423

13741424
return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
@@ -1381,8 +1431,6 @@ void LowerTypeTestsModule::createJumpTable(
13811431
SmallVector<Value *, 16> AsmArgs;
13821432
AsmArgs.reserve(Functions.size() * 2);
13831433

1384-
Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions, Arch);
1385-
13861434
for (GlobalTypeMember *GTM : Functions)
13871435
createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
13881436
cast<Function>(GTM->getGlobal()));
@@ -1399,9 +1447,11 @@ void LowerTypeTestsModule::createJumpTable(
13991447
F->addFnAttr("target-features", "-thumb-mode");
14001448
if (JumpTableArch == Triple::thumb) {
14011449
F->addFnAttr("target-features", "+thumb-mode");
1402-
// Thumb jump table assembly needs Thumb2. The following attribute is added
1403-
// by Clang for -march=armv7.
1404-
F->addFnAttr("target-cpu", "cortex-a8");
1450+
if (CanUseThumbBWJumpTable) {
1451+
// Thumb jump table assembly needs Thumb2. The following attribute is
1452+
// added by Clang for -march=armv7.
1453+
F->addFnAttr("target-cpu", "cortex-a8");
1454+
}
14051455
}
14061456
// When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
14071457
// for the function to avoid double BTI. This is a no-op without
@@ -1521,6 +1571,10 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
15211571
// FIXME: find a better way to represent the jumptable in the IR.
15221572
assert(!Functions.empty());
15231573

1574+
// Decide on the jump table encoding, so that we know how big the
1575+
// entries will be.
1576+
JumpTableArch = selectJumpTableArmEncoding(Functions);
1577+
15241578
// Build a simple layout based on the regular layout of jump tables.
15251579
DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
15261580
unsigned EntrySize = getJumpTableEntrySize();
@@ -1706,18 +1760,31 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
17061760

17071761
/// Lower all type tests in this module.
17081762
LowerTypeTestsModule::LowerTypeTestsModule(
1709-
Module &M, ModuleSummaryIndex *ExportSummary,
1763+
Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
17101764
const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
17111765
: M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
17121766
DropTypeTests(DropTypeTests || ClDropTypeTests) {
17131767
assert(!(ExportSummary && ImportSummary));
17141768
Triple TargetTriple(M.getTargetTriple());
17151769
Arch = TargetTriple.getArch();
1770+
if (Arch == Triple::arm)
1771+
CanUseArmJumpTable = true;
1772+
if (Arch == Triple::arm || Arch == Triple::thumb) {
1773+
auto &FAM =
1774+
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1775+
for (Function &F : M) {
1776+
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
1777+
if (TTI.hasArmWideBranch(false))
1778+
CanUseArmJumpTable = true;
1779+
if (TTI.hasArmWideBranch(true))
1780+
CanUseThumbBWJumpTable = true;
1781+
}
1782+
}
17161783
OS = TargetTriple.getOS();
17171784
ObjectFormat = TargetTriple.getObjectFormat();
17181785
}
17191786

1720-
bool LowerTypeTestsModule::runForTesting(Module &M) {
1787+
bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
17211788
ModuleSummaryIndex Summary(/*HaveGVs=*/false);
17221789

17231790
// Handle the command-line summary arguments. This code is for testing
@@ -1735,7 +1802,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
17351802

17361803
bool Changed =
17371804
LowerTypeTestsModule(
1738-
M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
1805+
M, AM,
1806+
ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
17391807
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
17401808
/*DropTypeTests*/ false)
17411809
.lower();
@@ -2298,10 +2366,10 @@ PreservedAnalyses LowerTypeTestsPass::run(Module &M,
22982366
ModuleAnalysisManager &AM) {
22992367
bool Changed;
23002368
if (UseCommandLine)
2301-
Changed = LowerTypeTestsModule::runForTesting(M);
2369+
Changed = LowerTypeTestsModule::runForTesting(M, AM);
23022370
else
23032371
Changed =
2304-
LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
2372+
LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
23052373
.lower();
23062374
if (!Changed)
23072375
return PreservedAnalyses::all();

llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
target datalayout = "e-p:64:64"
44

5-
define void @f1() "target-features"="+thumb-mode" !type !0 {
5+
define void @f1() "target-features"="+thumb-mode,+v6t2" !type !0 {
66
ret void
77
}
88

0 commit comments

Comments
 (0)