Skip to content

Commit 5331441

Browse files
committed
LLVM and SPIRV-LLVM-Translator pulldown (WW27)
LLVM: llvm/llvm-project@9aaba9d SPIRV-LLVM-Translator: KhronosGroup/SPIRV-LLVM-Translator@ae90401
2 parents 4429372 + f6667e1 commit 5331441

File tree

6,944 files changed

+229152
-152648
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

6,944 files changed

+229152
-152648
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,14 @@ class BinaryContext {
498498
/// to function \p BF.
499499
std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
500500

501+
/// Free memory used by jump table offsets
502+
void clearJumpTableOffsets() {
503+
for (auto &JTI : JumpTables) {
504+
JumpTable &JT = *JTI.second;
505+
JumpTable::OffsetsType Temp;
506+
Temp.swap(JT.OffsetEntries);
507+
}
508+
}
501509
/// Return true if the array of bytes represents a valid code padding.
502510
bool hasValidCodePadding(const BinaryFunction &BF);
503511

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1046,9 +1046,14 @@ class MCPlusBuilder {
10461046
/// Return handler and action info for invoke instruction if present.
10471047
Optional<MCPlus::MCLandingPad> getEHInfo(const MCInst &Inst) const;
10481048

1049-
// Add handler and action info for call instruction.
1049+
/// Add handler and action info for call instruction.
10501050
void addEHInfo(MCInst &Inst, const MCPlus::MCLandingPad &LP);
10511051

1052+
/// Update exception-handling info for the invoke instruction \p Inst.
1053+
/// Return true on success and false otherwise, e.g. if the instruction is
1054+
/// not an invoke.
1055+
bool updateEHInfo(MCInst &Inst, const MCPlus::MCLandingPad &LP);
1056+
10521057
/// Return non-negative GNU_args_size associated with the instruction
10531058
/// or -1 if there's no associated info.
10541059
int64_t getGnuArgsSize(const MCInst &Inst) const;

bolt/include/bolt/Passes/SplitFunctions.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ class SplitFunctions : public BinaryFunctionPass {
3131
/// Split function body into fragments.
3232
void splitFunction(BinaryFunction &Function);
3333

34+
/// Create trampoline landing pads for exception handling code to guarantee
35+
/// that every landing pad is placed in the same function fragment as the
36+
/// corresponding thrower block. The trampoline landing pad, when created,
37+
/// will redirect the execution to the real landing pad in a different
38+
/// fragment.
39+
void createEHTrampolines(BinaryFunction &Function) const;
40+
3441
std::atomic<uint64_t> SplitBytesHot{0ull};
3542
std::atomic<uint64_t> SplitBytesCold{0ull};
3643

bolt/lib/Core/BinaryContext.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,12 +764,31 @@ BinaryFunction *BinaryContext::createBinaryFunction(
764764
const MCSymbol *
765765
BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
766766
JumpTable::JumpTableType Type) {
767+
auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) {
768+
return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
769+
};
770+
767771
if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
768772
assert(JT->Type == Type && "jump table types have to match");
769-
assert(JT->Parent == &Function &&
773+
bool HasMultipleParents = isFragmentOf(JT->Parent, &Function) ||
774+
isFragmentOf(&Function, JT->Parent);
775+
assert((JT->Parent == &Function || HasMultipleParents) &&
770776
"cannot re-use jump table of a different function");
771777
assert(Address == JT->getAddress() && "unexpected non-empty jump table");
772778

779+
// Overwrite every OffsetEntries element with INVALID_OFFSET when the jump
// table is shared by multiple fragments.
780+
// Also register the jump table with this function for easy access.
781+
if (HasMultipleParents) {
782+
if (opts::Verbosity > 2) {
783+
outs() << "BOLT-WARNING: Multiple fragments access same jump table: "
784+
<< JT->Parent->getPrintName() << "; " << Function.getPrintName()
785+
<< "\n";
786+
}
787+
constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
788+
for (unsigned I = 0; I < JT->OffsetEntries.size(); ++I)
789+
JT->OffsetEntries[I] = INVALID_OFFSET;
790+
Function.JumpTables.emplace(Address, JT);
791+
}
773792
return JT->getFirstLabel();
774793
}
775794

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -912,8 +912,8 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, bool EmitColdPart) {
912912
// defined in the same section and hence cannot place the landing pad into a
913913
// cold fragment when the corresponding call site is in the hot fragment.
914914
// Because of this issue and the previously described issue of possible
915-
// zero-offset landing pad we disable splitting of exception-handling
916-
// code for shared objects.
915+
// zero-offset landing pad we have to place landing pads in the same section
916+
// as the corresponding invokes for shared objects.
917917
std::function<void(const MCSymbol *)> emitLandingPad;
918918
if (BC.HasFixedLoadAddress) {
919919
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
@@ -925,8 +925,6 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, bool EmitColdPart) {
925925
Streamer.emitSymbolValue(LPSymbol, 4);
926926
};
927927
} else {
928-
assert(!EmitColdPart &&
929-
"cannot have exceptions in cold fragment for shared object");
930928
Streamer.emitIntValue(dwarf::DW_EH_PE_omit, 1); // LPStart format
931929
emitLandingPad = [&](const MCSymbol *LPSymbol) {
932930
if (!LPSymbol)

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1654,11 +1654,13 @@ void BinaryFunction::postProcessJumpTables() {
16541654
"detected in function "
16551655
<< *this << '\n';
16561656
}
1657-
for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
1658-
MCSymbol *Label =
1659-
getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
1660-
/*CreatePastEnd*/ true);
1661-
JT.Entries.push_back(Label);
1657+
if (JT.Entries.empty()) {
1658+
for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
1659+
MCSymbol *Label =
1660+
getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
1661+
/*CreatePastEnd*/ true);
1662+
JT.Entries.push_back(Label);
1663+
}
16621664
}
16631665

16641666
const uint64_t BDSize =
@@ -1700,12 +1702,6 @@ void BinaryFunction::postProcessJumpTables() {
17001702
}
17011703
clearList(JTSites);
17021704

1703-
// Free memory used by jump table offsets.
1704-
for (auto &JTI : JumpTables) {
1705-
JumpTable &JT = *JTI.second;
1706-
clearList(JT.OffsetEntries);
1707-
}
1708-
17091705
// Conservatively populate all possible destinations for unknown indirect
17101706
// branches.
17111707
if (opts::StrictMode && hasInternalReference()) {

bolt/lib/Core/MCPlusBuilder.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,17 @@ void MCPlusBuilder::addEHInfo(MCInst &Inst, const MCLandingPad &LP) {
159159
}
160160
}
161161

162+
/// Overwrite the landing pad and action annotations of \p Inst with the
/// values carried by \p LP. Nothing is modified, and false is returned, when
/// \p Inst is not an invoke; otherwise the function returns true.
bool MCPlusBuilder::updateEHInfo(MCInst &Inst, const MCLandingPad &LP) {
  const bool IsInvokeInst = isInvoke(Inst);
  if (IsInvokeInst) {
    // The landing pad symbol pointer is stored as an integer annotation.
    const int64_t LandingPadValue = reinterpret_cast<int64_t>(LP.first);
    const int64_t ActionValue = static_cast<int64_t>(LP.second);
    setAnnotationOpValue(Inst, MCAnnotation::kEHLandingPad, LandingPadValue);
    setAnnotationOpValue(Inst, MCAnnotation::kEHAction, ActionValue);
  }
  return IsInvokeInst;
}
172+
162173
int64_t MCPlusBuilder::getGnuArgsSize(const MCInst &Inst) const {
163174
Optional<int64_t> Value =
164175
getAnnotationOpValue(Inst, MCAnnotation::kGnuArgsSize);

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 76 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
118118

119119
bool AllCold = true;
120120
for (BinaryBasicBlock *BB : BF.layout()) {
121-
uint64_t ExecCount = BB->getExecutionCount();
121+
const uint64_t ExecCount = BB->getExecutionCount();
122122
if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
123123
return;
124124
if (ExecCount != 0)
@@ -140,12 +140,12 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
140140
<< " pre-split is <0x"
141141
<< Twine::utohexstr(OriginalHotSize) << ", 0x"
142142
<< Twine::utohexstr(ColdSize) << ">\n");
143-
}
144-
145-
if (opts::SplitFunctions == SplitFunctions::ST_LARGE && !BC.HasRelocations) {
146-
// Split only if the function wouldn't fit.
147-
if (OriginalHotSize <= BF.getMaxSize())
148-
return;
143+
if (opts::SplitFunctions == SplitFunctions::ST_LARGE &&
144+
!BC.HasRelocations) {
145+
// Split only if the function wouldn't fit.
146+
if (OriginalHotSize <= BF.getMaxSize())
147+
return;
148+
}
149149
}
150150

151151
// Never outline the first basic block.
@@ -164,9 +164,9 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
164164
BB->setCanOutline(false);
165165
continue;
166166
}
167+
167168
if (BF.hasEHRanges() && !opts::SplitEH) {
168-
// We cannot move landing pads (or rather entry points for landing
169-
// pads).
169+
// We cannot move landing pads (or rather entry points for landing pads).
170170
if (BB->isLandingPad()) {
171171
BB->setCanOutline(false);
172172
continue;
@@ -176,7 +176,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
176176
// that the block never throws, it is safe to move the block to
177177
// decrease the size of the function.
178178
for (MCInst &Instr : *BB) {
179-
if (BF.getBinaryContext().MIB->isInvoke(Instr)) {
179+
if (BC.MIB->isInvoke(Instr)) {
180180
BB->setCanOutline(false);
181181
break;
182182
}
@@ -214,6 +214,12 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
214214
BB->setIsCold(true);
215215
}
216216

217+
// For shared objects, place invoke instructions and corresponding landing
218+
// pads in the same fragment. To reduce hot code size, create trampoline
219+
// landing pads that will redirect the execution to the real LP.
220+
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
221+
createEHTrampolines(BF);
222+
217223
// Check the new size to see if it's worth splitting the function.
218224
if (BC.isX86() && BF.isSplit()) {
219225
std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
@@ -237,5 +243,65 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
237243
}
238244
}
239245

246+
/// For every instruction in \p BF whose landing pad lives in a different
/// fragment (hot vs. cold) than the instruction itself, redirect the
/// instruction to a trampoline block placed in the instruction's own
/// fragment. The trampoline has the real landing pad as its only successor.
/// If any trampoline was created, the layout is re-sorted so that hot blocks
/// precede cold ones, branches are fixed up and landing pads are recomputed.
void SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
  const auto &MIB = BF.getBinaryContext().MIB;

  // Real landing pad label -> label of the trampoline created for it, so
  // that each landing pad gets at most one trampoline.
  std::unordered_map<const MCSymbol *, const MCSymbol *> TrampolineForLP;

  // Walk a snapshot of the block pointers: new blocks are appended to the
  // function inside the loop, which would invalidate its iterators.
  std::vector<BinaryBasicBlock *> Snapshot(BF.pbegin(), BF.pend());
  for (BinaryBasicBlock *Thrower : Snapshot) {
    for (MCInst &Inst : *Thrower) {
      const Optional<MCPlus::MCLandingPad> LPInfo = MIB->getEHInfo(Inst);
      if (!LPInfo)
        continue;

      const MCSymbol *RealLPLabel = LPInfo->first;
      if (!RealLPLabel)
        continue;

      // Nothing to do when the thrower and its landing pad already share a
      // fragment.
      BinaryBasicBlock *RealLPBlock = BF.getBasicBlockForLabel(RealLPLabel);
      const bool InSameFragment = Thrower->isCold() == RealLPBlock->isCold();
      if (InSameFragment)
        continue;

      auto Known = TrampolineForLP.find(RealLPLabel);
      if (Known == TrampolineForLP.end()) {
        // First cross-fragment use of this landing pad: create the
        // trampoline in the thrower's fragment. No jump is emitted here;
        // fixBranches() below inserts it.
        BinaryBasicBlock *Trampoline = BF.addBasicBlock();
        Trampoline->setIsCold(Thrower->isCold());
        Trampoline->setExecutionCount(RealLPBlock->getExecutionCount());
        Trampoline->addSuccessor(RealLPBlock, Trampoline->getExecutionCount());
        Trampoline->setCFIState(RealLPBlock->getCFIState());
        const MCSymbol *NewLabel = Trampoline->getLabel();
        Known =
            TrampolineForLP.emplace(std::make_pair(RealLPLabel, NewLabel)).first;
      }

      // Point the instruction at the trampoline, keeping its action value.
      MIB->updateEHInfo(Inst,
                        MCPlus::MCLandingPad(Known->second, LPInfo->second));
    }
  }

  // No cross-fragment landing pads were found; leave the function as is.
  if (TrampolineForLP.empty())
    return;

  // Trampolines were appended at the end of the function. A stable sort on
  // the cold flag moves each one to the end of its own fragment while
  // keeping the relative order of all other blocks.
  std::stable_sort(BF.layout_begin(), BF.layout_end(),
                   [&](BinaryBasicBlock *Lhs, BinaryBasicBlock *Rhs) {
                     return Lhs->isCold() < Rhs->isCold();
                   });

  // Conservatively introduce branch instructions.
  BF.fixBranches();

  // Update exception-handling CFG for the function.
  BF.recomputeLandingPads();
}
305+
240306
} // namespace bolt
241307
} // namespace llvm

bolt/lib/Passes/ValidateInternalCalls.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ bool ValidateInternalCalls::fixCFGForPIC(BinaryFunction &Function) const {
106106
// block.
107107
std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
108108
NewBBs.emplace_back(Function.createBasicBlock());
109+
NewBBs.back()->setOffset(0);
109110
NewBBs.back()->addInstructions(MovedInsts.begin(), MovedInsts.end());
110111
BB.moveAllSuccessorsTo(NewBBs.back().get());
111112
Function.insertBasicBlocks(&BB, std::move(NewBBs));

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1686,11 +1686,6 @@ void RewriteInstance::adjustCommandLineOptions() {
16861686
opts::SplitEH = false;
16871687
}
16881688

1689-
if (opts::SplitEH && !BC->HasFixedLoadAddress) {
1690-
errs() << "BOLT-WARNING: disabling -split-eh for shared object\n";
1691-
opts::SplitEH = false;
1692-
}
1693-
16941689
if (opts::StrictMode && !BC->HasRelocations) {
16951690
errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
16961691
"mode\n";
@@ -2903,6 +2898,7 @@ void RewriteInstance::disassembleFunctions() {
29032898
BC->processInterproceduralReferences(Function);
29042899
}
29052900

2901+
BC->clearJumpTableOffsets();
29062902
BC->populateJumpTables();
29072903
BC->skipMarkedFragments();
29082904

@@ -5206,8 +5202,6 @@ uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
52065202
if (!Function)
52075203
return 0;
52085204

5209-
assert(!Function->isFragment() && "cannot get new address for a fragment");
5210-
52115205
return Function->getOutputAddress();
52125206
}
52135207

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3129,7 +3129,12 @@ class X86MCPlusBuilder : public MCPlusBuilder {
31293129
case 1: Opcode = X86::MOV8ri; break;
31303130
case 2: Opcode = X86::MOV16ri; break;
31313131
case 4: Opcode = X86::MOV32ri; break;
3132-
case 8: Opcode = X86::MOV64ri; break;
3132+
// Writing to a 32-bit register always zeros the upper 32 bits of the
3133+
// full-width register.
3134+
case 8:
3135+
Opcode = X86::MOV32ri;
3136+
Reg = getAliasSized(Reg, 4);
3137+
break;
31333138
default:
31343139
llvm_unreachable("Unexpected size");
31353140
}

bolt/test/X86/shared_object_entry.s

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
2+
# RUN: ld.lld %t.o -o %t.so --shared --entry=func1.cold.1 --emit-relocs
3+
# RUN: llvm-bolt -relocs %t.so -o %t -reorder-functions=hfsort+ \
4+
# RUN: -split-functions=3 -reorder-blocks=ext-tsp -split-all-cold \
5+
# RUN: -dyno-stats -icf=1 -use-gnu-stack
6+
7+
# Check that an entry point is a cold symbol
8+
# RUN: llvm-readelf -h %t.so > %t.log
9+
# RUN: llvm-nm %t.so >> %t.log
10+
# RUN: FileCheck %s --input-file %t.log
11+
# CHECK: Entry point address: 0x[[#%X,ENTRY:]]
12+
# CHECK: [[#%x,ENTRY]] {{.*}} func1.cold.1
13+
14+
.globl func1.cold.1
15+
.type func1.cold.1,@function
16+
func1.cold.1:
17+
.cfi_startproc
18+
.L1:
19+
movq %rbx, %rdx
20+
jmp .L3
21+
.L2:
22+
# exit(0)
23+
movq $60, %rax
24+
xorq %rdi, %rdi
25+
syscall
26+
.size func1.cold.1, .-func1.cold.1
27+
.cfi_endproc
28+
29+
.globl func1
30+
.type func1,@function
31+
func1:
32+
.cfi_startproc
33+
.L3:
34+
movq %rax, %rdi
35+
jmp .L2
36+
call exit
37+
.size func1, .-func1
38+
.cfi_endproc

0 commit comments

Comments
 (0)