Skip to content

Commit 823adc7

Browse files
authored
[BOLT] Validate secondary entry point (#135731)
Some functions have their sizes as zero in input binary's symbol table, like those compiled by assembler. When figuring out function sizes, we may create label symbol if it doesn't point to any constant island. However, before function size is known, marker symbol can not be correctly associated to a function and therefore all such checks would fail and we could end up adding a code label pointing to constant island as secondary entry point and later mistakenly marking the function as not simple. Querying the global marker symbol array has big throughput overhead. Instead we can run an extra check when post processing entry points to identify such label symbols that actually point to constant islands.
1 parent 2271f0b commit 823adc7

File tree

4 files changed

+86
-0
lines changed

4 files changed

+86
-0
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,11 @@ class BinaryFunction {
11741174
return getSecondaryEntryPointSymbol(BB.getLabel());
11751175
}
11761176

1177+
/// Remove a label from the secondary entry point map.
1178+
void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
1179+
SecondaryEntryPoints.erase(Label);
1180+
}
1181+
11771182
/// Return true if the basic block is an entry point into the function
11781183
/// (either primary or secondary).
11791184
bool isEntryPoint(const BinaryBasicBlock &BB) const {
@@ -2126,6 +2131,10 @@ class BinaryFunction {
21262131
return Islands && !Islands->DataOffsets.empty();
21272132
}
21282133

2134+
bool isStartOfConstantIsland(uint64_t Offset) const {
2135+
return hasConstantIsland() && Islands->DataOffsets.count(Offset);
2136+
}
2137+
21292138
/// Return true iff the symbol could be seen inside this function otherwise
21302139
/// it is probably another function.
21312140
bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1896,6 +1896,15 @@ void BinaryFunction::postProcessEntryPoints() {
18961896
if (BC.isAArch64() && Offset == getSize())
18971897
continue;
18981898

1899+
// If we have grabbed a wrong code label which actually points to some
1900+
// constant island inside the function, ignore this label and remove it
1901+
// from the secondary entry point map.
1902+
if (isStartOfConstantIsland(Offset)) {
1903+
BC.SymbolToFunctionMap.erase(Label);
1904+
removeSymbolFromSecondaryEntryPointMap(Label);
1905+
continue;
1906+
}
1907+
18991908
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
19001909
"detected in function "
19011910
<< *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# This test is to verify that BOLT won't take a label pointing to constant
2+
# island as a secondary entry point (function `_start` doesn't have ELF size
3+
# set originally) and the function won't otherwise be mistaken as non-simple.
4+
5+
# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
6+
# RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s
7+
# CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected \
8+
# CHECK-NOT: function _start at offset 0x{{[0-9a-f]+}}
9+
# CHECK: Binary Function "_start" after building cfg
10+
11+
.text
12+
13+
.global _foo
14+
.type _foo, %function
15+
_foo:
16+
ret
17+
18+
.global _start
19+
.type _start, %function
20+
_start:
21+
b _foo
22+
23+
.balign 16
24+
_random_consts:
25+
.long 0x12345678
26+
.long 0x90abcdef
27+
28+
.global _bar
29+
.type _bar, %function
30+
_bar:
31+
ret
32+
33+
# Dummy relocation to force relocation mode
34+
.reloc 0, R_AARCH64_NONE
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# This test is to verify that BOLT won't take a label pointing to constant
2+
# island as a secondary entry point (function `_start` doesn't have ELF size
3+
# set originally) and the function won't otherwise be mistaken as non-simple.
4+
5+
# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
6+
# RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s
7+
# CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected \
8+
# CHECK-NOT: function _start at offset 0x{{[0-9a-f]+}}
9+
# CHECK: Binary Function "_start" after building cfg
10+
11+
.text
12+
13+
.global _foo
14+
.type _foo, %function
15+
_foo:
16+
ret
17+
18+
.global _start
19+
.type _start, %function
20+
_start:
21+
j _foo
22+
23+
.balign 16
24+
_random_consts:
25+
.long 0x12345678
26+
.long 0x90abcdef
27+
28+
.global _bar
29+
.type _bar, %function
30+
_bar:
31+
ret
32+
33+
# Dummy relocation to force relocation mode
34+
.reloc 0, R_RISCV_NONE

0 commit comments

Comments
 (0)