Skip to content

Commit b98e6a5

Browse files
[BOLT][AArch64] Skip BBs only instead of functions (#81989)
After [this ](846eb76) commit we noticed that the size of fdata file decreased a lot. That's why the better and more precise way will be to skip basic blocks with exclusive instructions only instead of the whole function
1 parent 8a87f76 commit b98e6a5

File tree

4 files changed

+196
-28
lines changed

4 files changed

+196
-28
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,17 @@ class MCPlusBuilder {
620620
return Info->get(Inst.getOpcode()).mayStore();
621621
}
622622

623-
virtual bool isAArch64Exclusive(const MCInst &Inst) const {
623+
virtual bool isAArch64ExclusiveLoad(const MCInst &Inst) const {
624+
llvm_unreachable("not implemented");
625+
return false;
626+
}
627+
628+
virtual bool isAArch64ExclusiveStore(const MCInst &Inst) const {
629+
llvm_unreachable("not implemented");
630+
return false;
631+
}
632+
633+
virtual bool isAArch64ExclusiveClear(const MCInst &Inst) const {
624634
llvm_unreachable("not implemented");
625635
return false;
626636
}

bolt/lib/Passes/Instrumentation.cpp

Lines changed: 86 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
#include "bolt/Utils/Utils.h"
1818
#include "llvm/Support/CommandLine.h"
1919
#include "llvm/Support/RWMutex.h"
20+
#include <queue>
2021
#include <stack>
22+
#include <unordered_set>
2123

2224
#define DEBUG_TYPE "bolt-instrumentation"
2325

@@ -86,21 +88,89 @@ cl::opt<bool> InstrumentCalls("instrument-calls",
8688
namespace llvm {
8789
namespace bolt {
8890

89-
static bool hasAArch64ExclusiveMemop(BinaryFunction &Function) {
91+
static bool hasAArch64ExclusiveMemop(
92+
BinaryFunction &Function,
93+
std::unordered_set<const BinaryBasicBlock *> &BBToSkip) {
9094
// FIXME ARMv8-a architecture reference manual says that software must avoid
9195
// having any explicit memory accesses between exclusive load and associated
92-
// store instruction. So for now skip instrumentation for functions that have
93-
// these instructions, since it might lead to runtime deadlock.
96+
// store instruction. So for now skip instrumentation for basic blocks that
97+
// have these instructions, since it might lead to runtime deadlock.
9498
BinaryContext &BC = Function.getBinaryContext();
95-
for (const BinaryBasicBlock &BB : Function)
96-
for (const MCInst &Inst : BB)
97-
if (BC.MIB->isAArch64Exclusive(Inst)) {
98-
if (opts::Verbosity >= 1)
99-
BC.outs() << "BOLT-INSTRUMENTER: Function " << Function
100-
<< " has exclusive instructions, skip instrumentation\n";
99+
std::queue<std::pair<BinaryBasicBlock *, bool>> BBQueue; // {BB, isLoad}
100+
std::unordered_set<BinaryBasicBlock *> Visited;
101+
102+
if (Function.getLayout().block_begin() == Function.getLayout().block_end())
103+
return 0;
104+
105+
BinaryBasicBlock *BBfirst = *Function.getLayout().block_begin();
106+
BBQueue.push({BBfirst, false});
107+
108+
while (!BBQueue.empty()) {
109+
BinaryBasicBlock *BB = BBQueue.front().first;
110+
bool IsLoad = BBQueue.front().second;
111+
BBQueue.pop();
112+
if (Visited.find(BB) != Visited.end())
113+
continue;
114+
Visited.insert(BB);
115+
116+
for (const MCInst &Inst : *BB) {
117+
// Two loads one after another - skip whole function
118+
if (BC.MIB->isAArch64ExclusiveLoad(Inst) && IsLoad) {
119+
if (opts::Verbosity >= 2) {
120+
outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
121+
<< " has two exclusive loads. Ignoring the function.\n";
122+
}
101123
return true;
102124
}
103125

126+
if (BC.MIB->isAArch64ExclusiveLoad(Inst))
127+
IsLoad = true;
128+
129+
if (IsLoad && BBToSkip.find(BB) == BBToSkip.end()) {
130+
BBToSkip.insert(BB);
131+
if (opts::Verbosity >= 2) {
132+
outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName()
133+
<< " due to exclusive instruction in function "
134+
<< Function.getPrintName() << "\n";
135+
}
136+
}
137+
138+
if (!IsLoad && BC.MIB->isAArch64ExclusiveStore(Inst)) {
139+
if (opts::Verbosity >= 2) {
140+
outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
141+
<< " has exclusive store without corresponding load. Ignoring "
142+
"the function.\n";
143+
}
144+
return true;
145+
}
146+
147+
if (IsLoad && (BC.MIB->isAArch64ExclusiveStore(Inst) ||
148+
BC.MIB->isAArch64ExclusiveClear(Inst)))
149+
IsLoad = false;
150+
}
151+
152+
if (IsLoad && BB->succ_size() == 0) {
153+
if (opts::Verbosity >= 2) {
154+
outs()
155+
<< "BOLT-INSTRUMENTER: function " << Function.getPrintName()
156+
<< " has exclusive load in trailing BB. Ignoring the function.\n";
157+
}
158+
return true;
159+
}
160+
161+
for (BinaryBasicBlock *BBS : BB->successors())
162+
BBQueue.push({BBS, IsLoad});
163+
}
164+
165+
if (BBToSkip.size() == Visited.size()) {
166+
if (opts::Verbosity >= 2) {
167+
outs() << "BOLT-INSTRUMENTER: all BBs are marked with true. Ignoring the "
168+
"function "
169+
<< Function.getPrintName() << "\n";
170+
}
171+
return true;
172+
}
173+
104174
return false;
105175
}
106176

@@ -307,7 +377,8 @@ void Instrumentation::instrumentFunction(BinaryFunction &Function,
307377
if (BC.isMachO() && Function.hasName("___GLOBAL_init_65535/1"))
308378
return;
309379

310-
if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function))
380+
std::unordered_set<const BinaryBasicBlock *> BBToSkip;
381+
if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function, BBToSkip))
311382
return;
312383

313384
SplitWorklistTy SplitWorklist;
@@ -389,6 +460,11 @@ void Instrumentation::instrumentFunction(BinaryFunction &Function,
389460

390461
for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
391462
BinaryBasicBlock &BB = *BBI;
463+
464+
// Skip BBs with exclusive load/stores
465+
if (BBToSkip.find(&BB) != BBToSkip.end())
466+
continue;
467+
392468
bool HasUnconditionalBranch = false;
393469
bool HasJumpTable = false;
394470
bool IsInvokeBlock = InvokeBlocks.count(&BB) > 0;

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -270,32 +270,38 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
270270
return isLDRB(Inst) || isLDRH(Inst) || isLDRW(Inst) || isLDRX(Inst);
271271
}
272272

273-
bool isAArch64Exclusive(const MCInst &Inst) const override {
273+
bool isAArch64ExclusiveLoad(const MCInst &Inst) const override {
274274
return (Inst.getOpcode() == AArch64::LDXPX ||
275275
Inst.getOpcode() == AArch64::LDXPW ||
276276
Inst.getOpcode() == AArch64::LDXRX ||
277277
Inst.getOpcode() == AArch64::LDXRW ||
278278
Inst.getOpcode() == AArch64::LDXRH ||
279279
Inst.getOpcode() == AArch64::LDXRB ||
280-
Inst.getOpcode() == AArch64::STXPX ||
281-
Inst.getOpcode() == AArch64::STXPW ||
282-
Inst.getOpcode() == AArch64::STXRX ||
283-
Inst.getOpcode() == AArch64::STXRW ||
284-
Inst.getOpcode() == AArch64::STXRH ||
285-
Inst.getOpcode() == AArch64::STXRB ||
286280
Inst.getOpcode() == AArch64::LDAXPX ||
287281
Inst.getOpcode() == AArch64::LDAXPW ||
288282
Inst.getOpcode() == AArch64::LDAXRX ||
289283
Inst.getOpcode() == AArch64::LDAXRW ||
290284
Inst.getOpcode() == AArch64::LDAXRH ||
291-
Inst.getOpcode() == AArch64::LDAXRB ||
285+
Inst.getOpcode() == AArch64::LDAXRB);
286+
}
287+
288+
bool isAArch64ExclusiveStore(const MCInst &Inst) const override {
289+
return (Inst.getOpcode() == AArch64::STXPX ||
290+
Inst.getOpcode() == AArch64::STXPW ||
291+
Inst.getOpcode() == AArch64::STXRX ||
292+
Inst.getOpcode() == AArch64::STXRW ||
293+
Inst.getOpcode() == AArch64::STXRH ||
294+
Inst.getOpcode() == AArch64::STXRB ||
292295
Inst.getOpcode() == AArch64::STLXPX ||
293296
Inst.getOpcode() == AArch64::STLXPW ||
294297
Inst.getOpcode() == AArch64::STLXRX ||
295298
Inst.getOpcode() == AArch64::STLXRW ||
296299
Inst.getOpcode() == AArch64::STLXRH ||
297-
Inst.getOpcode() == AArch64::STLXRB ||
298-
Inst.getOpcode() == AArch64::CLREX);
300+
Inst.getOpcode() == AArch64::STLXRB);
301+
}
302+
303+
bool isAArch64ExclusiveClear(const MCInst &Inst) const override {
304+
return (Inst.getOpcode() == AArch64::CLREX);
299305
}
300306

301307
bool isLoadFromStack(const MCInst &Inst) const {

bolt/test/AArch64/exclusive-instrument.s

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,32 +6,108 @@
66
// RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
77
// RUN: %s -o %t.o
88
// RUN: %clang %cflags -fPIC -pie %t.o -o %t.exe -nostdlib -Wl,-q -Wl,-fini=dummy
9-
// RUN: llvm-bolt %t.exe -o %t.bolt -instrument -v=1 | FileCheck %s
9+
// RUN: llvm-bolt %t.exe -o %t.bolt -instrument -v=2 | FileCheck %s
1010

11-
// CHECK: Function foo has exclusive instructions, skip instrumentation
11+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function foo
12+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function foo
13+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function foo
14+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function case1
15+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function case2
16+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function case2
17+
// CHECK: BOLT-INSTRUMENTER: function case3 has exclusive store without corresponding load. Ignoring the function.
18+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function case4
19+
// CHECK: BOLT-INSTRUMENTER: function case4 has two exclusive loads. Ignoring the function.
20+
// CHECK: BOLT-INSTRUMENTER: skip BB {{.*}} due to exclusive instruction in function case5
21+
// CHECK: BOLT-INSTRUMENTER: function case5 has exclusive load in trailing BB. Ignoring the function.
1222

1323
.global foo
1424
.type foo, %function
1525
foo:
26+
# exclusive load and store in two bbs
1627
ldaxr w9, [x10]
1728
cbnz w9, .Lret
1829
stlxr w12, w11, [x9]
1930
cbz w12, foo
20-
clrex
2131
.Lret:
32+
clrex
2233
ret
2334
.size foo, .-foo
2435

2536
.global _start
2637
.type _start, %function
2738
_start:
28-
cmp x0, #0
29-
b.eq .Lexit
30-
bl foo
31-
.Lexit:
39+
mov x0, #0
40+
mov x1, #1
41+
mov x2, #2
42+
mov x3, #3
43+
44+
bl case1
45+
bl case2
46+
bl case3
47+
bl case4
48+
bl case5
49+
3250
ret
3351
.size _start, .-_start
3452

53+
# Case 1: exclusive load and store in one basic block
54+
.global case1
55+
.type case1, %function
56+
case1:
57+
str x0, [x2]
58+
ldxr w0, [x2]
59+
add w0, w0, #1
60+
stxr w1, w0, [x2]
61+
ret
62+
.size case1, .-case1
63+
64+
# Case 2: exclusive load and store in different blocks
65+
.global case2
66+
.type case2, %function
67+
case2:
68+
b case2_load
69+
70+
case2_load:
71+
ldxr x0, [x2]
72+
b case2_store
73+
74+
case2_store:
75+
add x0, x0, #1
76+
stxr w1, x0, [x2]
77+
ret
78+
.size case2, .-case2
79+
80+
# Case 3: store without preceding load
81+
.global case3
82+
.type case3, %function
83+
case3:
84+
stxr w1, x3, [x2]
85+
ret
86+
.size case3, .-case3
87+
88+
# Case 4: two exclusive load instructions in neighboring blocks
89+
.global case4
90+
.type case4, %function
91+
case4:
92+
b case4_load
93+
94+
case4_load:
95+
ldxr x0, [x2]
96+
b case4_load_next
97+
98+
case4_load_next:
99+
ldxr x1, [x2]
100+
ret
101+
.size case4, .-case4
102+
103+
# Case 5: Exclusive load without successor
104+
.global case5
105+
.type case5, %function
106+
case5:
107+
ldxr x0, [x2]
108+
ret
109+
.size case5, .-case5
110+
35111
.global dummy
36112
.type dummy, %function
37113
dummy:

0 commit comments

Comments
 (0)