
Commit ad4e077

[BOLT][Instrumentation] Put Allocator itself in shared memory by default
In the absence of the instrumentation-file-append-pid option, the global allocator uses shared pages for allocation. However, since it is a global variable, it gets COW'd after fork if instrumentation-sleep-time is used or if the process forks on its own. This means it hands out the same pages to every process, which causes hash table corruption. Thus, if we want shared pages, we need to put the allocator itself in a shared page, which this commit does in __bolt_instr_setup.

I also added a couple of assertions to sanity-check the hash table.

Reviewed By: rafauler, Amir

Differential Revision: https://reviews.llvm.org/D153771
1 parent 02c3724 commit ad4e077
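
The core of the change is a single pattern: rather than letting the allocator live as an ordinary global (whose pages become private copy-on-write copies after fork()), __bolt_instr_setup mmaps an anonymous shared page and constructs the allocator into it with placement new, so the parent and any forked children keep updating the same physical memory. Below is a minimal standalone sketch of that pattern; the SharedState struct and the 4096-byte size are illustrative stand-ins, not the BOLT runtime code (which uses BumpPtrAllocator and its own __mmap wrapper, and declares placement operator new by hand because it cannot include <new>).

#include <cassert>
#include <cstdio>
#include <new>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

// Stand-in for the allocator state that every process must observe.
struct SharedState {
  long Counter = 0;
};

int main() {
  // Anonymous *shared* mapping: after fork(), parent and child see the same
  // physical page instead of each getting a copy-on-write private copy.
  void *Storage = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  assert(Storage != MAP_FAILED);

  // Construct the object inside the shared page with placement new, mirroring
  // "GlobalAlloc = new (GlobalMetadataStorage) BumpPtrAllocator;" in the diff.
  SharedState *State = new (Storage) SharedState;

  if (fork() == 0) {
    __sync_fetch_and_add(&State->Counter, 1); // child updates the shared page
    _exit(0);
  }
  wait(nullptr);
  // With MAP_SHARED this prints 1; with MAP_PRIVATE the parent would still see
  // 0, since each process would be working on its own copy of the state.
  printf("Counter = %ld\n", State->Counter);
  return 0;
}

In the actual runtime the mapping is only shared when instrumentation-file-append-pid is not used; the diff below keeps that choice in the existing MapPrivateOrShared flag and in GlobalAlloc->setShared(Shared).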

1 file changed (+45, -14)

bolt/runtime/instr.cpp

Lines changed: 45 additions & 14 deletions
@@ -210,7 +210,12 @@ class BumpPtrAllocator {
 
 /// Used for allocating indirect call instrumentation counters. Initialized by
 /// __bolt_instr_setup, our initialization routine.
-BumpPtrAllocator GlobalAlloc;
+BumpPtrAllocator *GlobalAlloc;
+
+// Storage for GlobalAlloc which can be shared if not using
+// instrumentation-file-append-pid.
+void *GlobalMetadataStorage;
+
 } // anonymous namespace
 
 // User-defined placement new operators. We only use those (as opposed to
@@ -230,6 +235,10 @@ void *operator new[](size_t Sz, BumpPtrAllocator &A, char C) {
   memset(Ptr, C, Sz);
   return Ptr;
 }
+
+// Declaration for global allocator to construct it in shared memory if needed.
+// Needed because we can't #include <new>
+void *operator new(size_t, void *) noexcept;
 // Only called during exception unwinding (useless). We must manually dealloc.
 // C++ language weirdness
 void operator delete(void *Ptr, BumpPtrAllocator &A) { A.deallocate(Ptr); }
@@ -264,7 +273,17 @@ class SimpleHashTable {
   /// Increment by 1 the value of \p Key. If it is not in this table, it will be
   /// added to the table and its value set to 1.
   void incrementVal(uint64_t Key, BumpPtrAllocator &Alloc) {
-    ++get(Key, Alloc).Val;
+    if (!__bolt_instr_conservative) {
+      TryLock L(M);
+      if (!L.isLocked())
+        return;
+      auto &E = getOrAllocEntry(Key, Alloc);
+      ++E.Val;
+      return;
+    }
+    Lock L(M);
+    auto &E = getOrAllocEntry(Key, Alloc);
+    ++E.Val;
   }
 
   /// Basic member accessing interface. Here we pass the allocator explicitly to
@@ -308,10 +327,10 @@ class SimpleHashTable {
       if (Entry.Key == VacantMarker)
         continue;
       if (Entry.Key & FollowUpTableMarker) {
-        forEachElement(Callback, IncSize,
-                       reinterpret_cast<MapEntry *>(Entry.Key &
-                                                    ~FollowUpTableMarker),
-                       args...);
+        MapEntry *Next =
+            reinterpret_cast<MapEntry *>(Entry.Key & ~FollowUpTableMarker);
+        assert(Next != Entries, "Circular reference!");
+        forEachElement(Callback, IncSize, Next, args...);
         continue;
       }
       Callback(Entry, args...);
@@ -358,12 +377,18 @@ class SimpleHashTable {
     CurEntrySelector = CurEntrySelector % IncSize;
     NextLevelTbl[CurEntrySelector] = Entry;
     Entry.Key = reinterpret_cast<uint64_t>(NextLevelTbl) | FollowUpTableMarker;
+    assert((NextLevelTbl[CurEntrySelector].Key & ~FollowUpTableMarker) !=
+               uint64_t(Entries),
+           "circular reference created!\n");
     return getEntry(NextLevelTbl, Key, Remainder, Alloc, CurLevel + 1);
   }
 
   MapEntry &getOrAllocEntry(uint64_t Key, BumpPtrAllocator &Alloc) {
-    if (TableRoot)
-      return getEntry(TableRoot, Key, Key, Alloc, 0);
+    if (TableRoot) {
+      MapEntry &E = getEntry(TableRoot, Key, Key, Alloc, 0);
+      assert(!(E.Key & FollowUpTableMarker), "Invalid entry!");
+      return E;
+    }
     return firstAllocation(Key, Alloc);
   }
 };
@@ -1561,13 +1586,19 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
                      MAP_ANONYMOUS | MapPrivateOrShared | MAP_FIXED, -1, 0);
   assert(Ret != MAP_FAILED, "__bolt_instr_setup: Failed to mmap counters!");
 
-  // Conservatively reserve 100MiB shared pages
-  GlobalAlloc.setMaxSize(0x6400000);
-  GlobalAlloc.setShared(Shared);
-  GlobalWriteProfileMutex = new (GlobalAlloc, 0) Mutex();
+  GlobalMetadataStorage = __mmap(0, 4096, PROT_READ | PROT_WRITE,
+                                 MapPrivateOrShared | MAP_ANONYMOUS, -1, 0);
+  assert(GlobalMetadataStorage != MAP_FAILED,
+         "__bolt_instr_setup: failed to mmap page for metadata!");
+
+  GlobalAlloc = new (GlobalMetadataStorage) BumpPtrAllocator;
+  // Conservatively reserve 100MiB
+  GlobalAlloc->setMaxSize(0x6400000);
+  GlobalAlloc->setShared(Shared);
+  GlobalWriteProfileMutex = new (*GlobalAlloc, 0) Mutex();
   if (__bolt_instr_num_ind_calls > 0)
     GlobalIndCallCounters =
-        new (GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];
+        new (*GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];
 
   if (__bolt_instr_sleep_time != 0) {
     // Separate instrumented process to the own process group
@@ -1582,7 +1613,7 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
 
 extern "C" __attribute((force_align_arg_pointer)) void
 instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
-  GlobalIndCallCounters[IndCallID].incrementVal(Target, GlobalAlloc);
+  GlobalIndCallCounters[IndCallID].incrementVal(Target, *GlobalAlloc);
 }
 
 /// We receive as in-stack arguments the identifier of the indirect call site
