
[NFC] Make RingBuffer an atomic pointer #82547

Merged: 5 commits, Feb 23, 2024
148 changes: 86 additions & 62 deletions compiler-rt/lib/scudo/standalone/combined.h
@@ -177,6 +177,18 @@ class Allocator {
mapAndInitializeRingBuffer();
}

void enableRingBuffer() {
AllocationRingBuffer *RB = getRingBuffer();
if (RB)
RB->Depot->enable();
}

void disableRingBuffer() {
AllocationRingBuffer *RB = getRingBuffer();
if (RB)
RB->Depot->disable();
}

// Initialize the embedded GWP-ASan instance. Requires the main allocator to
// be functional, best called from PostInitCallback.
void initGwpAsan() {
@@ -688,14 +700,12 @@ class Allocator {
Quarantine.disable();
Primary.disable();
Secondary.disable();
if (Depot)
Depot->disable();
disableRingBuffer();
}

void enable() NO_THREAD_SAFETY_ANALYSIS {
initThreadMaybe();
if (Depot)
Depot->enable();
enableRingBuffer();
Secondary.enable();
Primary.enable();
Quarantine.enable();
@@ -920,12 +930,14 @@ class Allocator {

const char *getStackDepotAddress() {
initThreadMaybe();
return reinterpret_cast<char *>(Depot);
AllocationRingBuffer *RB = getRingBuffer();
return RB ? reinterpret_cast<char *>(RB->Depot) : nullptr;
}

uptr getStackDepotSize() {
initThreadMaybe();
return StackDepotSize;
AllocationRingBuffer *RB = getRingBuffer();
return RB ? RB->StackDepotSize : 0;
}

const char *getRegionInfoArrayAddress() const {
@@ -938,12 +950,15 @@

const char *getRingBufferAddress() {
initThreadMaybe();
return RawRingBuffer;
return reinterpret_cast<char *>(getRingBuffer());
}

uptr getRingBufferSize() {
initThreadMaybe();
return RingBufferElements ? ringBufferSizeInBytes(RingBufferElements) : 0;
AllocationRingBuffer *RB = getRingBuffer();
return RB && RB->RingBufferElements
? ringBufferSizeInBytes(RB->RingBufferElements)
: 0;
}

static const uptr MaxTraceSize = 64;
@@ -1048,10 +1063,6 @@ class Allocator {
uptr GuardedAllocSlotSize = 0;
#endif // GWP_ASAN_HOOKS

StackDepot *Depot = nullptr;
uptr StackDepotSize = 0;
MemMapT RawStackDepotMap;

struct AllocationRingBuffer {
struct Entry {
atomic_uptr Ptr;
@@ -1061,16 +1072,23 @@ class Allocator {
atomic_u32 DeallocationTrace;
atomic_u32 DeallocationTid;
};

StackDepot *Depot = nullptr;
uptr StackDepotSize = 0;
MemMapT RawRingBufferMap;
MemMapT RawStackDepotMap;
u32 RingBufferElements = 0;
atomic_uptr Pos;
// An array of Size (at least one) elements of type Entry immediately
// follows this struct.
};
// Pointer to a memory-mapped area that starts with the AllocationRingBuffer
// struct and is immediately followed by Size elements of type Entry.
char *RawRingBuffer = {};
u32 RingBufferElements = 0;
MemMapT RawRingBufferMap;
atomic_uptr RingBufferAddress = {};

AllocationRingBuffer *getRingBuffer() {
return reinterpret_cast<AllocationRingBuffer *>(
atomic_load(&RingBufferAddress, memory_order_acquire));
}
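
The acquire load above pairs with the release store performed at the end of
mapAndInitializeRingBuffer(), so any thread that observes a non-null pointer
also observes the fully initialized ring buffer. A minimal standalone sketch
of the same publication pattern, using standard C++ atomics rather than
Scudo's wrappers (all names here are illustrative):

#include <atomic>
#include <cstdint>

struct Buffer {
  int Elements = 0; // initialized before the buffer is published
};

std::atomic<std::uintptr_t> BufferAddress{0};

// Publisher: finish all initialization first, then publish the address
// with release semantics.
void publish(Buffer *B) {
  BufferAddress.store(reinterpret_cast<std::uintptr_t>(B),
                      std::memory_order_release);
}

// Reader: the acquire load synchronizes-with the release store, so a
// non-null result implies the fields of *Buffer are visible; null means
// "not (yet) initialized".
Buffer *get() {
  return reinterpret_cast<Buffer *>(
      BufferAddress.load(std::memory_order_acquire));
}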

// The following might get optimized out by the compiler.
NOINLINE void performSanityChecks() {
@@ -1259,27 +1277,24 @@ class Allocator {
storeEndMarker(RoundNewPtr, NewSize, BlockEnd);
}

StackDepot *getDepotIfEnabled(const Options &Options) {
if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
return nullptr;
return Depot;
}

void storePrimaryAllocationStackMaybe(const Options &Options, void *Ptr) {
auto *Depot = getDepotIfEnabled(Options);
if (!Depot)
if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
return;
AllocationRingBuffer *RB = getRingBuffer();
if (!RB)
return;
auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(Depot);
Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(RB->Depot);
Ptr32[MemTagAllocationTidIndex] = getThreadID();
}

void storeRingBufferEntry(void *Ptr, u32 AllocationTrace, u32 AllocationTid,
void storeRingBufferEntry(AllocationRingBuffer *RB, void *Ptr,
u32 AllocationTrace, u32 AllocationTid,
uptr AllocationSize, u32 DeallocationTrace,
u32 DeallocationTid) {
uptr Pos = atomic_fetch_add(&getRingBuffer()->Pos, 1, memory_order_relaxed);
uptr Pos = atomic_fetch_add(&RB->Pos, 1, memory_order_relaxed);
typename AllocationRingBuffer::Entry *Entry =
getRingBufferEntry(RawRingBuffer, Pos % RingBufferElements);
getRingBufferEntry(RB, Pos % RB->RingBufferElements);

// First invalidate our entry so that we don't attempt to interpret a
// partially written state in getSecondaryErrorInfo(). The fences below
@@ -1300,32 +1315,36 @@

void storeSecondaryAllocationStackMaybe(const Options &Options, void *Ptr,
uptr Size) {
auto *Depot = getDepotIfEnabled(Options);
if (!Depot)
if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
return;
u32 Trace = collectStackTrace(Depot);
AllocationRingBuffer *RB = getRingBuffer();
if (!RB)
return;
u32 Trace = collectStackTrace(RB->Depot);
u32 Tid = getThreadID();

auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
Ptr32[MemTagAllocationTraceIndex] = Trace;
Ptr32[MemTagAllocationTidIndex] = Tid;

storeRingBufferEntry(untagPointer(Ptr), Trace, Tid, Size, 0, 0);
storeRingBufferEntry(RB, untagPointer(Ptr), Trace, Tid, Size, 0, 0);
}

void storeDeallocationStackMaybe(const Options &Options, void *Ptr,
u8 PrevTag, uptr Size) {
auto *Depot = getDepotIfEnabled(Options);
if (!Depot)
if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
return;
AllocationRingBuffer *RB = getRingBuffer();
if (!RB)
return;
Contributor comment on lines +1335 to 1339:
Just want to add some thoughts about inlining this.

I originally asked for putting these checks into a function (the
getDepotIfEnabled) to simplify the code. The reason I suggest inlining it
again is that the names (getRingBuffer/getRingBufferIfEnabled) can be kind
of confusing (if it's disabled, why do we still allocate the ring buffer?
That only makes sense once you know it can be disabled temporarily).

I still think we can reduce the OptionBit::TrackAllocationStacks checks
with something like:

enum AllocationStackType {
  Primary,
  Secondary,
  Deallocation,
};
void storeAllocationStack(AllocationStackType Type, ...) {
  if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
    return;
  /* call the respective functions */
}

And/or we can simplify some of the logic as well. For example,
storePrimaryAllocationStackMaybe and storeSecondaryAllocationStackMaybe
seem to share some logic.

We don't need to make that change in this PR, but I would like to see
further refactoring in the future (I can do it if you don't have the
bandwidth for it).
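
A rough sketch of how the reviewer's storeAllocationStack helper might be
filled in, reusing the helpers already present in this diff. The parameter
list is an assumption, and enum class is used here so the enumerators don't
clash with the allocator's existing Primary/Secondary members:

enum class AllocationStackType { Primary, Secondary, Deallocation };

void storeAllocationStack(AllocationStackType Type, const Options &Options,
                          void *Ptr, uptr Size, u8 PrevTag) {
  // One shared check for the option bit and the ring-buffer pointer.
  if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
    return;
  AllocationRingBuffer *RB = getRingBuffer();
  if (!RB)
    return;
  const u32 Trace = collectStackTrace(RB->Depot);
  const u32 Tid = getThreadID();
  auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
  switch (Type) {
  case AllocationStackType::Primary:
    Ptr32[MemTagAllocationTraceIndex] = Trace;
    Ptr32[MemTagAllocationTidIndex] = Tid;
    break;
  case AllocationStackType::Secondary:
    Ptr32[MemTagAllocationTraceIndex] = Trace;
    Ptr32[MemTagAllocationTidIndex] = Tid;
    storeRingBufferEntry(RB, untagPointer(Ptr), Trace, Tid, Size, 0, 0);
    break;
  case AllocationStackType::Deallocation:
    // The allocation trace/tid were stored in the pointer's header words;
    // the freshly collected Trace/Tid describe the deallocation.
    storeRingBufferEntry(RB, addFixedTag(untagPointer(Ptr), PrevTag),
                         Ptr32[MemTagAllocationTraceIndex],
                         Ptr32[MemTagAllocationTidIndex], Size, Trace, Tid);
    break;
  }
}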

auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
u32 AllocationTrace = Ptr32[MemTagAllocationTraceIndex];
u32 AllocationTid = Ptr32[MemTagAllocationTidIndex];

u32 DeallocationTrace = collectStackTrace(Depot);
u32 DeallocationTrace = collectStackTrace(RB->Depot);
u32 DeallocationTid = getThreadID();

storeRingBufferEntry(addFixedTag(untagPointer(Ptr), PrevTag),
storeRingBufferEntry(RB, addFixedTag(untagPointer(Ptr), PrevTag),
AllocationTrace, AllocationTid, Size,
DeallocationTrace, DeallocationTid);
}
@@ -1434,7 +1453,7 @@ class Allocator {
for (uptr I = Pos - 1; I != Pos - 1 - RingBufferElements &&
NextErrorReport != NumErrorReports;
--I) {
auto *Entry = getRingBufferEntry(RingBufferPtr, I % RingBufferElements);
auto *Entry = getRingBufferEntry(RingBuffer, I % RingBufferElements);
uptr EntryPtr = atomic_load_relaxed(&Entry->Ptr);
if (!EntryPtr)
continue;
@@ -1502,14 +1521,18 @@ class Allocator {
}

static typename AllocationRingBuffer::Entry *
getRingBufferEntry(char *RawRingBuffer, uptr N) {
getRingBufferEntry(AllocationRingBuffer *RB, uptr N) {
char *RBEntryStart =
&reinterpret_cast<char *>(RB)[sizeof(AllocationRingBuffer)];
return &reinterpret_cast<typename AllocationRingBuffer::Entry *>(
&RawRingBuffer[sizeof(AllocationRingBuffer)])[N];
RBEntryStart)[N];
}
static const typename AllocationRingBuffer::Entry *
getRingBufferEntry(const char *RawRingBuffer, uptr N) {
getRingBufferEntry(const AllocationRingBuffer *RB, uptr N) {
const char *RBEntryStart =
&reinterpret_cast<const char *>(RB)[sizeof(AllocationRingBuffer)];
return &reinterpret_cast<const typename AllocationRingBuffer::Entry *>(
&RawRingBuffer[sizeof(AllocationRingBuffer)])[N];
RBEntryStart)[N];
}
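
Entry N lives sizeof(AllocationRingBuffer) bytes past the header, inside the
same mapping. A self-contained illustration of this header-plus-trailing-array
layout with simplified types (not Scudo's):

#include <atomic>
#include <cstddef>
#include <cstdint>

struct Entry {
  std::atomic<std::uintptr_t> Ptr{0};
};

struct Header {
  std::atomic<std::size_t> Pos{0};
  std::size_t Elements = 0;
  // Elements entries are placed immediately after this struct.
};

// The header size must be a multiple of the entry alignment so that the
// first trailing entry is properly aligned; the diff enforces the same
// property with a static_assert in mapAndInitializeRingBuffer().
static_assert(sizeof(Header) % alignof(Entry) == 0, "invalid alignment");

Entry *entryAt(Header *H, std::size_t N) {
  char *First = reinterpret_cast<char *>(H) + sizeof(Header);
  return reinterpret_cast<Entry *>(First) + N;
}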

void mapAndInitializeRingBuffer() {
@@ -1544,42 +1567,47 @@ class Allocator {
u32 RingSize = static_cast<u32>(TabSize * kFramesPerStack);
DCHECK(isPowerOfTwo(RingSize));

StackDepotSize = sizeof(StackDepot) + sizeof(atomic_u64) * RingSize +
sizeof(atomic_u32) * TabSize;
uptr StackDepotSize = sizeof(StackDepot) + sizeof(atomic_u64) * RingSize +
sizeof(atomic_u32) * TabSize;
MemMapT DepotMap;
DepotMap.map(
/*Addr=*/0U, roundUp(StackDepotSize, getPageSizeCached()),
"scudo:stack_depot");
Depot = reinterpret_cast<StackDepot *>(DepotMap.getBase());
auto *Depot = reinterpret_cast<StackDepot *>(DepotMap.getBase());
Depot->init(RingSize, TabSize);
RawStackDepotMap = DepotMap;

MemMapT MemMap;
MemMap.map(
/*Addr=*/0U,
roundUp(ringBufferSizeInBytes(AllocationRingBufferSize),
getPageSizeCached()),
"scudo:ring_buffer");
RawRingBuffer = reinterpret_cast<char *>(MemMap.getBase());
RawRingBufferMap = MemMap;
RingBufferElements = AllocationRingBufferSize;
auto *RB = reinterpret_cast<AllocationRingBuffer *>(MemMap.getBase());
RB->RawRingBufferMap = MemMap;
RB->RingBufferElements = AllocationRingBufferSize;
RB->Depot = Depot;
RB->StackDepotSize = StackDepotSize;
RB->RawStackDepotMap = DepotMap;

atomic_store(&RingBufferAddress, reinterpret_cast<uptr>(RB),
memory_order_release);
static_assert(sizeof(AllocationRingBuffer) %
alignof(typename AllocationRingBuffer::Entry) ==
0,
"invalid alignment");
}

void unmapRingBuffer() {
auto *RingBuffer = getRingBuffer();
if (RingBuffer != nullptr) {
RawRingBufferMap.unmap(RawRingBufferMap.getBase(),
RawRingBufferMap.getCapacity());
}
RawRingBuffer = nullptr;
if (Depot) {
RawStackDepotMap.unmap(RawStackDepotMap.getBase(),
RawStackDepotMap.getCapacity());
}
AllocationRingBuffer *RB = getRingBuffer();
if (RB == nullptr)
return;
// N.B. because the RawStackDepotMap object is stored inside the mapping
// described by RawRingBufferMap, the unmap order is very important.
RB->RawStackDepotMap.unmap(RB->RawStackDepotMap.getBase(),
RB->RawStackDepotMap.getCapacity());
RB->RawRingBufferMap.unmap(RB->RawRingBufferMap.getBase(),
RB->RawRingBufferMap.getCapacity());
atomic_store(&RingBufferAddress, 0, memory_order_release);
}
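
The ordering matters because both MemMapT objects live inside the mapping
that RawRingBufferMap itself describes: releasing the ring-buffer mapping
first would free the memory holding RawStackDepotMap. A minimal sketch of
this self-referential teardown, assuming a hypothetical munmap-backed
Mapping type rather than Scudo's MemMapT:

#include <cstddef>
#include <sys/mman.h>

struct Mapping {
  void *Base = nullptr;
  std::size_t Size = 0;
  void unmap() { munmap(Base, Size); }
};

struct RingBuffer {
  Mapping DepotMap; // describes a separate, depot-only mapping
  Mapping SelfMap;  // describes the mapping this struct lives in
};

void tearDown(RingBuffer *RB) {
  // Unmap the depot first: unmapping SelfMap would free the memory that
  // holds both Mapping objects, including DepotMap.
  RB->DepotMap.unmap();
  // Copy the self-describing mapping out before releasing it; once munmap
  // returns, *RB no longer exists.
  Mapping Self = RB->SelfMap;
  Self.unmap();
}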

static constexpr size_t ringBufferSizeInBytes(u32 RingBufferElements) {
@@ -1594,10 +1622,6 @@ class Allocator {
return (Bytes - sizeof(AllocationRingBuffer)) /
sizeof(typename AllocationRingBuffer::Entry);
}

inline AllocationRingBuffer *getRingBuffer() {
return reinterpret_cast<AllocationRingBuffer *>(RawRingBuffer);
}
};

} // namespace scudo