
Commit 27dc247

[scudo] Add partial chunk heuristic to retrieval algorithm. (#104807)
Previously, the secondary cache retrieval algorithm would not allow retrieval of memory chunks where the number of unused bytes would be greater than `MaxUnusedCachePages * PageSize` bytes. This meant that even if a memory chunk satisfied the requirements of the optimal-fit algorithm, it might not be returned. This remains true if memory tagging is enabled. However, if memory tagging is disabled, a new heuristic has been put in place: if a memory chunk is a non-optimal fit, the cache retrieval algorithm will attempt to release the excess memory to force a cache hit while keeping RSS down, as long as the amount of memory to be released is less than or equal to 16 KB. If the amount of memory to be released exceeds 16 KB, the retrieval algorithm will not consider that cached memory chunk valid for retrieval.
1 parent 43b5085 commit 27dc247
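
As a rough sketch of the heuristic described above (illustrative only, not the scudo source; the helper name mayRetrieveChunk and the plain uintptr_t types are invented here, while the 16 KB ReleaseMemoryUpperBound cap comes from the diff below):

#include <cstdint>

// Sketch of the partial chunk heuristic. FragmentedBytes is the unused
// space at the front of a cached chunk; MaxUnusedCacheBytes stands for the
// pre-existing MaxUnusedCachePages * PageSize bound.
constexpr uintptr_t ReleaseMemoryUpperBound = 1 << 14; // 16 KB release cap

bool mayRetrieveChunk(uintptr_t FragmentedBytes, uintptr_t MaxUnusedCacheBytes,
                      bool MemoryTaggingEnabled) {
  if (FragmentedBytes <= MaxUnusedCacheBytes)
    return true; // fits the old bound; nothing needs to be released
  if (MemoryTaggingEnabled)
    return false; // tagged path keeps the pre-existing behavior
  // Untagged non-optimal fit: valid only if the excess beyond the old
  // bound can be released back to the OS within the 16 KB cap.
  return FragmentedBytes - MaxUnusedCacheBytes <= ReleaseMemoryUpperBound;
}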

File tree: 2 files changed, 123 additions & 34 deletions

compiler-rt/lib/scudo/standalone/secondary.h

Lines changed: 91 additions & 30 deletions
@@ -72,6 +72,14 @@ namespace {
 struct CachedBlock {
   static constexpr u16 CacheIndexMax = UINT16_MAX;
   static constexpr u16 InvalidEntry = CacheIndexMax;
+  // * ReleaseMemoryUpperBound default is currently 16 KB
+  //    - We arrived at this value after noticing that mapping
+  //    in larger memory regions performs better than releasing
+  //    memory and forcing a cache hit. According to the data,
+  //    it suggests that beyond 16 KB, the release execution time is
+  //    longer than the map execution time. In this way, the default
+  //    is dependent on the platform.
+  static constexpr uptr ReleaseMemoryUpperBound = 1 << 14;
 
   uptr CommitBase = 0;
   uptr CommitSize = 0;
@@ -90,8 +98,9 @@ struct CachedBlock {
 template <typename Config> class MapAllocatorNoCache {
 public:
   void init(UNUSED s32 ReleaseToOsInterval) {}
-  CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
-                       UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+  CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+                       UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+                       UNUSED uptr &EntryHeaderPos) {
     return {};
   }
   void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -334,61 +343,103 @@ class MapAllocatorCache {
     }
   }
 
-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
     const uptr PageSize = getPageSizeCached();
     // 10% of the requested size proved to be the optimal choice for
     // retrieving cached blocks after testing several options.
     constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
     CachedBlock Entry;
     EntryHeaderPos = 0;
     {
       ScopedLock L(Mutex);
       CallsToRetrieve++;
       if (EntriesCount == 0)
         return {};
-      u32 OptimalFitIndex = 0;
+      u16 RetrievedIndex = CachedBlock::InvalidEntry;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      //  Since allocation sizes don't always match cached memory chunk sizes
+      //  we allow some memory to be unused (called fragmented bytes). The
+      //  amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //        CommitBase                CommitBase + CommitSize
+      //          V                              V
+      //      +---+------------+-----------------+---+
+      //      |   |            |                 |   |
+      //      +---+------------+-----------------+---+
+      //      ^   ^                              ^
+      //    Guard EntryHeaderPos                 Guard-page-end
+      //    page-begin
+      //
+      //  [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      //  well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      //  MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+      //  valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
            I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
           continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+
+        const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
           continue;
-        }
-        Found = true;
-        const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
+
+        MinDiff = Diff;
+        RetrievedIndex = I;
+        EntryHeaderPos = HeaderPos;
+
+        const uptr OptimalFitThesholdBytes =
             (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
+        if (Diff <= OptimalFitThesholdBytes) {
+          FoundOptimalFit = true;
           break;
         }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
-          continue;
-        OptimalFitIndex = I;
-        MinDiff = Diff;
-        EntryHeaderPos = HeaderPos;
       }
-      if (Found) {
-        Entry = Entries[OptimalFitIndex];
-        remove(OptimalFitIndex);
+      if (RetrievedIndex != CachedBlock::InvalidEntry) {
+        Entry = Entries[RetrievedIndex];
+        remove(RetrievedIndex);
         SuccessfulRetrieves++;
       }
     }
 
+    //  The difference between the retrieved memory chunk and the request
+    //  size is at most MaxAllowedFragmentedBytes.
+    //
+    //   /          MaxAllowedFragmentedBytes          \
+    //  +--------------------------+-----------+
+    //  |                          |           |
+    //  +--------------------------+-----------+
+    //   \   Bytes to be released  /     ^
+    //                                   |
+    //                      (may or may not be committed)
+    //
+    //  The maximum number of bytes released to the OS is capped by
+    //  ReleaseMemoryUpperBound.
+    //
+    //  TODO: Consider making ReleaseMemoryUpperBound configurable since
+    //  the release-to-OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
+      const uptr FragmentedBytes =
+          roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase;
+      const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+      if (FragmentedBytes > MaxUnusedCacheBytes) {
+        uptr BytesToRelease =
+            roundUp(Min<uptr>(CachedBlock::ReleaseMemoryUpperBound,
                              FragmentedBytes - MaxUnusedCacheBytes),
+                    PageSize);
+        Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+      }
+    }
+
     return Entry;
   }
@@ -659,8 +710,18 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
                                            FillContentsMode FillContents) {
   CachedBlock Entry;
   uptr EntryHeaderPos;
+  uptr MaxAllowedFragmentedBytes;
+  const uptr PageSize = getPageSizeCached();
+
+  if (LIKELY(!useMemoryTagging<Config>(Options))) {
+    MaxAllowedFragmentedBytes =
+        MaxUnusedCachePages * PageSize + CachedBlock::ReleaseMemoryUpperBound;
+  } else {
+    MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
+  }
 
-  Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+  Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
+                         getHeadersSize(), EntryHeaderPos);
   if (!Entry.isValid())
     return nullptr;

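To make the widened bound concrete, here is a small standalone calculation assuming 4 KB pages and MaxUnusedCachePages = 4 (both values are assumptions for illustration; the real ones come from getPageSizeCached() and the scudo configuration):

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed values, for illustration only.
  const uintptr_t PageSize = 4096;
  const uintptr_t MaxUnusedCachePages = 4;
  const uintptr_t ReleaseMemoryUpperBound = 1 << 14; // 16 KB, from the diff

  // Memory tagging enabled: the pre-existing bound is kept.
  const uintptr_t TaggedBound = MaxUnusedCachePages * PageSize; // 16 KB
  // Memory tagging disabled: the bound widens by the release cap, since up
  // to ReleaseMemoryUpperBound excess bytes can be released back to the OS.
  const uintptr_t UntaggedBound = TaggedBound + ReleaseMemoryUpperBound; // 32 KB

  std::printf("tagged bound: %lu bytes, untagged bound: %lu bytes\n",
              static_cast<unsigned long>(TaggedBound),
              static_cast<unsigned long>(UntaggedBound));
}

Under these assumptions, a cached chunk may carry up to 32 KB of fragmented bytes on the untagged path, with at most 16 KB of the excess released before the chunk is handed out.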
compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp

Lines changed: 32 additions & 4 deletions
@@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test {
   std::unique_ptr<CacheT> Cache = std::make_unique<CacheT>();
 
   const scudo::uptr PageSize = scudo::getPageSizeCached();
-  // The current test allocation size is set to the minimum size
-  // needed for the scudo allocator to fall back to the secondary allocator
+  // The current test allocation size is set to the maximum
+  // cache entry size
   static constexpr scudo::uptr TestAllocSize =
       CacheConfig::getDefaultMaxEntrySize();
 
@@ -327,7 +327,8 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
   for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) {
     scudo::uptr EntryHeaderPos;
     scudo::CachedBlock Entry =
-        Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos);
+        Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+                        PageSize, 0, EntryHeaderPos);
     EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase());
   }
 
@@ -336,6 +337,32 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
   MemMap.unmap();
 }
 
+TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) {
+  const scudo::uptr MaxUnusedCacheBytes = PageSize;
+  const scudo::uptr FragmentedBytes =
+      MaxUnusedCacheBytes + scudo::CachedBlock::ReleaseMemoryUpperBound;
+
+  scudo::uptr EntryHeaderPos;
+  scudo::CachedBlock Entry;
+  scudo::MemMapT MemMap = allocate(PageSize + FragmentedBytes);
+  Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(),
+               MemMap.getBase(), MemMap);
+
+  // FragmentedBytes > MaxAllowedFragmentedBytes so PageSize
+  // cannot be retrieved from the cache
+  Entry = Cache->retrieve(/*MaxAllowedFragmentedBytes=*/0, PageSize, PageSize,
+                          0, EntryHeaderPos);
+  EXPECT_FALSE(Entry.isValid());
+
+  // FragmentedBytes <= MaxAllowedFragmentedBytes so PageSize
+  // can be retrieved from the cache
+  Entry =
+      Cache->retrieve(FragmentedBytes, PageSize, PageSize, 0, EntryHeaderPos);
+  EXPECT_TRUE(Entry.isValid());
+
+  MemMap.unmap();
+}
+
 TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
   std::vector<scudo::MemMapT> MemMaps;
   // Fill the cache above MaxEntriesCount to force an eviction
@@ -351,7 +378,8 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
   for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) {
     scudo::uptr EntryHeaderPos;
     RetrievedEntries.push_back(
-        Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos));
+        Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+                        PageSize, 0, EntryHeaderPos));
     EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase());
   }
