@@ -72,6 +72,7 @@ namespace {
 struct CachedBlock {
   static constexpr u16 CacheIndexMax = UINT16_MAX;
   static constexpr u16 InvalidEntry = CacheIndexMax;
+  static constexpr uptr ReleaseMemoryUpperBound = 1 << 14;
 
   uptr CommitBase = 0;
   uptr CommitSize = 0;
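For scale: `1 << 14` is 16384 bytes, i.e. 16 KiB or four 4 KiB pages. The comment block added to `retrieve()` further down explains why 16 KB was chosen as the release cap.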
@@ -90,8 +91,9 @@ struct CachedBlock {
 template <typename Config> class MapAllocatorNoCache {
 public:
   void init(UNUSED s32 ReleaseToOsInterval) {}
-  CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
-                       UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+  CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+                       UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+                       UNUSED uptr &EntryHeaderPos) {
     return {};
   }
   void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -334,61 +336,111 @@ class MapAllocatorCache {
     }
   }
 
-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
     const uptr PageSize = getPageSizeCached();
     // 10% of the requested size proved to be the optimal choice for
     // retrieving cached blocks after testing several options.
     constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
     CachedBlock Entry;
     EntryHeaderPos = 0;
     {
       ScopedLock L(Mutex);
       CallsToRetrieve++;
       if (EntriesCount == 0)
         return {};
-      u32 OptimalFitIndex = 0;
+      u16 RetrievedIndex = CachedBlock::InvalidEntry;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      // Since allocation sizes don't always match cached memory chunk sizes,
+      // we allow some memory to be unused (called fragmented bytes). The
+      // amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //        CommitBase                CommitBase + CommitSize
+      //          V                              V
+      //      +---+------------+-----------------+---+
+      //      |   |            |                 |   |
+      //      +---+------------+-----------------+---+
+      //      ^   ^                              ^
+      //      Guard         EntryHeaderPos       Guard-page-end
+      //      page-begin
+      //
+      // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      // well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      // MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+      // valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
           I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
           continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+
+        const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
           continue;
-        }
-        Found = true;
-        const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
+
+        MinDiff = Diff;
+        RetrievedIndex = I;
+        EntryHeaderPos = HeaderPos;
+
+        const uptr OptimalFitThresholdBytes =
             (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
+        if (Diff <= OptimalFitThresholdBytes) {
+          FoundOptimalFit = true;
           break;
         }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
-          continue;
-        OptimalFitIndex = I;
-        MinDiff = Diff;
-        EntryHeaderPos = HeaderPos;
       }
-      if (Found) {
-        Entry = Entries[OptimalFitIndex];
-        remove(OptimalFitIndex);
+      if (RetrievedIndex != CachedBlock::InvalidEntry) {
+        Entry = Entries[RetrievedIndex];
+        remove(RetrievedIndex);
         SuccessfulRetrieves++;
       }
     }
 
+    // The difference between the retrieved memory chunk and the requested
+    // size is at most MaxAllowedFragmentedBytes.
+    //
+    //     / MaxAllowedFragmentedBytes \
+    //  +--------------------------+-----------+
+    //  |                          |           |
+    //  +--------------------------+-----------+
+    //   \   Bytes to be released /      ^
+    //                                   |
+    //                     (may or may not have been committed)
+    //
+    // The maximum number of bytes released to the OS is capped by
+    // ReleaseMemoryUpperBound.
+    //
+    // * ReleaseMemoryUpperBound default is currently 16 KB
+    //   - We arrived at this value after noticing that mapping
+    //     in larger memory regions performs better than releasing
+    //     memory and forcing a cache hit. The data suggests that
+    //     beyond 16 KB, the release execution time is longer than
+    //     the map execution time, so the default is dependent on
+    //     the platform.
+    //
+    // TODO: Consider making ReleaseMemoryUpperBound configurable since
+    // the release-to-OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
+      const uptr FragmentedBytes =
+          roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase;
+      const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+      if (FragmentedBytes > MaxUnusedCacheBytes) {
+        uptr BytesToRelease =
+            roundUp(Min<uptr>(CachedBlock::ReleaseMemoryUpperBound,
+                              FragmentedBytes - MaxUnusedCacheBytes),
+                    PageSize);
+        Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+      }
+    }
+
     return Entry;
   }
 
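To make the new selection rule concrete, here is a standalone sketch of the arithmetic `retrieve()` now performs for one candidate entry: place the allocation, derive the page-rounded fragmentation `Diff`, and compare it against the 10% optimal-fit threshold. This is not scudo code; all values (base address, sizes, header footprint, 4 KiB pages) are hypothetical.

```cpp
#include <cstdint>
#include <cstdio>

using uptr = std::uintptr_t;

// Power-of-two round-down, mirroring the helper the patch relies on.
static uptr roundDown(uptr X, uptr Boundary) { return X & ~(Boundary - 1); }

int main() {
  const uptr PageSize = 4096;            // assumed 4 KiB pages
  const uptr CommitBase = 0x100000;      // hypothetical cached chunk base
  const uptr CommitSize = 11 * PageSize; // 45056 bytes cached
  const uptr Size = 8 * PageSize;        // 32768 bytes requested
  const uptr Alignment = PageSize;
  const uptr HeadersSize = 64;           // hypothetical header footprint

  // Place the allocation as high in the chunk as alignment allows, with
  // the headers immediately below it.
  const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment);
  const uptr HeaderPos = AllocPos - HeadersSize;

  // Fragmented bytes: whole unused pages between CommitBase and the header.
  const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;

  // Optimal fit: unused bytes within 10% of the bytes actually consumed.
  const uptr OptimalFitThresholdBytes =
      (CommitBase + CommitSize - HeaderPos) / 10;

  // Prints Diff=8192 threshold=3283 -> non-optimal fit: this entry is
  // usable only if the caller's MaxAllowedFragmentedBytes is at least 8192.
  std::printf("Diff=%zu threshold=%zu -> %s fit\n", (size_t)Diff,
              (size_t)OptimalFitThresholdBytes,
              Diff <= OptimalFitThresholdBytes ? "optimal" : "non-optimal");
}
```

In this example the two fragmented pages stay below `MaxUnusedCacheBytes` (assuming `MaxUnusedCachePages` is 4), so the new partial-release path would not trigger. A candidate with, say, six unused pages would release `min(ReleaseMemoryUpperBound, 6*4096 - 4*4096)` = 8192 bytes, already a page multiple after `roundUp`.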
@@ -659,8 +711,18 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
                                            FillContentsMode FillContents) {
   CachedBlock Entry;
   uptr EntryHeaderPos;
+  uptr MaxAllowedFragmentedBytes;
+  const uptr PageSize = getPageSizeCached();
+
+  if (UNLIKELY(useMemoryTagging<Config>(Options))) {
+    MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
+  } else {
+    MaxAllowedFragmentedBytes =
+        MaxUnusedCachePages * PageSize + CachedBlock::ReleaseMemoryUpperBound;
+  }
 
-  Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+  Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
+                         getHeadersSize(), EntryHeaderPos);
   if (!Entry.isValid())
     return nullptr;
 
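The caller-side budget can also be made concrete. Under assumed values (4 KiB pages; `MaxUnusedCachePages` of 4), the two branches above work out as follows; this is a worked example, not code from the patch.

```cpp
// Assumptions: PageSize = 4096, MaxUnusedCachePages = 4.
// Tagged:   MaxAllowedFragmentedBytes = 4 * 4096             = 16384 (16 KiB)
// Untagged: MaxAllowedFragmentedBytes = 4 * 4096 + (1 << 14) = 32768 (32 KiB)
// The untagged path accepts 16 KiB of extra slack, exactly the amount
// retrieve() is now permitted to release back to the OS.
```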