@@ -72,6 +72,14 @@ namespace {
 struct CachedBlock {
   static constexpr u16 CacheIndexMax = UINT16_MAX;
   static constexpr u16 InvalidEntry = CacheIndexMax;
+  // * ReleaseMemoryUpperBound default is currently 16 KB
+  //    - We arrived at this value after noticing that mapping
+  //      in larger memory regions performs better than releasing
+  //      memory and forcing a cache hit. The data suggests that,
+  //      beyond 16 KB, release execution time exceeds map execution
+  //      time, which also makes the default inherently
+  //      platform-dependent.
+  static constexpr uptr ReleaseMemoryUpperBound = 1 << 14;

   uptr CommitBase = 0;
   uptr CommitSize = 0;
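The tradeoff described in the ReleaseMemoryUpperBound comment above can be sanity-checked with a small standalone micro-benchmark. The sketch below is illustrative only, not the methodology behind the 16 KB default: it assumes Linux/POSIX and uses madvise(MADV_DONTNEED) as a stand-in for Scudo's releaseAndZeroPagesToOS().

#include <chrono>
#include <cstddef>
#include <cstdio>
#include <sys/mman.h>

// Fault in a mapping, then time releasing its pages back to the OS.
static double timeReleaseUs(std::size_t Bytes) {
  char *P = static_cast<char *>(mmap(nullptr, Bytes, PROT_READ | PROT_WRITE,
                                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  for (std::size_t I = 0; I < Bytes; I += 4096)
    P[I] = 1; // Touch every page so there is something to release.
  const auto Start = std::chrono::steady_clock::now();
  madvise(P, Bytes, MADV_DONTNEED); // Stand-in for releaseAndZeroPagesToOS().
  const auto End = std::chrono::steady_clock::now();
  munmap(P, Bytes);
  return std::chrono::duration<double, std::micro>(End - Start).count();
}

// Time mapping a fresh anonymous region of the same size.
static double timeMapUs(std::size_t Bytes) {
  const auto Start = std::chrono::steady_clock::now();
  void *P = mmap(nullptr, Bytes, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  const auto End = std::chrono::steady_clock::now();
  munmap(P, Bytes);
  return std::chrono::duration<double, std::micro>(End - Start).count();
}

int main() {
  // Scan across sizes to look for the release-vs-map crossover.
  for (std::size_t KB : {4, 8, 16, 32, 64, 128})
    std::printf("%4zu KB: release %7.2f us, map %7.2f us\n", KB,
                timeReleaseUs(KB * 1024), timeMapUs(KB * 1024));
}

On a platform where releasing becomes slower than mapping past roughly 16 KB, the constant above caps how much work retrieve() spends on a release.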
@@ -90,8 +98,9 @@ struct CachedBlock {
 template <typename Config> class MapAllocatorNoCache {
 public:
   void init(UNUSED s32 ReleaseToOsInterval) {}
-  CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
-                       UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+  CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+                       UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+                       UNUSED uptr &EntryHeaderPos) {
     return {};
   }
   void store(UNUSED Options Options, UNUSED uptr CommitBase,
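The no-cache stub keeps call sites uniform: a default-constructed CachedBlock is invalid, which pushes the caller onto the fresh-mapping path. The toy model below is a hypothetical restatement of that contract; the simplified isValid() is an assumption, not the real implementation.

#include <cstdint>
#include <cstdio>

using uptr = uintptr_t;

// Toy model: a zeroed CachedBlock signals "miss", so the caller always
// takes the fresh-map branch when the no-cache config is in use.
struct CachedBlock {
  uptr CommitBase = 0;
  uptr CommitSize = 0;
  bool isValid() const { return CommitBase != 0; } // Simplification.
};

struct MapAllocatorNoCache {
  CachedBlock retrieve(uptr /*MaxAllowedFragmentedBytes*/, uptr /*Size*/,
                       uptr /*Alignment*/, uptr /*HeadersSize*/,
                       uptr &EntryHeaderPos) {
    EntryHeaderPos = 0;
    return {}; // Always a miss.
  }
};

int main() {
  MapAllocatorNoCache Cache;
  uptr EntryHeaderPos;
  CachedBlock Entry = Cache.retrieve(0, 4096, 16, 64, EntryHeaderPos);
  std::printf(Entry.isValid() ? "cache hit\n" : "miss: map fresh memory\n");
}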
@@ -334,61 +343,103 @@ class MapAllocatorCache {
     }
   }

-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
     const uptr PageSize = getPageSizeCached();
     // 10% of the requested size proved to be the optimal choice for
     // retrieving cached blocks after testing several options.
     constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
     CachedBlock Entry;
     EntryHeaderPos = 0;
     {
       ScopedLock L(Mutex);
       CallsToRetrieve++;
       if (EntriesCount == 0)
         return {};
-      u32 OptimalFitIndex = 0;
+      u16 RetrievedIndex = CachedBlock::InvalidEntry;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      // Since allocation sizes don't always match cached memory chunk sizes,
+      // we allow some memory to be unused (called fragmented bytes). The
+      // amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //          CommitBase                  CommitBase + CommitSize
+      //              V                                V
+      //      +---+------------+-----------------+---+
+      //      |   |            |                 |   |
+      //      +---+------------+-----------------+---+
+      //      ^                ^                     ^
+      //    Guard        EntryHeaderPos        Guard-page-end
+      //  page-begin
+      //
+      // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      // well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      // MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+      // valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
            I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
           continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+
+        const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
           continue;
-        }
-        Found = true;
-        const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
+
+        MinDiff = Diff;
+        RetrievedIndex = I;
+        EntryHeaderPos = HeaderPos;
+
+        const uptr OptimalFitThresholdBytes =
             (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
+        if (Diff <= OptimalFitThresholdBytes) {
+          FoundOptimalFit = true;
           break;
         }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
-          continue;
-        OptimalFitIndex = I;
-        MinDiff = Diff;
-        EntryHeaderPos = HeaderPos;
       }
-      if (Found) {
-        Entry = Entries[OptimalFitIndex];
-        remove(OptimalFitIndex);
+      if (RetrievedIndex != CachedBlock::InvalidEntry) {
+        Entry = Entries[RetrievedIndex];
+        remove(RetrievedIndex);
         SuccessfulRetrieves++;
       }
     }

+    // The difference between the retrieved memory chunk and the requested
+    // size is at most MaxAllowedFragmentedBytes.
+    //
+    //  /      MaxAllowedFragmentedBytes       \
+    // +--------------------------+-----------+
+    // |                          |           |
+    // +--------------------------+-----------+
+    //  \  Bytes to be released  /      ^
+    //                                  |
+    //                    (may or may not be committed)
+    //
+    // The maximum number of bytes released to the OS is capped by
+    // ReleaseMemoryUpperBound.
+    //
+    // TODO: Consider making ReleaseMemoryUpperBound configurable since
+    // the release-to-OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
+      const uptr FragmentedBytes =
+          roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase;
+      const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+      if (FragmentedBytes > MaxUnusedCacheBytes) {
+        uptr BytesToRelease =
+            roundUp(Min<uptr>(CachedBlock::ReleaseMemoryUpperBound,
                              FragmentedBytes - MaxUnusedCacheBytes),
+                    PageSize);
+        Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+      }
+    }
+
     return Entry;
   }
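The per-entry test and the capped release above can be restated outside the allocator. The sketch below is a detached re-implementation under stated assumptions (size_t standing in for uptr, power-of-two boundaries, MaxUnusedCachePages == 4); it mirrors the diff's arithmetic but is not the real code.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <optional>

using uptr = std::size_t; // Stand-in for Scudo's uptr.

static uptr roundDown(uptr X, uptr B) { return X & ~(B - 1); }
static uptr roundUp(uptr X, uptr B) { return (X + B - 1) & ~(B - 1); }

// Detached restatement of the per-entry test in retrieve(): returns the
// fragmented-byte count if the entry can serve the request, nullopt if not.
std::optional<uptr> fragmentedBytes(uptr CommitBase, uptr CommitSize,
                                    uptr Size, uptr Alignment,
                                    uptr HeadersSize, uptr PageSize,
                                    uptr MaxAllowedFragmentedBytes) {
  const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment);
  const uptr HeaderPos = AllocPos - HeadersSize;
  // Either the subtraction wrapped (request too large for the entry) or the
  // header would land before the committed range.
  if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
    return std::nullopt;
  // Only whole unused pages below the header count as fragmentation.
  const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
  if (Diff > MaxAllowedFragmentedBytes)
    return std::nullopt;
  return Diff;
}

int main() {
  const uptr PageSize = 4096;
  const uptr MaxUnusedCacheBytes = 4 * PageSize; // Assumes 4 unused pages.
  const uptr ReleaseUpperBound = 1 << 14;        // ReleaseMemoryUpperBound.
  const uptr MaxFragmented = MaxUnusedCacheBytes + ReleaseUpperBound;

  // A 64 KiB cached chunk serving a 32 KiB request: 30 KiB ends up unused.
  if (auto Diff = fragmentedBytes(/*CommitBase=*/0x10000,
                                  /*CommitSize=*/64 * 1024,
                                  /*Size=*/32 * 1024, /*Alignment=*/16,
                                  /*HeadersSize=*/64, PageSize,
                                  MaxFragmented)) {
    if (*Diff > MaxUnusedCacheBytes) {
      // Same capped-release arithmetic as the block before "return Entry".
      const uptr BytesToRelease = roundUp(
          std::min(ReleaseUpperBound, *Diff - MaxUnusedCacheBytes), PageSize);
      std::printf("fragmented %zu KiB, release %zu KiB\n", *Diff / 1024,
                  BytesToRelease / 1024);
    }
  }
}

With these numbers the entry is accepted (30 KiB fragmented, under the 32 KiB bound), and 16 KiB of the slack, the ReleaseMemoryUpperBound cap, is returned to the OS while the rest stays committed.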
@@ -659,8 +710,18 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
                                            FillContentsMode FillContents) {
   CachedBlock Entry;
   uptr EntryHeaderPos;
+  uptr MaxAllowedFragmentedBytes;
+  const uptr PageSize = getPageSizeCached();
+
+  if (LIKELY(!useMemoryTagging<Config>(Options))) {
+    MaxAllowedFragmentedBytes =
+        MaxUnusedCachePages * PageSize + CachedBlock::ReleaseMemoryUpperBound;
+  } else {
+    MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
+  }

-  Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+  Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
+                         getHeadersSize(), EntryHeaderPos);
   if (!Entry.isValid())
     return nullptr;
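Concretely, the two branches differ only in whether the release allowance is added. The worked bounds below assume 4 KiB pages and MaxUnusedCachePages == 4 (treat both as assumptions here). Note that with the tagged bound, Diff can never exceed MaxUnusedCacheBytes, so the release branch inside retrieve() never fires under memory tagging.

// Worked bounds (assumptions: PageSize == 4096, MaxUnusedCachePages == 4):
//   untagged: 4 * 4096 + (1 << 14) = 16 KiB + 16 KiB = 32 KiB
//             up to 16 KiB of slack may simply stay committed, and up to
//             another ReleaseMemoryUpperBound's worth may be released
//             inside retrieve()
//   tagged:   4 * 4096 = 16 KiB
//             fragmentation never exceeds the committed-slack tolerance,
//             so nothing is ever released on this path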