@@ -72,6 +72,7 @@ namespace {
struct CachedBlock {
  static constexpr u16 CacheIndexMax = UINT16_MAX;
  static constexpr u16 InvalidEntry = CacheIndexMax;
+  static constexpr uptr ReleaseMemoryUpperBound = 1 << 14;

  uptr CommitBase = 0;
  uptr CommitSize = 0;
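
For reference, the new constant is 1 << 14 = 16384 bytes, i.e. the "16 KB" default referenced in the retrieval comments further down. A trivial sanity check (not part of the patch):

    static_assert((1 << 14) == 16 * 1024, "ReleaseMemoryUpperBound is 16 KiB");
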
@@ -90,8 +91,9 @@ struct CachedBlock {
template <typename Config> class MapAllocatorNoCache {
public:
  void init(UNUSED s32 ReleaseToOsInterval) {}
-  CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
-                       UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+  CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+                       UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+                       UNUSED uptr &EntryHeaderPos) {
    return {};
  }
  void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -334,61 +336,110 @@ class MapAllocatorCache {
    }
  }

-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
    const uptr PageSize = getPageSizeCached();
    // 10% of the requested size proved to be the optimal choice for
    // retrieving cached blocks after testing several options.
    constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
    CachedBlock Entry;
    EntryHeaderPos = 0;
    {
      ScopedLock L(Mutex);
      CallsToRetrieve++;
      if (EntriesCount == 0)
        return {};
-      u32 OptimalFitIndex = 0;
+      u16 OptimalFitIndex = CachedBlock::InvalidEntry;
      uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      // Since allocation sizes don't always match cached memory chunk sizes,
+      // we allow some memory to be unused (called fragmented bytes). The
+      // amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //       CommitBase              CommitBase + CommitSize
+      //           V                              V
+      //       +---+------------+-----------------+---+
+      //       |   |            |                 |   |
+      //       +---+------------+-----------------+---+
+      //       ^                ^                     ^
+      //      Guard      EntryHeaderPos        Guard-page-end
+      //    page-begin
+      //
+      // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      // well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      // MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+      // valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
           I = Entries[I].Next) {
        const uptr CommitBase = Entries[I].CommitBase;
        const uptr CommitSize = Entries[I].CommitSize;
        const uptr AllocPos =
            roundDown(CommitBase + CommitSize - Size, Alignment);
        const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
-          continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
          continue;
-        }
-        Found = true;
+
        const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
-            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
-          break;
-        }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
+
+        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
          continue;
-        OptimalFitIndex = I;
+
        MinDiff = Diff;
+        OptimalFitIndex = I;
        EntryHeaderPos = HeaderPos;
+
+        const uptr OptimalFitThresholdBytes =
+            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
+        if (Diff <= OptimalFitThresholdBytes) {
+          FoundOptimalFit = true;
+          break;
+        }
      }
-      if (Found) {
+      if (OptimalFitIndex != CachedBlock::InvalidEntry) {
        Entry = Entries[OptimalFitIndex];
        remove(OptimalFitIndex);
        SuccessfulRetrieves++;
      }
    }

+    // The difference between the size of the retrieved memory chunk and the
+    // requested size is at most MaxAllowedFragmentedBytes.
+    //
+    //   /      MaxAllowedFragmentedBytes       \
+    //   +--------------------------+-----------+
+    //   |                          |           |
+    //   +--------------------------+-----------+
+    //   \   Bytes to be released   /     ^
+    //                                    |
+    //                     (may or may not have committed)
+    //
+    // The maximum number of bytes released to the OS is capped by
+    // ReleaseMemoryUpperBound.
+    //
+    // * The ReleaseMemoryUpperBound default is currently 16 KB.
+    //   - We arrived at this value after noticing that mapping
+    //     in larger memory regions performs better than releasing
+    //     memory and forcing a cache hit. The data suggests that
+    //     beyond 16 KB, the release execution time exceeds the map
+    //     execution time, so the best default is platform-dependent.
+    //
+    // TODO: Consider making ReleaseMemoryUpperBound configurable, since
+    // the release-to-OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
+      const uptr FragmentedBytes = EntryHeaderPos - Entry.CommitBase;
+      const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+      if (FragmentedBytes > MaxUnusedCacheBytes) {
+        uptr BytesToRelease =
+            roundUp(Min<uptr>(CachedBlock::ReleaseMemoryUpperBound,
+                              FragmentedBytes - MaxUnusedCacheBytes),
+                    PageSize);
+        Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+      }
+    }
+
    return Entry;
  }
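
To see the two heuristics in isolation, here is a standalone, compilable sketch that mirrors the logic of retrieve() above: a best-fit scan bounded by MaxAllowedFragmentedBytes with the 10% early-exit, plus the capped computation of bytes to release. Block, retrieveIndex, and bytesToRelease are hypothetical names, uintptr_t stands in for scudo's uptr, and a std::vector replaces the intrusive LRU list; this is an illustration of the algorithm, not the scudo implementation.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using uptr = uintptr_t;                           // stand-in for scudo uptr

    constexpr uptr PageSize = 4096;                   // assumed page size
    constexpr uptr ReleaseMemoryUpperBound = 1 << 14; // 16 KiB cap, as in the patch
    constexpr uptr FragmentedBytesDivisor = 10;       // "optimal fit" = within 10%

    struct Block {                                    // hypothetical cache record
      uptr CommitBase;
      uptr CommitSize;
    };

    // Power-of-two rounding helpers.
    uptr roundDown(uptr X, uptr B) { return X & ~(B - 1); }
    uptr roundUp(uptr X, uptr B) { return (X + B - 1) & ~(B - 1); }

    // Best-fit scan mirroring the loop in retrieve(): returns the index of the
    // least-fragmented block whose waste stays under MaxAllowedFragmentedBytes,
    // or -1 if none qualifies. Exits early on an "optimal" (<= 10% waste) fit.
    int retrieveIndex(const std::vector<Block> &Cache,
                      uptr MaxAllowedFragmentedBytes, uptr Size, uptr Alignment,
                      uptr HeadersSize, uptr &EntryHeaderPos) {
      int OptimalFitIndex = -1;
      uptr MinDiff = UINTPTR_MAX;
      for (size_t I = 0; I < Cache.size(); I++) {
        const uptr CommitBase = Cache[I].CommitBase;
        const uptr CommitSize = Cache[I].CommitSize;
        const uptr AllocPos =
            roundDown(CommitBase + CommitSize - Size, Alignment);
        const uptr HeaderPos = AllocPos - HeadersSize;
        // Block too small (unsigned arithmetic wrapped) or header underflows.
        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
          continue;
        const uptr Diff = HeaderPos - CommitBase;     // fragmented (unused) bytes
        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
          continue;
        MinDiff = Diff;
        OptimalFitIndex = static_cast<int>(I);
        EntryHeaderPos = HeaderPos;
        const uptr OptimalFitThresholdBytes =
            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
        if (Diff <= OptimalFitThresholdBytes)
          break;                                      // close enough: stop early
      }
      return OptimalFitIndex;
    }

    // Mirrors the capped release: only waste above MaxUnusedCacheBytes is
    // released, and never more than ReleaseMemoryUpperBound.
    uptr bytesToRelease(uptr FragmentedBytes, uptr MaxUnusedCacheBytes) {
      if (FragmentedBytes <= MaxUnusedCacheBytes)
        return 0;
      return roundUp(std::min<uptr>(ReleaseMemoryUpperBound,
                                    FragmentedBytes - MaxUnusedCacheBytes),
                     PageSize);
    }

    int main() {
      // One oversized block (20 pages) and one close fit (6 pages).
      std::vector<Block> Cache = {{0x100000, 20 * PageSize},
                                  {0x200000, 6 * PageSize}};
      // 4 stands in for MaxUnusedCachePages, as in the caller-side bound.
      const uptr MaxAllowed = 4 * PageSize + ReleaseMemoryUpperBound;
      uptr EntryHeaderPos = 0;
      int Idx = retrieveIndex(Cache, MaxAllowed, 5 * PageSize, PageSize, 64,
                              EntryHeaderPos);
      printf("picked %d, header at 0x%zx, release %zu bytes\n", Idx,
             (size_t)EntryHeaderPos,
             (size_t)bytesToRelease(
                 EntryHeaderPos - Cache[static_cast<size_t>(Idx)].CommitBase,
                 4 * PageSize));
      return 0;
    }

Running this picks the 6-page block: the 20-page block would leave about 60 KiB unused, beyond the 32 KiB bound, while the close fit wastes under 4 KiB and needs no release at all. The early-exit keeps the common case cheap, and the InvalidEntry/-1 sentinel replaces the old Found flag.
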
@@ -659,8 +710,15 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
                                           FillContentsMode FillContents) {
  CachedBlock Entry;
  uptr EntryHeaderPos;
+  uptr MaxAllowedFragmentedBytes;
+  const uptr PageSize = getPageSizeCached();
+
+  MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
+  if (LIKELY(!useMemoryTagging<Config>(Options)))
+    MaxAllowedFragmentedBytes += CachedBlock::ReleaseMemoryUpperBound;

-  Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+  Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
+                         getHeadersSize(), EntryHeaderPos);
  if (!Entry.isValid())
    return nullptr;
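
The caller now derives the fragmentation bound from two terms. As a worked example under assumed values (4 KiB pages and MaxUnusedCachePages == 4; both are platform- and config-dependent in the real code):

    #include <cstdint>

    using uptr = uintptr_t;

    // Hypothetical values: the real PageSize comes from getPageSizeCached()
    // and MaxUnusedCachePages from the cache configuration.
    constexpr uptr PageSize = 4096;
    constexpr uptr MaxUnusedCachePages = 4;
    constexpr uptr ReleaseMemoryUpperBound = 1 << 14; // 16 KiB

    // With memory tagging: only the always-tolerated unused pages.
    constexpr uptr TaggedBound = MaxUnusedCachePages * PageSize;
    // Without tagging: extra headroom, since the excess can be released.
    constexpr uptr UntaggedBound = TaggedBound + ReleaseMemoryUpperBound;

    static_assert(TaggedBound == 16 * 1024, "tagged path tolerates 16 KiB");
    static_assert(UntaggedBound == 32 * 1024, "untagged path tolerates 32 KiB");

The untagged path gets the extra 16 KiB of headroom because, per the retrieve() logic above, fragmented bytes beyond MaxUnusedCacheBytes can be released back to the OS; with memory tagging enabled, only fragmentation that fits within the always-tolerated unused pages is accepted.
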