
Commit 1865c7e

[scudo] Store more blocks in each TransferBatch (#70390)
Instead of always storing only as many blocks as the per-class cache moves at a time, increase utilization by packing more blocks into each TransferBatch. This may slightly impact performance, but it saves a significant amount of the memory used by BatchClassId (especially for larger size classes).
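As a rough, back-of-the-envelope illustration of where the savings come from (a hedged sketch with made-up numbers, not taken from scudo's real size-class configuration): if a large size class only moves 2 blocks at a time but a TransferBatch can hold 14, filling batches to capacity needs roughly 7x fewer TransferBatch headers, all of which live in BatchClassId memory.

// Standalone model (hypothetical constants): count how many TransferBatch
// headers are needed to keep NumFreeBlocks of one size class on the freelist.
#include <cstdio>

int main() {
  const unsigned NumFreeBlocks = 1000;  // blocks sitting on the freelist
  const unsigned MaxNumCached = 14;     // assumed TransferBatch capacity
  const unsigned MaxCachedPerClass = 2; // assumed cache count for a large class

  // Before: each TransferBatch held only as many blocks as the cache moves at
  // once, so batches for large classes stayed mostly empty.
  const unsigned BatchesBefore =
      (NumFreeBlocks + MaxCachedPerClass - 1) / MaxCachedPerClass;
  // After: each TransferBatch is filled up to its full capacity.
  const unsigned BatchesAfter =
      (NumFreeBlocks + MaxNumCached - 1) / MaxNumCached;

  std::printf("TransferBatch headers needed: before=%u, after=%u\n",
              BatchesBefore, BatchesAfter);
  return 0;
}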
1 parent ce4740d commit 1865c7e


4 files changed, +159 -141 lines


compiler-rt/lib/scudo/standalone/allocator_common.h

Lines changed: 7 additions & 0 deletions
@@ -40,6 +40,7 @@ template <class SizeClassAllocator> struct TransferBatch {
     B->Count = static_cast<u16>(B->Count - N);
   }
   void clear() { Count = 0; }
+  bool empty() { return Count == 0; }
   void add(CompactPtrT P) {
     DCHECK_LT(Count, MaxNumCached);
     Batch[Count++] = P;
@@ -48,6 +49,12 @@ template <class SizeClassAllocator> struct TransferBatch {
     memcpy(Array, Batch, sizeof(Batch[0]) * Count);
     clear();
   }
+
+  void moveNToArray(CompactPtrT *Array, u16 N) {
+    DCHECK_LE(N, Count);
+    memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * N);
+    Count -= N;
+  }
   u16 getCount() const { return Count; }
   bool isEmpty() const { return Count == 0U; }
   CompactPtrT get(u16 I) const {
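To make the partial-move semantics concrete, here is a minimal, self-contained sketch of the same idea with simplified types (fixed capacity, plain uintptr_t entries instead of CompactPtrT). It mirrors the moveNToArray() added above but is not the scudo code itself:

#include <cassert>
#include <cstdint>
#include <cstring>

// Simplified stand-in for scudo's TransferBatch.
struct SimpleBatch {
  static constexpr uint16_t MaxNumCached = 14;
  uintptr_t Batch[MaxNumCached];
  uint16_t Count = 0;

  void add(uintptr_t P) { assert(Count < MaxNumCached); Batch[Count++] = P; }
  bool empty() const { return Count == 0; }

  // Move the last N blocks out of the batch; the remaining blocks stay put.
  void moveNToArray(uintptr_t *Array, uint16_t N) {
    assert(N <= Count);
    std::memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * N);
    Count -= N;
  }
};

int main() {
  SimpleBatch B;
  for (uintptr_t I = 0; I < 10; ++I)
    B.add(I);

  uintptr_t Out[4];
  B.moveNToArray(Out, 4); // pops the values 6..9
  assert(B.Count == 6);   // 6 blocks remain for the next pop request
  return 0;
}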

compiler-rt/lib/scudo/standalone/primary32.h

Lines changed: 55 additions & 51 deletions
@@ -191,38 +191,21 @@ template <typename Config> class SizeClassAllocator32 {
     return BlockSize > PageSize;
   }
 
-  // Note that the `MaxBlockCount` will be used when we support arbitrary blocks
-  // count. Now it's the same as the number of blocks stored in the
-  // `TransferBatch`.
   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
-                UNUSED const u16 MaxBlockCount) {
-    TransferBatchT *B = popBatch(C, ClassId);
-    if (!B)
-      return 0;
-
-    const u16 Count = B->getCount();
-    DCHECK_GT(Count, 0U);
-    B->moveToArray(ToArray);
-
-    if (ClassId != SizeClassMap::BatchClassId)
-      C->deallocate(SizeClassMap::BatchClassId, B);
-
-    return Count;
-  }
-
-  TransferBatchT *popBatch(CacheT *C, uptr ClassId) {
+                const u16 MaxBlockCount) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
     ScopedLock L(Sci->Mutex);
-    TransferBatchT *B = popBatchImpl(C, ClassId, Sci);
-    if (UNLIKELY(!B)) {
+
+    u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
+    if (UNLIKELY(PopCount == 0)) {
       if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
-        return nullptr;
-      B = popBatchImpl(C, ClassId, Sci);
-      // if `populateFreeList` succeeded, we are supposed to get free blocks.
-      DCHECK_NE(B, nullptr);
+        return 0U;
+      PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
+      DCHECK_NE(PopCount, 0U);
    }
-    return B;
+
+    return PopCount;
   }
 
   // Push the array of free blocks to the designated batch group.
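For context, the caller side changes from consuming whole TransferBatches to asking for at most MaxBlockCount blocks and using whatever count comes back. A hypothetical sketch of such a refill helper (the name refillFromPrimary and its shape are illustrative, not scudo's actual SizeClassAllocatorLocalCache code):

// Hypothetical wrapper over the new popBlocks() contract.
template <class PrimaryT, class CacheT>
bool refillFromPrimary(PrimaryT &Primary, CacheT *C, unsigned long ClassId,
                       typename PrimaryT::CompactPtrT *Chunks,
                       unsigned short MaxBlockCount, unsigned short &Count) {
  // popBlocks() may hand back anywhere from 1 to MaxBlockCount blocks; a
  // return value of 0 means populateFreeList() failed (likely OOM).
  Count = Primary.popBlocks(C, ClassId, Chunks, MaxBlockCount);
  return Count != 0;
}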
@@ -510,7 +493,7 @@ template <typename Config> class SizeClassAllocator32 {
   // by TransferBatch is also free for use. We don't need to recycle the
   // TransferBatch. Note that the correctness is maintained by the invariant,
   //
-  // The unit of each popBatch() request is entire TransferBatch. Return
+  // Each popBlocks() request returns the entire TransferBatch. Returning
   // part of the blocks in a TransferBatch is invalid.
   //
   // This ensures that TransferBatch won't leak the address itself while it's
@@ -634,7 +617,7 @@ template <typename Config> class SizeClassAllocator32 {
     BG->Batches.push_front(TB);
     BG->PushedBlocks = 0;
     BG->BytesInBGAtLastCheckpoint = 0;
-    BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
+    BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
 
     return BG;
   };
@@ -726,14 +709,11 @@ template <typename Config> class SizeClassAllocator32 {
     InsertBlocks(Cur, Array + Size - Count, Count);
   }
 
-  // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
-  // group id will be considered first.
-  //
-  // The region mutex needs to be held while calling this method.
-  TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
+  u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
+                    CompactPtrT *ToArray, const u16 MaxBlockCount)
       REQUIRES(Sci->Mutex) {
     if (Sci->FreeListInfo.BlockList.empty())
-      return nullptr;
+      return 0U;
 
     SinglyLinkedList<TransferBatchT> &Batches =
         Sci->FreeListInfo.BlockList.front()->Batches;
@@ -746,33 +726,57 @@ template <typename Config> class SizeClassAllocator32 {
       // Block used by `BatchGroup` is from BatchClassId. Turn the block into
      // `TransferBatch` with single block.
       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
-      TB->clear();
-      TB->add(
-          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
+      ToArray[0] =
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
       Sci->FreeListInfo.PoppedBlocks += 1;
-      return TB;
+      return 1U;
     }
 
+    // So far, instead of always filling blocks up to `MaxBlockCount`, we only
+    // examine a single `TransferBatch` to minimize the time spent in the
+    // primary allocator. Besides, the sizes of `TransferBatch` and
+    // `CacheT::getMaxCached()` may also impact the time spent on accessing the
+    // primary allocator.
+    // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
+    // blocks and/or adjust the size of `TransferBatch` according to
+    // `CacheT::getMaxCached()`.
     TransferBatchT *B = Batches.front();
-    Batches.pop_front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
 
-    if (Batches.empty()) {
-      BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
-      Sci->FreeListInfo.BlockList.pop_front();
-
-      // We don't keep BatchGroup with zero blocks to avoid empty-checking while
-      // allocating. Note that block used by constructing BatchGroup is recorded
-      // as free blocks in the last element of BatchGroup::Batches. Which means,
-      // once we pop the last TransferBatch, the block is implicitly
-      // deallocated.
+    // BatchClassId should always take all blocks in the TransferBatch. Read the
+    // comment in `pushBatchClassBlocks()` for more details.
+    const u16 PopCount = ClassId == SizeClassMap::BatchClassId
+                             ? B->getCount()
+                             : Min(MaxBlockCount, B->getCount());
+    B->moveNToArray(ToArray, PopCount);
+
+    // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
+    // done without holding `Mutex`.
+    if (B->empty()) {
+      Batches.pop_front();
+      // `TransferBatch` of BatchClassId is self-contained, no need to
+      // deallocate. Read the comment in `pushBatchClassBlocks()` for more
+      // details.
       if (ClassId != SizeClassMap::BatchClassId)
-        C->deallocate(SizeClassMap::BatchClassId, BG);
+        C->deallocate(SizeClassMap::BatchClassId, B);
+
+      if (Batches.empty()) {
+        BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
+        Sci->FreeListInfo.BlockList.pop_front();
+
+        // We don't keep BatchGroup with zero blocks to avoid empty-checking
+        // while allocating. Note that block used for constructing BatchGroup is
+        // recorded as free blocks in the last element of BatchGroup::Batches.
+        // Which means, once we pop the last TransferBatch, the block is
+        // implicitly deallocated.
+        if (ClassId != SizeClassMap::BatchClassId)
+          C->deallocate(SizeClassMap::BatchClassId, BG);
+      }
     }
 
-    Sci->FreeListInfo.PoppedBlocks += B->getCount();
-    return B;
+    Sci->FreeListInfo.PoppedBlocks += PopCount;
+    return PopCount;
   }
 
   NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
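As a worked example of the new draining behavior in popBlocksImpl() (the batch fill and per-request limit below are assumed numbers, not scudo constants): a 14-block TransferBatch served in requests of at most 4 blocks now survives several popBlocks() calls before it is unlinked, instead of being handed over whole on the first request.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative trace: drain one 14-block TransferBatch through repeated
// requests of at most 4 blocks each. Output: 4, 4, 4, 2 blocks popped,
// then the batch is empty.
int main() {
  uint16_t BlocksInBatch = 14;      // assumed batch fill
  const uint16_t MaxBlockCount = 4; // assumed per-request limit

  while (BlocksInBatch != 0) {
    const uint16_t PopCount = std::min(BlocksInBatch, MaxBlockCount);
    BlocksInBatch = static_cast<uint16_t>(BlocksInBatch - PopCount);
    std::printf("popped %u, %u left in batch\n",
                static_cast<unsigned>(PopCount),
                static_cast<unsigned>(BlocksInBatch));
  }
  // Only at this point would Batches.pop_front() run and, for classes other
  // than BatchClassId, the TransferBatch block itself be deallocated.
  return 0;
}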
