Revert "[scudo] Store more blocks in each TransferBatch" #83078
Conversation
This reverts commit 1865c7e.
@llvm/pr-subscribers-compiler-rt-sanitizer

Author: None (ChiaHungDuan)

Changes

Reverts llvm/llvm-project#70390

There's a bug caught by ScudoCombinedTestReallocateInPlaceStress_DefaultConfig.ReallocateInPlaceStress with GWP-ASan.

Patch is 23.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83078.diff

4 Files Affected:
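As the diff below shows, the reverted popBlocks() no longer honors its MaxBlockCount argument: it returns either zero blocks or one entire TransferBatch. A self-contained toy, with illustrative names and simplified types rather than real scudo code, that pins down this contract:

#include <cstdint>

using u16 = uint16_t;
using uptr = uintptr_t;
using CompactPtrT = uint32_t; // stand-in for scudo's compact pointer type

// Models the post-revert contract of popBlocks(): the primary either fails
// (returns 0) or drains one whole TransferBatch into ToArray, regardless of
// how many blocks the caller asked for.
struct FakePrimary {
  static constexpr u16 BatchCount = 13; // pretend each batch holds 13 blocks
  u16 popBlocks(uptr /*ClassId*/, CompactPtrT *ToArray,
                u16 /*MaxBlockCount*/) {
    for (u16 I = 0; I < BatchCount; I++)
      ToArray[I] = I; // stand-in compact pointers
    return BatchCount; // whole batch; MaxBlockCount is not honored
  }
};

int main() {
  FakePrimary Primary;
  CompactPtrT Chunks[64];
  // Even if the cache only wants 4 blocks, it receives the full batch.
  const u16 Got = Primary.popBlocks(/*ClassId=*/3, Chunks, /*MaxBlockCount=*/4);
  return Got == FakePrimary::BatchCount ? 0 : 1;
}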
diff --git a/compiler-rt/lib/scudo/standalone/allocator_common.h b/compiler-rt/lib/scudo/standalone/allocator_common.h
index 46dc7c0f3b914e..95f4776ac596dc 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_common.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_common.h
@@ -40,7 +40,6 @@ template <class SizeClassAllocator> struct TransferBatch {
B->Count = static_cast<u16>(B->Count - N);
}
void clear() { Count = 0; }
- bool empty() { return Count == 0; }
void add(CompactPtrT P) {
DCHECK_LT(Count, MaxNumCached);
Batch[Count++] = P;
@@ -49,12 +48,6 @@ template <class SizeClassAllocator> struct TransferBatch {
memcpy(Array, Batch, sizeof(Batch[0]) * Count);
clear();
}
-
- void moveNToArray(CompactPtrT *Array, u16 N) {
- DCHECK_LE(N, Count);
- memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * Count);
- Count -= N;
- }
u16 getCount() const { return Count; }
bool isEmpty() const { return Count == 0U; }
CompactPtrT get(u16 I) const {
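The hunk above removes the partial-move helpers, leaving TransferBatch with whole-batch draining only. A minimal standalone model of the post-revert interface, with simplified types rather than the real allocator_common.h declarations:

#include <cstdint>
#include <cstring>

using u16 = uint16_t;
using CompactPtrT = uint32_t; // stand-in type

template <u16 MaxNumCached> struct TransferBatchModel {
  CompactPtrT Batch[MaxNumCached];
  u16 Count = 0;

  void add(CompactPtrT P) { Batch[Count++] = P; } // caller guarantees space
  void clear() { Count = 0; }
  u16 getCount() const { return Count; }
  bool isEmpty() const { return Count == 0U; }

  // With moveNToArray()/empty() reverted, this whole-batch drain is the only
  // bulk-move operation, so a pop always consumes an entire batch.
  void moveToArray(CompactPtrT *Array) {
    memcpy(Array, Batch, sizeof(Batch[0]) * Count);
    clear();
  }
};

int main() {
  TransferBatchModel<8> TB;
  TB.add(0x10);
  TB.add(0x20);
  CompactPtrT Out[8];
  TB.moveToArray(Out); // drains both blocks; TB is empty afterwards
  return (TB.isEmpty() && Out[1] == 0x20) ? 0 : 1;
}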
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index c86e75b8fd66a8..4d03b282d000de 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -191,21 +191,38 @@ template <typename Config> class SizeClassAllocator32 {
return BlockSize > PageSize;
}
+ // Note that `MaxBlockCount` will be used when we support an arbitrary block
+ // count. For now, it's the same as the number of blocks stored in the
+ // `TransferBatch`.
u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
- const u16 MaxBlockCount) {
+ UNUSED const u16 MaxBlockCount) {
+ TransferBatchT *B = popBatch(C, ClassId);
+ if (!B)
+ return 0;
+
+ const u16 Count = B->getCount();
+ DCHECK_GT(Count, 0U);
+ B->moveToArray(ToArray);
+
+ if (ClassId != SizeClassMap::BatchClassId)
+ C->deallocate(SizeClassMap::BatchClassId, B);
+
+ return Count;
+ }
+
+ TransferBatchT *popBatch(CacheT *C, uptr ClassId) {
DCHECK_LT(ClassId, NumClasses);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
ScopedLock L(Sci->Mutex);
-
- u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
- if (UNLIKELY(PopCount == 0)) {
+ TransferBatchT *B = popBatchImpl(C, ClassId, Sci);
+ if (UNLIKELY(!B)) {
if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
- return 0U;
- PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
- DCHECK_NE(PopCount, 0U);
+ return nullptr;
+ B = popBatchImpl(C, ClassId, Sci);
+ // If `populateFreeList` succeeded, we are supposed to get free blocks.
+ DCHECK_NE(B, nullptr);
}
-
- return PopCount;
+ return B;
}
// Push the array of free blocks to the designated batch group.
@@ -493,7 +510,7 @@ template <typename Config> class SizeClassAllocator32 {
// by TransferBatch is also free for use. We don't need to recycle the
// TransferBatch. Note that the correctness is maintained by the invariant,
//
- // Each popBlocks() request returns the entire TransferBatch. Returning
+ // The unit of each popBatch() request is an entire TransferBatch. Returning
// part of the blocks in a TransferBatch is invalid.
//
// This ensures that TransferBatch won't leak the address itself while it's
@@ -617,7 +634,7 @@ template <typename Config> class SizeClassAllocator32 {
BG->Batches.push_front(TB);
BG->PushedBlocks = 0;
BG->BytesInBGAtLastCheckpoint = 0;
- BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+ BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
return BG;
};
@@ -709,11 +726,14 @@ template <typename Config> class SizeClassAllocator32 {
InsertBlocks(Cur, Array + Size - Count, Count);
}
- u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
- CompactPtrT *ToArray, const u16 MaxBlockCount)
+ // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+ // group id will be considered first.
+ //
+ // The region mutex needs to be held while calling this method.
+ TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
REQUIRES(Sci->Mutex) {
if (Sci->FreeListInfo.BlockList.empty())
- return 0U;
+ return nullptr;
SinglyLinkedList<TransferBatchT> &Batches =
Sci->FreeListInfo.BlockList.front()->Batches;
@@ -726,57 +746,33 @@ template <typename Config> class SizeClassAllocator32 {
// Block used by `BatchGroup` is from BatchClassId. Turn the block into
// `TransferBatch` with single block.
TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
- ToArray[0] =
- compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
+ TB->clear();
+ TB->add(
+ compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
Sci->FreeListInfo.PoppedBlocks += 1;
- return 1U;
+ return TB;
}
- // So far, instead of always filling the blocks to `MaxBlockCount`, we only
- // examine single `TransferBatch` to minimize the time spent on the primary
- // allocator. Besides, the sizes of `TransferBatch` and
- // `CacheT::getMaxCached()` may also impact the time spent on accessing the
- // primary allocator.
- // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
- // blocks and/or adjust the size of `TransferBatch` according to
- // `CacheT::getMaxCached()`.
TransferBatchT *B = Batches.front();
+ Batches.pop_front();
DCHECK_NE(B, nullptr);
DCHECK_GT(B->getCount(), 0U);
- // BachClassId should always take all blocks in the TransferBatch. Read the
- // comment in `pushBatchClassBlocks()` for more details.
- const u16 PopCount = ClassId == SizeClassMap::BatchClassId
- ? B->getCount()
- : Min(MaxBlockCount, B->getCount());
- B->moveNToArray(ToArray, PopCount);
-
- // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
- // done without holding `Mutex`.
- if (B->empty()) {
- Batches.pop_front();
- // `TransferBatch` of BatchClassId is self-contained, no need to
- // deallocate. Read the comment in `pushBatchClassBlocks()` for more
- // details.
+ if (Batches.empty()) {
+ BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
+ Sci->FreeListInfo.BlockList.pop_front();
+
+ // We don't keep BatchGroup with zero blocks to avoid empty-checking while
+ // allocating. Note that the block used for constructing BatchGroup is
+ // recorded as free blocks in the last element of BatchGroup::Batches. Which
+ // means, once we pop the last TransferBatch, the block is implicitly
+ // deallocated.
if (ClassId != SizeClassMap::BatchClassId)
- C->deallocate(SizeClassMap::BatchClassId, B);
-
- if (Batches.empty()) {
- BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
- Sci->FreeListInfo.BlockList.pop_front();
-
- // We don't keep BatchGroup with zero blocks to avoid empty-checking
- // while allocating. Note that block used for constructing BatchGroup is
- // recorded as free blocks in the last element of BatchGroup::Batches.
- // Which means, once we pop the last TransferBatch, the block is
- // implicitly deallocated.
- if (ClassId != SizeClassMap::BatchClassId)
- C->deallocate(SizeClassMap::BatchClassId, BG);
- }
+ C->deallocate(SizeClassMap::BatchClassId, BG);
}
- Sci->FreeListInfo.PoppedBlocks += PopCount;
- return PopCount;
+ Sci->FreeListInfo.PoppedBlocks += B->getCount();
+ return B;
}
NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
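The popBatchImpl() restored above walks a freelist of BatchGroups, each holding a list of TransferBatches, always serving the front (smallest group id) group and retiring a group once its last batch is popped. A toy model of that walk, using standard containers in place of scudo's intrusive SinglyLinkedList:

#include <cstdint>
#include <list>
#include <utility>
#include <vector>

// Toy model, not scudo code: BlockList is a list of BatchGroups ordered by
// group id; each group owns a list of TransferBatches.
struct ToyBatch { std::vector<uint32_t> Blocks; };
struct ToyGroup {
  uintptr_t GroupId;
  std::list<ToyBatch> Batches;
};

// Pop one whole batch from the smallest-id group. Dropping an emptied group
// mirrors the "no zero-block BatchGroup" invariant described in the diff's
// comments. The caller must ensure BlockList is non-empty.
ToyBatch popBatchImpl(std::list<ToyGroup> &BlockList) {
  ToyGroup &Front = BlockList.front();
  ToyBatch B = std::move(Front.Batches.front());
  Front.Batches.pop_front();
  if (Front.Batches.empty())
    BlockList.pop_front();
  return B;
}

int main() {
  ToyBatch Batch;
  Batch.Blocks = {1, 2, 3};
  ToyGroup Group;
  Group.GroupId = 0;
  Group.Batches.push_back(Batch);
  std::list<ToyGroup> BlockList;
  BlockList.push_back(Group);
  ToyBatch B = popBatchImpl(BlockList);
  return (B.Blocks.size() == 3 && BlockList.empty()) ? 0 : 1;
}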
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 971a65ee15f0fc..9a642d23620e1e 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -12,7 +12,6 @@
#include "allocator_common.h"
#include "bytemap.h"
#include "common.h"
-#include "condition_variable.h"
#include "list.h"
#include "local_cache.h"
#include "mem_map.h"
@@ -23,6 +22,8 @@
#include "string_utils.h"
#include "thread_annotations.h"
+#include "condition_variable.h"
+
namespace scudo {
// SizeClassAllocator64 is an allocator tuned for 64-bit address space.
@@ -220,24 +221,41 @@ template <typename Config> class SizeClassAllocator64 {
DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
}
+ // Note that `MaxBlockCount` will be used when we support an arbitrary block
+ // count. For now, it's the same as the number of blocks stored in the
+ // `TransferBatch`.
u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
- const u16 MaxBlockCount) {
+ UNUSED const u16 MaxBlockCount) {
+ TransferBatchT *B = popBatch(C, ClassId);
+ if (!B)
+ return 0;
+
+ const u16 Count = B->getCount();
+ DCHECK_GT(Count, 0U);
+ B->moveToArray(ToArray);
+
+ if (ClassId != SizeClassMap::BatchClassId)
+ C->deallocate(SizeClassMap::BatchClassId, B);
+
+ return Count;
+ }
+
+ TransferBatchT *popBatch(CacheT *C, uptr ClassId) {
DCHECK_LT(ClassId, NumClasses);
RegionInfo *Region = getRegionInfo(ClassId);
- u16 PopCount = 0;
{
ScopedLock L(Region->FLLock);
- PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
- if (PopCount != 0U)
- return PopCount;
+ TransferBatchT *B = popBatchImpl(C, ClassId, Region);
+ if (LIKELY(B))
+ return B;
}
bool ReportRegionExhausted = false;
+ TransferBatchT *B = nullptr;
if (conditionVariableEnabled()) {
- PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount,
- ReportRegionExhausted);
+ B = popBatchWithCV(C, ClassId, Region, ReportRegionExhausted);
} else {
while (true) {
// When two threads compete for `Region->MMLock`, we only want one of
@@ -246,15 +264,13 @@ template <typename Config> class SizeClassAllocator64 {
ScopedLock ML(Region->MMLock);
{
ScopedLock FL(Region->FLLock);
- PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
- if (PopCount != 0U)
- return PopCount;
+ if ((B = popBatchImpl(C, ClassId, Region)))
+ break;
}
const bool RegionIsExhausted = Region->Exhausted;
if (!RegionIsExhausted)
- PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
- MaxBlockCount);
+ B = populateFreeListAndPopBatch(C, ClassId, Region);
ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
break;
}
@@ -270,7 +286,7 @@ template <typename Config> class SizeClassAllocator64 {
reportOutOfBatchClass();
}
- return PopCount;
+ return B;
}
// Push the array of free blocks to the designated batch group.
@@ -624,7 +640,7 @@ template <typename Config> class SizeClassAllocator64 {
// by TransferBatch is also free for use. We don't need to recycle the
// TransferBatch. Note that the correctness is maintained by the invariant,
//
- // Each popBlocks() request returns the entire TransferBatch. Returning
+ // The unit of each popBatch() request is an entire TransferBatch. Returning
// part of the blocks in a TransferBatch is invalid.
//
// This ensures that TransferBatch won't leak the address itself while it's
@@ -747,7 +763,7 @@ template <typename Config> class SizeClassAllocator64 {
BG->Batches.push_front(TB);
BG->PushedBlocks = 0;
BG->BytesInBGAtLastCheckpoint = 0;
- BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+ BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
return BG;
};
@@ -839,10 +855,9 @@ template <typename Config> class SizeClassAllocator64 {
InsertBlocks(Cur, Array + Size - Count, Count);
}
- u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
- CompactPtrT *ToArray, const u16 MaxBlockCount,
- bool &ReportRegionExhausted) {
- u16 PopCount = 0;
+ TransferBatchT *popBatchWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
+ bool &ReportRegionExhausted) {
+ TransferBatchT *B = nullptr;
while (true) {
// We only expect one thread doing the freelist refillment and other
@@ -863,8 +878,7 @@ template <typename Config> class SizeClassAllocator64 {
const bool RegionIsExhausted = Region->Exhausted;
if (!RegionIsExhausted)
- PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
- MaxBlockCount);
+ B = populateFreeListAndPopBatch(C, ClassId, Region);
ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
{
@@ -891,8 +905,7 @@ template <typename Config> class SizeClassAllocator64 {
// blocks were used up right after the refillment. Therefore, we have to
// check if someone is still populating the freelist.
ScopedLock FL(Region->FLLock);
- PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
- if (PopCount != 0U)
+ if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
break;
if (!Region->isPopulatingFreeList)
@@ -905,19 +918,21 @@ template <typename Config> class SizeClassAllocator64 {
// `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
Region->FLLockCV.wait(Region->FLLock);
- PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
- if (PopCount != 0U)
+ if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
break;
}
- return PopCount;
+ return B;
}
- u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
- CompactPtrT *ToArray, const u16 MaxBlockCount)
+ // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+ // group id will be considered first.
+ //
+ // The region mutex needs to be held while calling this method.
+ TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region)
REQUIRES(Region->FLLock) {
if (Region->FreeListInfo.BlockList.empty())
- return 0U;
+ return nullptr;
SinglyLinkedList<TransferBatchT> &Batches =
Region->FreeListInfo.BlockList.front()->Batches;
@@ -930,64 +945,39 @@ template <typename Config> class SizeClassAllocator64 {
// Block used by `BatchGroup` is from BatchClassId. Turn the block into
// `TransferBatch` with single block.
TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
- ToArray[0] =
- compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
+ TB->clear();
+ TB->add(
+ compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
Region->FreeListInfo.PoppedBlocks += 1;
- return 1U;
+ return TB;
}
- // So far, instead of always fill blocks to `MaxBlockCount`, we only exmaine
- // single `TransferBatch` to minimize the time spent in the primary
- // allocator. Besides, the sizes of `TransferBatch` and
- // `CacheT::getMaxCached()` may also impact the time spent on accessing the
- // primary allocator.
- // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
- // blocks and/or adjust the size of `TransferBatch` according to
- // `CacheT::getMaxCached()`.
TransferBatchT *B = Batches.front();
+ Batches.pop_front();
DCHECK_NE(B, nullptr);
DCHECK_GT(B->getCount(), 0U);
- // BachClassId should always take all blocks in the TransferBatch. Read the
- // comment in `pushBatchClassBlocks()` for more details.
- const u16 PopCount = ClassId == SizeClassMap::BatchClassId
- ? B->getCount()
- : Min(MaxBlockCount, B->getCount());
- B->moveNToArray(ToArray, PopCount);
-
- // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
- // done without holding `FLLock`.
- if (B->empty()) {
- Batches.pop_front();
- // `TransferBatch` of BatchClassId is self-contained, no need to
- // deallocate. Read the comment in `pushBatchClassBlocks()` for more
- // details.
- if (ClassId != SizeClassMap::BatchClassId)
- C->deallocate(SizeClassMap::BatchClassId, B);
-
- if (Batches.empty()) {
- BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
- Region->FreeListInfo.BlockList.pop_front();
+ if (Batches.empty()) {
+ BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
+ Region->FreeListInfo.BlockList.pop_front();
- // We don't keep BatchGroup with zero blocks to avoid empty-checking
- // while allocating. Note that block used for constructing BatchGroup is
- // recorded as free blocks in the last element of BatchGroup::Batches.
- // Which means, once we pop the last TransferBatch, the block is
- // implicitly deallocated.
- if (ClassId != SizeClassMap::BatchClassId)
- C->deallocate(SizeClassMap::BatchClassId, BG);
- }
+ // We don't keep BatchGroup with zero blocks to avoid empty-checking while
+ // allocating. Note that the block used for constructing BatchGroup is
+ // recorded as free blocks in the last element of BatchGroup::Batches. Which
+ // means, once we pop the last TransferBatch, the block is implicitly
+ // deallocated.
+ if (ClassId != SizeClassMap::BatchClassId)
+ C->deallocate(SizeClassMap::BatchClassId, BG);
}
- Region->FreeListInfo.PoppedBlocks += PopCount;
+ Region->FreeListInfo.PoppedBlocks += B->getCount();
- return PopCount;
+ return B;
}
- NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId,
- RegionInfo *Region,
- CompactPtrT *ToArray,
- const u16 MaxBlockCount)
+ // Refill the freelist and return one batch.
+ NOINLINE TransferBatchT *populateFreeListAndPopBatch(CacheT *C, uptr ClassId,
+ RegionInfo *Region)
REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
const uptr Size = getSizeByClassId(ClassId);
const u16 MaxCount = CacheT::getMaxCached(Size);
@@ -1004,7 +994,7 @@ template <typename Config> class SizeClassAllocator64 {
const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
Region->Exhausted = true;
- return 0U;
+ return nullptr;
}
if (UNLIKELY(!Region->MemMapInfo.MemMap.remap(
@@ -1012,7 +1002,7 @@ template <typename Config> class SizeClassAllocator64 {
MAP_ALLOWNOMEM | MAP_RESIZABLE |
(useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG
: 0)))) {
- return 0U;
+ return nullptr;
}
Region->MemMapInfo.MappedUser += MapSize;
C->getStats().add(StatMapped, MapSize);
@@ -1059,9 +1049,8 @@ template <typename Config> class SizeClassAllocator64 {
pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks);
}
- const u16 PopCount =
- popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
- DCHECK_NE(PopCount, 0U);
+ TransferBatchT *B = popBatchImpl(C, ClassId, Region);
+ DCHECK_NE(B, nullptr);
// Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record
// the requests from `PushBlocks` and `PopBatch` which are external
@@ -1073,7 +1062,7 @@ template <typename Config> class SizeClassAllocator64 {
C->getStats().add(StatFree, AllocatedUser);
Region->MemMapInfo.AllocatedUser += AllocatedUser;
- return PopCount;
+ return B;
}
void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Re...
[truncated]
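One more piece of restored machinery worth spelling out: popBatchWithCV() in primary64.h serializes freelist refills so that a single thread populates while the others wait on Region->FLLockCV. A toy model of that waiting discipline, built on the standard library rather than scudo's ConditionVariable and with the refill reduced to a single push:

#include <condition_variable>
#include <mutex>
#include <queue>

// Toy model, not scudo code: one thread refills the free list while the
// others block on a condition variable instead of contending for the refill.
struct ToyFreeList {
  std::mutex M;
  std::condition_variable CV;
  std::queue<int> Batches;
  bool IsPopulating = false;

  int popBatchWithCV() {
    std::unique_lock<std::mutex> L(M);
    while (Batches.empty()) {
      if (!IsPopulating) {
        // No one is refilling, so this thread takes the job itself. The real
        // code drops FLLock, maps memory under MMLock, then reacquires.
        IsPopulating = true;
        Batches.push(42); // stand-in for the freshly populated batch
        IsPopulating = false;
        CV.notify_all();
      } else {
        // Another thread is populating; sleep until it signals the refill.
        CV.wait(L);
      }
    }
    const int B = Batches.front();
    Batches.pop();
    return B;
  }
};

int main() {
  ToyFreeList FL;
  return FL.popBatchWithCV() == 42 ? 0 : 1;
}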
LGTM
Reland "[scudo] Store more blocks in each TransferBatch" (llvm#83078) (llvm#83081)
This reverts commit 056d62b. Fixed the number of bytes copied in moveNToArray().
Reverts #70390

There's a bug caught by ScudoCombinedTestReallocateInPlaceStress_DefaultConfig.ReallocateInPlaceStress with GWP-ASan. It's an easy fix, but given that this is a major change, I would like to revert it first.
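The fix referenced in the reland ("Fixed the number of bytes copied in moveNToArray()") is visible in the reverted hunks above: moveNToArray() sized its memcpy by Count (all cached blocks) instead of N (the blocks actually requested). A standalone sketch of the buggy and the relanded versions, with simplified types rather than the real scudo declarations:

#include <cassert>
#include <cstdint>
#include <cstring>

using u16 = uint16_t;
using CompactPtrT = uint32_t; // stand-in for scudo's compact pointer type

struct BatchSketch {
  static constexpr u16 MaxNumCached = 8;
  CompactPtrT Batch[MaxNumCached];
  u16 Count = 0;

  // Buggy version (reverted here): the copy is sized by Count, so it reads
  // past the valid elements of Batch and overruns an Array that only has
  // room for N entries, which is how GWP-ASan could catch it.
  void moveNToArrayBuggy(CompactPtrT *Array, u16 N) {
    assert(N <= Count);
    memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * Count); // BUG: * Count
    Count = static_cast<u16>(Count - N);
  }

  // Fixed version (relanded in llvm#83081): copy exactly the last N elements.
  void moveNToArray(CompactPtrT *Array, u16 N) {
    assert(N <= Count);
    memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * N);
    Count = static_cast<u16>(Count - N);
  }
};

int main() {
  BatchSketch B;
  for (u16 I = 0; I < 6; I++)
    B.Batch[B.Count++] = I;
  CompactPtrT Out[3];
  B.moveNToArray(Out, 3); // copies elements 3, 4, 5; Count drops to 3
  return (Out[0] == 3 && B.Count == 3) ? 0 : 1;
}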