Skip to content

Commit a9171b4

Browse files
[CAS] Handle DataPool overflow in OnDiskGraphDB
Handle DataPool overflow in GraphDB: * When the data pool can't hold more items, allocate the new entries on disk as standalone files (degraded performance and more disk space consumption). * Teach UnifiedCAS to chain and reclaim space more aggressively if the data pool is relatively full (currently hard-coded to 85%) even when the CAS is not at its overall size limit. rdar://139242950
1 parent c66ea5f commit a9171b4

File tree

7 files changed

+93
-10
lines changed

7 files changed

+93
-10
lines changed

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,11 @@ class OnDiskGraphDB {
302302
/// large object if the process crashed right at the point of inserting it.
303303
size_t getStorageSize() const;
304304

305+
/// \returns The percentage of space utilization of the hard space limits.
306+
///
307+
/// Return value is an integer between 0 and 100 for percentage.
308+
unsigned getHardStorageLimitUtilization() const;
309+
305310
void print(raw_ostream &OS) const;
306311

307312
/// How to fault-in nodes if an upstream database is used.

llvm/include/llvm/CAS/OnDiskHashMappedTrie.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ class OnDiskHashMappedTrie {
275275
}
276276

277277
size_t size() const;
278+
size_t capacity() const;
278279

279280
/// Gets or creates a file at \p Path with a hash-mapped trie named \p
280281
/// TrieName. The hash size is \p NumHashBits (in bits) and the records store
@@ -366,6 +367,7 @@ class OnDiskDataAllocator {
366367
MutableArrayRef<uint8_t> getUserHeader();
367368

368369
size_t size() const;
370+
size_t capacity() const;
369371

370372
static Expected<OnDiskDataAllocator>
371373
create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ class OnDiskKeyValueDB {
3939
return Cache.size();
4040
}
4141

42+
/// \returns The percentage of space utilization of the hard space limits.
43+
///
44+
/// Return value is an integer between 0 and 100 for percentage.
45+
unsigned getHardStorageLimitUtilization() const {
46+
// Ratio of the cache's current size to its capacity, scaled to a percentage.
// The 100ULL literal makes the multiplication happen in unsigned long long;
// the integer division truncates any fractional part.
return Cache.size() * 100ULL / Cache.capacity();
47+
}
48+
4249
/// Open the on-disk store from a directory.
4350
///
4451
/// \param Path directory for the on-disk store. The directory will be created

llvm/lib/CAS/OnDiskGraphDB.cpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,12 +1259,33 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
12591259
SmallString<256> Path;
12601260
std::optional<MappedTempFile> File;
12611261
std::optional<uint64_t> FileSize;
1262+
auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
1263+
getStandalonePath(TrieRecord::getStandaloneFileSuffix(
1264+
TrieRecord::StorageKind::Standalone),
1265+
I, Path);
1266+
if (Error E = createTempFile(Path, Size).moveInto(File))
1267+
return std::move(E);
1268+
assert(File->size() == Size);
1269+
FileSize = Size;
1270+
SK = TrieRecord::StorageKind::Standalone;
1271+
return File->data();
1272+
};
12621273
auto Alloc = [&](size_t Size) -> Expected<char *> {
12631274
if (Size <= TrieRecord::MaxEmbeddedSize) {
12641275
SK = TrieRecord::StorageKind::DataPool;
12651276
auto P = DataPool.allocate(Size);
1266-
if (LLVM_UNLIKELY(!P))
1267-
return P.takeError();
1277+
if (LLVM_UNLIKELY(!P)) {
1278+
char *NewAlloc = nullptr;
1279+
auto NewE = handleErrors(
1280+
P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
1281+
if (E->convertToErrorCode() == std::errc::not_enough_memory)
1282+
return AllocStandaloneFile(Size).moveInto(NewAlloc);
1283+
return Error(std::move(E));
1284+
});
1285+
if (!NewE)
1286+
return NewAlloc;
1287+
return std::move(NewE);
1288+
}
12681289
PoolOffset = P->getOffset();
12691290
LLVM_DEBUG({
12701291
dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
@@ -1273,15 +1294,9 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
12731294
});
12741295
return (*P)->data();
12751296
}
1276-
1277-
SK = TrieRecord::StorageKind::Standalone;
1278-
getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I, Path);
1279-
if (Error E = createTempFile(Path, Size).moveInto(File))
1280-
return std::move(E);
1281-
assert(File->size() == Size);
1282-
FileSize = Size;
1283-
return File->data();
1297+
return AllocStandaloneFile(Size);
12841298
};
1299+
12851300
DataRecordHandle Record;
12861301
if (Error E =
12871302
DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
@@ -1350,6 +1365,12 @@ size_t OnDiskGraphDB::getStorageSize() const {
13501365
return Index.size() + DataPool.size() + getStandaloneStorageSize();
13511366
}
13521367

1368+
// Computes size * 100 / capacity separately for the index and the data pool
// and returns the larger of the two percentages.
unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
1369+
// The 100ULL literal makes the multiplication happen in unsigned long long;
// the integer division truncates any fractional part.
unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
1370+
unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
1371+
// Whichever of the two is fuller determines the reported utilization.
return std::max(IndexPercent, DataPercent);
1372+
}
1373+
13531374
static bool useSmallMappedFiles(const Twine &P) {
13541375
// macOS tmpfs does not support sparse tails.
13551376
#if defined(__APPLE__) && __has_include(<sys/mount.h>)

llvm/lib/CAS/OnDiskHashMappedTrie.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,9 @@ static Error checkTable(StringRef Label, size_t Expected, size_t Observed,
10631063
}
10641064

10651065
size_t OnDiskHashMappedTrie::size() const { return Impl->File.size(); }
1066+
// Returns the size of the region backing the trie's file.
// NOTE(review): presumably the full mapped/reserved size, in contrast to
// size(), which returns File.size() — confirm against the mapped-file type.
size_t OnDiskHashMappedTrie::capacity() const {
1067+
return Impl->File.getRegion().size();
1068+
}
10661069

10671070
Expected<OnDiskHashMappedTrie>
10681071
OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine,
@@ -1321,6 +1324,9 @@ MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() {
13211324
}
13221325

13231326
size_t OnDiskDataAllocator::size() const { return Impl->File.size(); }
1327+
// Returns the size of the region backing the allocator's file.
// NOTE(review): presumably the full mapped/reserved size, in contrast to
// size(), which returns File.size() — confirm against the mapped-file type.
size_t OnDiskDataAllocator::capacity() const {
1328+
return Impl->File.getRegion().size();
1329+
}
13241330

13251331
OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl)
13261332
: Impl(std::move(Impl)) {}
@@ -1365,6 +1371,14 @@ size_t OnDiskHashMappedTrie::size() const {
13651371
report_fatal_error("not supported");
13661372
}
13671373

1374+
// Stub that always aborts with report_fatal_error("not supported").
// NOTE(review): appears to sit next to the equivalent size() stub in the
// fallback section for builds without on-disk CAS support — confirm.
size_t OnDiskHashMappedTrie::capacity() const {
1375+
report_fatal_error("not supported");
1376+
}
1377+
1378+
// Stub that always aborts with report_fatal_error("not supported").
// NOTE(review): appears to be part of the fallback section for builds without
// on-disk CAS support — confirm.
size_t OnDiskDataAllocator::capacity() const {
1379+
report_fatal_error("not supported");
1380+
}
1381+
13681382
struct OnDiskDataAllocator::ImplType {};
13691383

13701384
Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(

llvm/lib/CAS/UnifiedOnDiskCache.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,14 @@ bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
271271
uint64_t CurSizeLimit = SizeLimit;
272272
if (!CurSizeLimit)
273273
return false;
274+
275+
// If utilization of the hard limit is above 85%, report the limit as exceeded to request cleanup.
276+
unsigned CurrentPrecent =
277+
std::max(PrimaryGraphDB->getHardStorageLimitUtilization(),
278+
PrimaryKVDB->getHardStorageLimitUtilization());
279+
if (CurrentPrecent > 85)
280+
return true;
281+
274282
// We allow each of the directories in the chain to reach up to half the
275283
// intended size limit. Check whether the primary directory has exceeded half
276284
// the limit or not, in order to decide whether we need to start a new chain.

llvm/unittests/CAS/OnDiskGraphDBTest.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "CASTestConfig.h"
910
#include "OnDiskCommonUtils.h"
1011
#include "llvm/Testing/Support/Error.h"
1112
#include "llvm/Testing/Support/SupportHelpers.h"
@@ -284,4 +285,29 @@ TEST(OnDiskGraphDBTest, FaultInPolicyConflict) {
284285
OnDiskGraphDB::FaultInPolicy::SingleNode);
285286
}
286287

288+
#if defined(EXPENSIVE_CHECKS)
289+
// Stores a long chain of small objects into a fresh OnDiskGraphDB and checks
// that every store succeeds and that the reported hard storage-limit
// utilization ends up at 99% or higher.
TEST(OnDiskGraphDBTest, SpaceLimit) {
290+
// NOTE(review): defined outside this view; presumably configures the on-disk
// mapping size so the limit can be reached by this test — confirm.
setMaxOnDiskCASMappingSize();
291+
unittest::TempDir Temp("ondiskcas", /*Unique=*/true);
292+
std::unique_ptr<OnDiskGraphDB> DB;
293+
ASSERT_THAT_ERROR(
294+
OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB),
295+
Succeeded());
296+
297+
// ID of the most recently stored object; empty until the first store.
std::optional<ObjectID> ID;
298+
// 500-byte payload made of '0' characters, reused for every object.
std::string Data(500, '0');
299+
// Stores one object that references the previously stored object (if any)
// and records the new object's ID in ID.
auto storeSmallObject = [&]() {
300+
SmallVector<ObjectID, 1> Refs;
301+
if (ID)
302+
Refs.push_back(*ID);
303+
ASSERT_THAT_ERROR(store(*DB, Data, Refs).moveInto(ID), Succeeded());
304+
};
305+
306+
// Insert enough small elements to overflow the data pool.
307+
for (unsigned I = 0; I < 1024 * 256; ++I)
308+
storeSmallObject();
309+
310+
EXPECT_GE(DB->getHardStorageLimitUtilization(), 99U);
311+
}
312+
#endif
287313
#endif // LLVM_ENABLE_ONDISK_CAS

0 commit comments

Comments
 (0)