Skip to content

[CAS] Add OnDiskCAS #114103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/cachemeifyoucan/spr/main.cas-add-ondiskcas
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CAS/ActionCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ class ActionCache {
/// Create an action cache in memory.
std::unique_ptr<ActionCache> createInMemoryActionCache();

/// Get a reasonable default on-disk path for a persistent ActionCache for the
/// current user.
///
/// The path is built from the system cache directory. If that directory cannot
/// be determined, the implementation reports a fatal error instead of
/// returning.
std::string getDefaultOnDiskActionCachePath();

/// Create an action cache on disk.
///
/// \param Path on-disk location handed to the underlying key-value database.
/// \returns the cache on success, or an error if the database cannot be opened
/// or if \c LLVM_ENABLE_ONDISK_CAS is not enabled in this build.
Expected<std::unique_ptr<ActionCache>> createOnDiskActionCache(StringRef Path);
} // end namespace llvm::cas

#endif // LLVM_CAS_CASACTIONCACHE_H
14 changes: 14 additions & 0 deletions llvm/include/llvm/CAS/ObjectStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,20 @@ class ObjectProxy {

std::unique_ptr<ObjectStore> createInMemoryCAS();

/// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled.
bool isOnDiskCASEnabled();

/// Gets or creates a persistent on-disk CAS at \p Path.
///
/// \p Path is converted to an absolute path before the store is opened.
/// \returns the store on success, or an error if it cannot be opened or if
/// \c LLVM_ENABLE_ONDISK_CAS is not enabled in this build.
Expected<std::unique_ptr<ObjectStore>> createOnDiskCAS(const Twine &Path);

/// Set \p Path to a reasonable default on-disk path for a persistent CAS for
/// the current user.
///
/// \returns an error if the system cache directory cannot be determined.
Error getDefaultOnDiskCASPath(SmallVectorImpl<char> &Path);

/// Get a reasonable default on-disk path for a persistent CAS for the current
/// user. \returns empty string if no reasonable path can be found.
std::string getDefaultOnDiskCASPath();

} // namespace cas
} // namespace llvm

Expand Down
83 changes: 83 additions & 0 deletions llvm/lib/CAS/ActionCaches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#include "BuiltinCAS.h"
#include "llvm/ADT/TrieRawHashMap.h"
#include "llvm/CAS/ActionCache.h"
#include "llvm/CAS/OnDiskKeyValueDB.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "action-caches"

Expand Down Expand Up @@ -48,6 +50,25 @@ class InMemoryActionCache final : public ActionCache {

InMemoryCacheT Cache;
};

/// ActionCache implementation whose entries live in an
/// ondisk::OnDiskKeyValueDB.
class OnDiskActionCache final : public ActionCache {
public:
/// Associate \p Result with \p ActionKey. The \p Globally flag is not used by
/// this implementation. Fails with a "cache poisoned" error when the database
/// already holds a different value for the key.
Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
bool Globally) final;
/// Look up \p ActionKey. Yields std::nullopt when the database has no entry.
/// The \p Globally flag is not used by this implementation.
Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
bool Globally) const final;

/// Open the key-value database at \p Path and wrap it in a new cache.
static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path);

private:
/// Name passed to the key-value database when it is opened.
static StringRef getHashName() { return "BLAKE3"; }

/// Private: instances are only produced through create().
OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB);

/// Backing key-value store, owned by this cache.
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
/// Entry type whose size is passed as the value size when opening the
/// database.
using DataT = CacheEntry<sizeof(HashType)>;
};

} // end namespace

static std::string hashToString(ArrayRef<uint8_t> Hash) {
Expand Down Expand Up @@ -90,10 +111,72 @@ Error InMemoryActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
Observed.getValue());
}

/// Final path component appended when building the default on-disk action
/// cache path.
static constexpr StringLiteral DefaultName = "actioncache";

namespace llvm::cas {

/// Returns a freshly allocated InMemoryActionCache.
std::unique_ptr<ActionCache> createInMemoryActionCache() {
return std::make_unique<InMemoryActionCache>();
}

std::string getDefaultOnDiskActionCachePath() {
SmallString<128> Path;
if (!llvm::sys::path::cache_directory(Path))
report_fatal_error("cannot get default cache directory");
llvm::sys::path::append(Path, builtin::DefaultDir, DefaultName);
return Path.str().str();
}

} // namespace llvm::cas

/// Binds the cache to the default builtin CAS context and takes ownership of
/// the already-opened key-value database \p DB.
OnDiskActionCache::OnDiskActionCache(
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB)
: ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
DB(std::move(DB)) {}

/// Opens the key-value database at \p AbsPath and wraps it in a new
/// OnDiskActionCache.
///
/// The hash name is passed for both name arguments of the database, with
/// sizeof(HashType) as the key size and sizeof(DataT) as the value size.
/// \returns the cache, or the error produced while opening the database.
Expected<std::unique_ptr<OnDiskActionCache>>
OnDiskActionCache::create(StringRef AbsPath) {
  auto OpenedDB = ondisk::OnDiskKeyValueDB::open(
      AbsPath, getHashName(), sizeof(HashType), getHashName(), sizeof(DataT));
  if (!OpenedDB)
    return OpenedDB.takeError();

  std::unique_ptr<ondisk::OnDiskKeyValueDB> KeyValueDB = std::move(*OpenedDB);
  // The constructor is private, so std::make_unique cannot be used here.
  return std::unique_ptr<OnDiskActionCache>(
      new OnDiskActionCache(std::move(KeyValueDB)));
}

/// Looks up \p Key in the on-disk database.
///
/// \returns std::nullopt when there is no entry, a CASID built from the stored
/// bytes when there is one, or the error reported by the database.
Expected<std::optional<CASID>>
OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key, bool /*Globally*/) const {
  auto Lookup = DB->get(Key);
  if (!Lookup)
    return Lookup.takeError();

  std::optional<ArrayRef<char>> Stored = std::move(*Lookup);
  if (Stored.has_value())
    return CASID::create(&getContext(), toStringRef(*Stored));
  return std::nullopt;
}

/// Stores the hash of \p Result under \p Key.
///
/// The database hands back the value it holds for the key after the insert.
/// If that differs from the hash we tried to store, a "cache poisoned" error
/// describing both values is returned.
Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
                                 bool /*Globally*/) {
  auto ResultHash = Result.getHash();
  const ArrayRef<char> Wanted(
      reinterpret_cast<const char *>(ResultHash.data()), ResultHash.size());

  auto Stored = DB->put(Key, Wanted);
  if (!Stored)
    return Stored.takeError();

  const ArrayRef<char> Observed = *Stored;
  if (Wanted != Observed)
    return createResultCachePoisonedError(
        hashToString(Key), getContext(), Result,
        ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Observed.data()),
                          Observed.size()));

  return Error::success();
}

/// Public factory for the on-disk action cache.
///
/// Forwards to OnDiskActionCache::create when the build enables
/// LLVM_ENABLE_ONDISK_CAS; otherwise always yields an error.
Expected<std::unique_ptr<ActionCache>>
cas::createOnDiskActionCache(StringRef Path) {
#if !LLVM_ENABLE_ONDISK_CAS
  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
#else
  return OnDiskActionCache::create(Path);
#endif
}
2 changes: 2 additions & 0 deletions llvm/lib/CAS/BuiltinCAS.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class BuiltinCAS : public ObjectStore {
Error validate(const CASID &ID) final;
};

/// Directory name appended to the system cache directory when building the
/// default on-disk CAS and action-cache paths.
constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default";

} // end namespace builtin
} // end namespace llvm::cas

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CAS/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMCAS
InMemoryCAS.cpp
MappedFileRegionBumpPtr.cpp
ObjectStore.cpp
OnDiskCAS.cpp
OnDiskCommon.cpp
OnDiskKeyValueDB.cpp
OnDiskHashMappedTrie.cpp
Expand Down
186 changes: 186 additions & 0 deletions llvm/lib/CAS/OnDiskCAS.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
//===- OnDiskCAS.cpp --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "BuiltinCAS.h"
#include "llvm/CAS/OnDiskGraphDB.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"

using namespace llvm;
using namespace llvm::cas;
using namespace llvm::cas::builtin;

namespace {

/// BuiltinCAS implementation that forwards storage and lookup to an
/// ondisk::OnDiskGraphDB, translating between the generic ObjectRef /
/// ObjectHandle types and their ondisk counterparts.
class OnDiskCAS : public BuiltinCAS {
public:
/// Store an object with the given precomputed hash, references and data.
Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
ArrayRef<ObjectRef> Refs,
ArrayRef<char> Data) final;

/// Load the object for \p Ref; yields std::nullopt if the database returns no
/// handle for it.
Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;

/// Build a CASID from the digest the database reports for \p Ref.
CASID getID(ObjectRef Ref) const final;

/// Look up an existing reference for \p ID; std::nullopt if there is none.
std::optional<ObjectRef> getReference(const CASID &ID) const final;

/// Forwards to the database's isMaterialized query for \p Ref.
Expected<bool> isMaterialized(ObjectRef Ref) const final;

/// Returns the data bytes the database reports for \p Node.
ArrayRef<char> getDataConst(ObjectHandle Node) const final;

/// Prints the underlying database to \p OS.
void print(raw_ostream &OS) const final;

/// Open the graph database at \p Path and wrap it in a new OnDiskCAS.
static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);

private:
// Handle conversion: ondisk handle -> generic handle, via the opaque data.
ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
return makeObjectHandle(Node.getOpaqueData());
}

// Handle conversion: generic handle -> ondisk handle, via the internal ref.
ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
return ondisk::ObjectHandle::fromOpaqueData(Node.getInternalRef(*this));
}

// Reference conversion: ondisk ID -> generic ref, via the opaque data.
ObjectRef convertRef(ondisk::ObjectID Ref) const {
return makeObjectRef(Ref.getOpaqueData());
}

// Reference conversion: generic ref -> ondisk ID, via the internal ref.
ondisk::ObjectID convertRef(ObjectRef Ref) const {
return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this));
}

// Counts the references of \p Node by measuring the distance between the
// begin and end of the database's reference range.
size_t getNumRefs(ObjectHandle Node) const final {
auto RefsRange = DB->getObjectRefs(convertHandle(Node));
return std::distance(RefsRange.begin(), RefsRange.end());
}
// Returns the I-th reference of \p Node by indexing the range's begin
// iterator. No bounds check is performed here.
ObjectRef readRef(ObjectHandle Node, size_t I) const final {
auto RefsRange = DB->getObjectRefs(convertHandle(Node));
return convertRef(RefsRange.begin()[I]);
}
// Invokes \p Callback on each reference of \p Node, stopping at the first
// error.
Error forEachRef(ObjectHandle Node,
function_ref<Error(ObjectRef)> Callback) const final;

// Private: instances are only produced through open(). Takes ownership of
// \p GraphDB and caches a raw pointer to it in DB.
OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
: OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}

// NOTE: OwnedDB must stay declared before DB, because the constructor
// initializes DB from OwnedDB.get() and members are initialized in
// declaration order.
std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
ondisk::OnDiskGraphDB *DB;
};

} // end anonymous namespace

/// Prints the underlying graph database to \p OS.
void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); }

/// Produces the CASID for \p Ref from the digest the database reports for the
/// corresponding on-disk object ID.
CASID OnDiskCAS::getID(ObjectRef Ref) const {
  const ondisk::ObjectID ID = convertRef(Ref);
  const ArrayRef<uint8_t> Digest = DB->getDigest(ID);
  return CASID::create(&getContext(), toStringRef(Digest));
}

/// Maps \p ID to an ObjectRef if the database already has a reference for its
/// hash; otherwise returns std::nullopt.
std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
  if (std::optional<ondisk::ObjectID> Existing =
          DB->getExistingReference(ID.getHash()))
    return convertRef(*Existing);
  return std::nullopt;
}

/// Forwards the materialization query for \p ExternalRef to the database.
Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
  const ondisk::ObjectID ID = convertRef(ExternalRef);
  return DB->isMaterialized(ID);
}

/// Returns the data bytes the database reports for \p Node.
ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
  const ondisk::ObjectHandle Handle = convertHandle(Node);
  return DB->getObjectData(Handle);
}

/// Loads the object behind \p ExternalRef.
///
/// \returns the converted handle, std::nullopt when the database yields no
/// handle, or the error reported by the database.
Expected<std::optional<ObjectHandle>>
OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
  Expected<std::optional<ondisk::ObjectHandle>> Loaded =
      DB->load(convertRef(ExternalRef));
  if (!Loaded)
    return Loaded.takeError();

  const std::optional<ondisk::ObjectHandle> &Handle = *Loaded;
  if (Handle.has_value())
    return convertHandle(*Handle);
  return std::nullopt;
}

/// Stores an object under \p ComputedHash.
///
/// Each entry of \p Refs is translated to its on-disk ID, a reference is
/// obtained for the hash, and the database is asked to store the references
/// and \p Data under it.
/// \returns the reference of the stored object, or the store error.
Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
                                         ArrayRef<ObjectRef> Refs,
                                         ArrayRef<char> Data) {
  SmallVector<ondisk::ObjectID, 64> RefIDs;
  RefIDs.reserve(Refs.size());
  for (const ObjectRef &Ref : Refs)
    RefIDs.push_back(convertRef(Ref));

  ondisk::ObjectID Target = DB->getReference(ComputedHash);
  Error StoreErr = DB->store(Target, RefIDs, Data);
  if (StoreErr)
    return std::move(StoreErr);
  return convertRef(Target);
}

/// Calls \p Callback once per reference of \p Node, in the order the database
/// range yields them. Iteration stops at, and returns, the first error.
Error OnDiskCAS::forEachRef(ObjectHandle Node,
                            function_ref<Error(ObjectRef)> Callback) const {
  auto Refs = DB->getObjectRefs(convertHandle(Node));
  for (auto It = Refs.begin(), End = Refs.end(); It != End; ++It) {
    ondisk::ObjectID ID = *It;
    if (Error E = Callback(convertRef(ID)))
      return E;
  }
  return Error::success();
}

/// Opens the graph database at \p AbsPath, using the builtin CAS hash name and
/// sizeof(HashType), and wraps it in a new OnDiskCAS.
/// \returns the store, or the error produced while opening the database.
Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
  std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB;
  if (Error E = ondisk::OnDiskGraphDB::open(AbsPath,
                                            BuiltinCASContext::getHashName(),
                                            sizeof(HashType))
                    .moveInto(GraphDB))
    return std::move(E);
  // The constructor is private, so std::make_unique cannot be used here.
  return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(GraphDB)));
}

/// Reports at run time whether this build was compiled with
/// LLVM_ENABLE_ONDISK_CAS enabled.
bool cas::isOnDiskCASEnabled() {
#if !LLVM_ENABLE_ONDISK_CAS
  return false;
#else
  return true;
#endif
}

/// Gets or creates a persistent on-disk CAS at \p Path.
///
/// \p Path is converted to an absolute path before the store is opened.
/// \returns the opened store; an error if the path cannot be made absolute or
/// the store cannot be opened; or an "OnDiskCAS is disabled" error when the
/// build does not enable LLVM_ENABLE_ONDISK_CAS.
Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) {
#if LLVM_ENABLE_ONDISK_CAS
  // FIXME: An absolute path isn't really good enough. Should open a directory
  // and use openat() for files underneath.
  SmallString<256> AbsPath;
  Path.toVector(AbsPath);
  // make_absolute can fail (e.g. when the current working directory cannot be
  // queried); surface that instead of opening a possibly relative path.
  if (std::error_code EC = sys::fs::make_absolute(AbsPath))
    return errorCodeToError(EC);

  return OnDiskCAS::open(AbsPath);
#else
  return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
#endif /* LLVM_ENABLE_ONDISK_CAS */
}

/// Final path component appended when building the default on-disk CAS path.
static constexpr StringLiteral DefaultName = "cas";

/// Fills \p Path with the default CAS location:
/// <system cache dir>/<DefaultDir>/<DefaultName>.
///
/// \returns success, or a string error when the system cache directory cannot
/// be determined.
Error cas::getDefaultOnDiskCASPath(SmallVectorImpl<char> &Path) {
  const bool FoundCacheDir = llvm::sys::path::cache_directory(Path);
  if (FoundCacheDir) {
    llvm::sys::path::append(Path, DefaultDir, DefaultName);
    return Error::success();
  }
  return createStringError(inconvertibleErrorCode(),
                           "cannot get default cache directory");
}

std::string cas::getDefaultOnDiskCASPath() {
SmallString<128> Path;
if (auto Err = getDefaultOnDiskCASPath(Path)) {
consumeError(std::move(Err));
return {};
}

return Path.str().str();
}
28 changes: 27 additions & 1 deletion llvm/unittests/CAS/CASTestConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "CASTestConfig.h"
#include "llvm/CAS/ObjectStore.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"

using namespace llvm;
Expand All @@ -16,8 +17,33 @@ using namespace llvm::cas;
/// Test-environment factory backed entirely by in-memory implementations.
///
/// \param I parameter index supplied by the test instantiation; unused here.
/// \returns an environment holding an in-memory CAS, an in-memory action
/// cache, and no temporary directory (std::nullopt).
CASTestingEnv createInMemory(int I) {
  std::unique_ptr<ObjectStore> CAS = createInMemoryCAS();
  std::unique_ptr<ActionCache> Cache = createInMemoryActionCache();
  // Single return: the old two-field initializer was a leftover that made this
  // three-field form unreachable.
  return CASTestingEnv{std::move(CAS), std::move(Cache), std::nullopt};
}

// Run the parameterized CASTest suite against the in-memory environment.
INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest,
::testing::Values(createInMemory));

#if LLVM_ENABLE_ONDISK_CAS
namespace llvm::cas::ondisk {
// Declared here directly rather than through a header; the definition is not
// part of this file.
void setMaxMappingSize(uint64_t Size);
} // namespace llvm::cas::ondisk

/// Sets the on-disk CAS maximum mapping size to 100 * 1024 * 1024 for the
/// tests. The call is guarded by a once-flag, so repeated invocations only
/// apply the setting the first time.
void setMaxOnDiskCASMappingSize() {
  static std::once_flag Once;
  const auto ApplyLimit = [] {
    llvm::cas::ondisk::setMaxMappingSize(100 * 1024 * 1024);
  };
  std::call_once(Once, ApplyLimit);
}

/// Test-environment factory backed by on-disk implementations.
///
/// Creates a unique temporary directory and opens both the CAS and the action
/// cache at that directory's path. Creation failures are reported through
/// EXPECT_THAT_ERROR, which leaves the corresponding pointer null. The
/// temporary directory is moved into the returned environment.
///
/// \param I parameter index supplied by the test instantiation; unused here.
CASTestingEnv createOnDisk(int I) {
  unittest::TempDir ScratchDir("on-disk-cas", /*Unique=*/true);

  std::unique_ptr<ObjectStore> Store;
  EXPECT_THAT_ERROR(createOnDiskCAS(ScratchDir.path()).moveInto(Store),
                    Succeeded());

  std::unique_ptr<ActionCache> Actions;
  EXPECT_THAT_ERROR(
      createOnDiskActionCache(ScratchDir.path()).moveInto(Actions),
      Succeeded());

  return CASTestingEnv{std::move(Store), std::move(Actions),
                       std::move(ScratchDir)};
}
// Run the parameterized CASTest suite against the on-disk environment.
INSTANTIATE_TEST_SUITE_P(OnDiskCAS, CASTest, ::testing::Values(createOnDisk));
#else
// No-op variant used when LLVM_ENABLE_ONDISK_CAS is not enabled.
void setMaxOnDiskCASMappingSize() {}
#endif /* LLVM_ENABLE_ONDISK_CAS */
Loading
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.