Skip to content

[llvm][cas] Add validate-if-needed to recover from invalid data #10581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/CAS/CASOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ class CASOptions : public CASConfiguration {
/// default on-disk CAS, otherwise this is a noop.
void ensurePersistentCAS();

/// Store the CAS path to use in \p Result: the path produced by
/// \c getDefaultOnDiskCASPath when \c CASPath is "auto", otherwise \c CASPath
/// itself.
void getResolvedCASPath(llvm::SmallVectorImpl<char> &Result) const;

private:
/// Initialize Cached CAS and ActionCache.
llvm::Error initCache() const;
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/CAS/CASOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ llvm::Error CASOptions::initCache() const {
}

SmallString<256> PathBuf;
getResolvedCASPath(PathBuf);
if (CASPath == "auto") {
getDefaultOnDiskCASPath(PathBuf);
CASPath = PathBuf;
Expand All @@ -119,3 +120,11 @@ llvm::Error CASOptions::initCache() const {
std::tie(Cache.CAS, Cache.AC) = std::move(DBs);
return llvm::Error::success();
}

/// Write the effective CAS path into \p Result.
///
/// An explicit \c CASPath is copied verbatim; the special value "auto" is
/// replaced by whatever \c getDefaultOnDiskCASPath produces.
void CASOptions::getResolvedCASPath(SmallVectorImpl<char> &Result) const {
  // Explicit path: hand it back unchanged.
  if (CASPath != "auto") {
    Result.assign(CASPath.begin(), CASPath.end());
    return;
  }
  // "auto": defer to the default on-disk location.
  getDefaultOnDiskCASPath(Result);
}
2 changes: 1 addition & 1 deletion clang/test/CAS/depscan-cas-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// enable logging there are currently zero records in the log.

// RUN: rm -rf %t && mkdir %t
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 %clang \
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 LLVM_CAS_DISABLE_VALIDATION=1 %clang \
// RUN: -cc1depscan -fdepscan=daemon -fdepscan-include-tree -o - \
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
// RUN: FileCheck %s --input-file %t/cas/v1.log
Expand Down
18 changes: 18 additions & 0 deletions clang/test/CAS/validate-once.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Ingest this file into a fresh CAS, move the CAS data file aside, then run a
// compilation through the depscan daemon. Afterwards a corrupt.0.v1.1 entry is
// expected to exist in the CAS directory, and a follow-up
// llvm-cas --validate-if-needed is expected to report that it did no work.
// NOTE(review): presumably moving v8.data away is what makes the daemon treat
// the CAS as invalid and recover it -- confirm against the validation code.

// RUN: rm -rf %t

// RUN: llvm-cas --cas %t/cas --ingest %s
// RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak

// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \
// RUN: -Xclang -fcas-path -Xclang %t/cas \
// RUN: -fdepscan=daemon -fdepscan-daemon=%{clang-daemon-dir}/%basename_t -fsyntax-only -x c %s

// RUN: ls %t/cas/corrupt.0.v1.1

// RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED
// SKIPPED: validation skipped

#include "test.h"

int func(void);
3 changes: 2 additions & 1 deletion clang/tools/driver/cc1depscanProtocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ Expected<ScanDaemon> ScanDaemon::launchDaemon(StringRef BasePath,
#endif

static constexpr const char *PassThroughEnv[] = {
"LLVM_CAS_LOG",
"LLVM_CAS_LOG",
"LLVM_CAS_DISABLE_VALIDATION",
};
SmallVector<const char *> EnvP;
for (const char *Name : PassThroughEnv)
Expand Down
82 changes: 63 additions & 19 deletions clang/tools/driver/cc1depscan_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@
#include "clang/Tooling/DependencyScanning/ScanAndUpdateArgs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/CAS/ActionCache.h"
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
#include "llvm/CAS/CASProvidingFileSystem.h"
#include "llvm/CAS/CachingOnDiskFileSystem.h"
#include "llvm/CAS/HierarchicalTreeBuilder.h"
Expand All @@ -39,6 +41,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrefixMapper.h"
Expand All @@ -50,6 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <mutex>
#include <optional>
#include <shared_mutex>

#if LLVM_ON_UNIX
Expand Down Expand Up @@ -630,8 +634,8 @@ namespace {
struct ScanServer {
const char *Argv0 = nullptr;
SmallString<128> BasePath;
/// List of cas options.
ArrayRef<const char *> CASArgs;
CASOptions CASOpts;
bool ProduceIncludeTree = true;
int PidFD = -1;
int ListenSocket = -1;
/// \p std::nullopt means it runs indefinitely.
Expand All @@ -640,7 +644,7 @@ struct ScanServer {

~ScanServer() { shutdown(); }

void start(bool Exclusive);
void start(bool Exclusive, ArrayRef<const char *> CASArgs);
int listen();

/// Tear down the socket and bind file immediately but wait till all existing
Expand Down Expand Up @@ -705,13 +709,13 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
// particular "build session", to shutdown, then have it stay alive until the
// session is finished.
bool LongRunning = false;

ArrayRef<const char *> CASArgs;
for (const auto *A = Argv.begin() + 2; A != Argv.end(); ++A) {
StringRef Arg(*A);
if (Arg == "-long-running")
LongRunning = true;
else if (Arg == "-cas-args") {
Server.CASArgs = ArrayRef(A + 1, Argv.end());
CASArgs = ArrayRef(A + 1, Argv.end());
break;
}
}
Expand All @@ -722,7 +726,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
reportError(Twine("cannot create basedir: ") + EC.message());

if (Command == "-serve") {
Server.start(/*Exclusive*/ true);
Server.start(/*Exclusive*/ true, CASArgs);
return Server.listen();

} else if (Command == "-execute") {
Expand All @@ -733,7 +737,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
}

// Make sure to start the server before executing the command.
Server.start(/*Exclusive*/ true);
Server.start(/*Exclusive*/ true, CASArgs);
std::thread ServerThread([&Server]() { Server.listen(); });

setenv("CLANG_CACHE_SCAN_DAEMON_SOCKET_PATH", Server.BasePath.c_str(),
Expand Down Expand Up @@ -784,11 +788,61 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
openAndReplaceFD(1, LogOutPath);
openAndReplaceFD(2, LogErrPath);

Server.start(/*Exclusive*/ false);
Server.start(/*Exclusive*/ false, CASArgs);
return Server.listen();
}

void ScanServer::start(bool Exclusive) {
static std::optional<StringRef>
findLLVMCasBinary(const char *Argv0, llvm::SmallVectorImpl<char> &Storage) {
using namespace llvm::sys;
std::string Path = fs::getMainExecutable(Argv0, (void *)cc1depscan_main);
Storage.assign(Path.begin(), Path.end());
path::remove_filename(Storage);
path::append(Storage, "llvm-cas");
StringRef PathStr(Storage.data(), Storage.size());
if (fs::exists(PathStr))
return PathStr;
// Look for a corresponding usr/local/bin/llvm-cas
PathStr = path::parent_path(PathStr);
if (path::filename(PathStr) != "bin")
return std::nullopt;
PathStr = path::parent_path(PathStr);
Storage.truncate(PathStr.size());
path::append(Storage, "local", "bin", "llvm-cas");
PathStr = StringRef{Storage.data(), Storage.size()};
if (fs::exists(PathStr))
return PathStr;
return std::nullopt;
}

void ScanServer::start(bool Exclusive, ArrayRef<const char *> CASArgs) {
// Parse CAS options and validate if needed.
DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());

const OptTable &Opts = clang::driver::getDriverOptTable();
unsigned MissingArgIndex, MissingArgCount;
auto ParsedCASArgs =
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
CASOpts.ensurePersistentCAS();
ProduceIncludeTree =
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);

static std::once_flag ValidateOnce;
std::call_once(ValidateOnce, [&] {
if (getenv("LLVM_CAS_DISABLE_VALIDATION"))
return;
if (CASOpts.CASPath.empty() || !CASOpts.PluginPath.empty())
return;
SmallString<64> LLVMCasStorage;
SmallString<64> CASPath;
CASOpts.getResolvedCASPath(CASPath);
ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
CASPath, /*CheckHash=*/true,
/*AllowRecovery=*/true,
/*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage)));
});

// Check the pidfile.
SmallString<128> PidPath;
(BasePath + ".pid").toVector(PidPath);
Expand Down Expand Up @@ -827,16 +881,6 @@ int ScanServer::listen() {
llvm::DefaultThreadPool Pool;

DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());
CASOptions CASOpts;
const OptTable &Opts = clang::driver::getDriverOptTable();
unsigned MissingArgIndex, MissingArgCount;
auto ParsedCASArgs =
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
CASOpts.ensurePersistentCAS();
bool ProduceIncludeTree =
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);

std::shared_ptr<llvm::cas::ObjectStore> CAS;
std::shared_ptr<llvm::cas::ActionCache> Cache;
std::tie(CAS, Cache) = CASOpts.getOrCreateDatabases(Diags);
Expand Down
33 changes: 33 additions & 0 deletions llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,39 @@ class ObjectStore;
Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
createOnDiskUnifiedCASDatabases(StringRef Path);

/// Represents the result of validating the contents using
/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
///
/// Note: invalid results are handled as an \c Error.
enum class ValidationResult {
/// The data is already valid.
Valid,
/// The data was invalid, but was recovered.
Recovered,
/// Validation was skipped, as it was not needed.
Skipped,
};

/// Validate the data in \p Path, if needed to ensure correctness.
///
/// \param Path directory for the on-disk database.
/// \param CheckHash Whether to validate hashes match the data.
/// \param AllowRecovery Whether to automatically recover from invalid data by
/// marking the files for garbage collection.
/// \param ForceValidation Whether to force validation to occur even if it
/// should not be necessary.
/// \param LLVMCasBinary If provided, validation is performed out-of-process
/// using the given \c llvm-cas executable which protects against crashes
/// during validation. Otherwise validation is performed in-process.
///
/// \returns \c Valid if the data is already valid, \c Recovered if data
/// was invalid but has been cleared, \c Skipped if validation is not needed,
/// or an \c Error if validation cannot be performed or if the data is left
/// in an invalid state because \p AllowRecovery is false.
///
/// Note: when \c LLVM_ENABLE_ONDISK_CAS is not enabled, this always returns an
/// \c Error ("OnDiskCache is disabled") regardless of the arguments.
Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
std::optional<StringRef> LLVMCasBinary);

} // namespace llvm::cas

#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
5 changes: 5 additions & 0 deletions llvm/include/llvm/CAS/OnDiskCASLogger.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ class OnDiskCASLogger {
void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off,
size_t Size);
void log_UnifiedOnDiskCache_collectGarbage(StringRef Path);
/// Log a validate-if-needed operation on \p Path.
///
/// The entry records the inputs (\p BootTime, \p ValidationTime,
/// \p CheckHash, \p AllowRecovery, \p Force, and \p LLVMCas when it is
/// provided) followed by the outcome: whether validation was \p Skipped,
/// whether the data was \p Recovered, and \p ValidationError when it is
/// non-empty.
void log_UnifiedOnDiskCache_validateIfNeeded(
StringRef Path, uint64_t BootTime, uint64_t ValidationTime,
bool CheckHash, bool AllowRecovery, bool Force,
std::optional<StringRef> LLVMCas, StringRef ValidationError, bool Skipped,
bool Recovered);
void log_TempFile_create(StringRef Name);
void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC);
void log_TempFile_remove(StringRef TmpName, std::error_code EC);
Expand Down
29 changes: 29 additions & 0 deletions llvm/include/llvm/CAS/UnifiedOnDiskCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
#define LLVM_CAS_UNIFIEDONDISKCACHE_H

#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
#include "llvm/CAS/OnDiskGraphDB.h"

namespace llvm::cas::ondisk {
Expand Down Expand Up @@ -82,6 +83,34 @@ class UnifiedOnDiskCache {
OnDiskGraphDB::FaultInPolicy FaultInPolicy =
OnDiskGraphDB::FaultInPolicy::FullTree);

/// Validate the data in \p Path, if needed to ensure correctness.
///
/// Note: if invalid data is detected and \p AllowRecovery is true, then
/// recovery requires exclusive access to the CAS and it is an error to
/// attempt recovery if there is concurrent use of the CAS.
///
/// \param Path directory for the on-disk database.
/// \param HashName Identifier name for the hashing algorithm that is going to
/// be used.
/// \param HashByteSize Size for the object digest hash bytes.
/// \param CheckHash Whether to validate hashes match the data.
/// \param AllowRecovery Whether to automatically recover from invalid data by
/// marking the files for garbage collection.
/// \param ForceValidation Whether to force validation to occur even if it
/// should not be necessary.
/// \param LLVMCasBinary If provided, validation is performed out-of-process
/// using the given \c llvm-cas executable which protects against crashes
/// during validation. Otherwise validation is performed in-process.
///
/// \returns \c Valid if the data is already valid, \c Recovered if data
/// was invalid but has been cleared, \c Skipped if validation is not needed,
/// or an \c Error if validation cannot be performed or if the data is left
/// in an invalid state because \p AllowRecovery is false.
static Expected<ValidationResult>
validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
bool CheckHash, bool AllowRecovery, bool ForceValidation,
std::optional<StringRef> LLVMCasBinary);

/// This is called implicitly at destruction time, so it is not required for a
/// client to call this. After calling \p close the only method that is valid
/// to call is \p needsGarbageCollection.
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,16 @@ cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
return std::make_pair(std::move(CAS), std::move(AC));
}

// Thin wrapper over ondisk::UnifiedOnDiskCache::validateIfNeeded that supplies
// the builtin CAS hash name and hash size; every other argument is forwarded
// unchanged.
Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
std::optional<StringRef> LLVMCasBinary) {
#if LLVM_ENABLE_ONDISK_CAS
return ondisk::UnifiedOnDiskCache::validateIfNeeded(
Path, builtin::BuiltinCASContext::getHashName(),
sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation,
LLVMCasBinary);
#else
// On-disk CAS support is compiled out: report an error instead of a
// ValidationResult.
return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
#endif
}
19 changes: 19 additions & 0 deletions llvm/lib/CAS/OnDiskCASLogger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,25 @@ void OnDiskCASLogger::log_UnifiedOnDiskCache_collectGarbage(StringRef Path) {
Log << "collect garbage '" << Path << "'";
}

/// Write a "validate-if-needed" record for \p Path through a \c TextLogLine.
///
/// The request parameters are always written; the \c llvm-cas path, the
/// "skipped" / "recovered" markers, and the validation error text are only
/// written when they apply.
void OnDiskCASLogger::log_UnifiedOnDiskCache_validateIfNeeded(
    StringRef Path, uint64_t BootTime, uint64_t ValidationTime, bool CheckHash,
    bool AllowRecovery, bool Force, std::optional<StringRef> LLVMCas,
    StringRef ValidationError, bool Skipped, bool Recovered) {
  TextLogLine Log(OS);

  // Fixed fields describing the request.
  Log << "validate-if-needed '" << Path << "'"
      << " boot=" << BootTime << " last-valid=" << ValidationTime
      << " check-hash=" << CheckHash << " allow-recovery=" << AllowRecovery
      << " force=" << Force;

  // Optional fields, in a stable order.
  if (LLVMCas.has_value())
    Log << " llvm-cas=" << *LLVMCas;
  if (Skipped)
    Log << " skipped";
  if (Recovered)
    Log << " recovered";
  if (!ValidationError.empty())
    Log << " data was invalid " << ValidationError;
}

void OnDiskCASLogger::log_TempFile_create(StringRef Name) {
TextLogLine Log(OS);
Log << "standalone file create '" << Name << "'";
Expand Down
Loading