Skip to content

Commit c343be7

Browse files
authored
Merge pull request #10581 from benlangmuir/eng/blangmuir/validate-if-needed-123542312
[llvm][cas] Add validate-if-needed to recover from invalid data
2 parents 4589caf + bb52d96 commit c343be7

File tree

15 files changed

+591
-32
lines changed

15 files changed

+591
-32
lines changed

clang/include/clang/CAS/CASOptions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ class CASOptions : public CASConfiguration {
117117
/// default on-disk CAS, otherwise this is a noop.
118118
void ensurePersistentCAS();
119119

120+
/// Resolve \c CASPath into \p Result: when \c CASPath is "auto" the default
/// on-disk CAS path is stored, otherwise \c CASPath is copied unchanged.
void getResolvedCASPath(llvm::SmallVectorImpl<char> &Result) const;
121+
120122
private:
121123
/// Initialize Cached CAS and ActionCache.
122124
llvm::Error initCache() const;

clang/lib/CAS/CASOptions.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ llvm::Error CASOptions::initCache() const {
108108
}
109109

110110
SmallString<256> PathBuf;
111+
getResolvedCASPath(PathBuf);
111112
if (CASPath == "auto") {
112113
getDefaultOnDiskCASPath(PathBuf);
113114
CASPath = PathBuf;
@@ -119,3 +120,11 @@ llvm::Error CASOptions::initCache() const {
119120
std::tie(Cache.CAS, Cache.AC) = std::move(DBs);
120121
return llvm::Error::success();
121122
}
123+
124+
/// Store the resolved CAS path in \p Result.
///
/// When \c CASPath is the literal "auto", \p Result is filled in by
/// \c getDefaultOnDiskCASPath; otherwise \p Result is overwritten with a copy
/// of \c CASPath. \c CASPath itself is not modified.
void CASOptions::getResolvedCASPath(SmallVectorImpl<char> &Result) const {
125+
if (CASPath == "auto") {
126+
getDefaultOnDiskCASPath(Result);
127+
} else {
128+
// assign() replaces whatever Result held before the call.
Result.assign(CASPath.begin(), CASPath.end());
129+
}
130+
}

clang/test/CAS/depscan-cas-log.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// enable logging there are currently zero records in the log.
44

55
// RUN: rm -rf %t && mkdir %t
6-
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 %clang \
6+
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 LLVM_CAS_DISABLE_VALIDATION=1 %clang \
77
// RUN: -cc1depscan -fdepscan=daemon -fdepscan-include-tree -o - \
88
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
99
// RUN: FileCheck %s --input-file %t/cas/v1.log

clang/test/CAS/validate-once.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: rm -rf %t
2+
3+
// RUN: llvm-cas --cas %t/cas --ingest %s
4+
// RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak
5+
6+
// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
7+
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \
8+
// RUN: -Xclang -fcas-path -Xclang %t/cas \
9+
// RUN: -fdepscan=daemon -fdepscan-daemon=%{clang-daemon-dir}/%basename_t -fsyntax-only -x c %s
10+
11+
// RUN: ls %t/cas/corrupt.0.v1.1
12+
13+
// RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED
14+
// SKIPPED: validation skipped
15+
16+
#include "test.h"
17+
18+
int func(void);

clang/tools/driver/cc1depscanProtocol.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ Expected<ScanDaemon> ScanDaemon::launchDaemon(StringRef BasePath,
188188
#endif
189189

190190
static constexpr const char *PassThroughEnv[] = {
191-
"LLVM_CAS_LOG",
191+
"LLVM_CAS_LOG",
192+
"LLVM_CAS_DISABLE_VALIDATION",
192193
};
193194
SmallVector<const char *> EnvP;
194195
for (const char *Name : PassThroughEnv)

clang/tools/driver/cc1depscan_main.cpp

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@
2626
#include "clang/Tooling/DependencyScanning/ScanAndUpdateArgs.h"
2727
#include "llvm/ADT/ArrayRef.h"
2828
#include "llvm/ADT/ScopeExit.h"
29+
#include "llvm/ADT/SmallVector.h"
2930
#include "llvm/ADT/Statistic.h"
3031
#include "llvm/Bitstream/BitstreamReader.h"
3132
#include "llvm/CAS/ActionCache.h"
33+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
3234
#include "llvm/CAS/CASProvidingFileSystem.h"
3335
#include "llvm/CAS/CachingOnDiskFileSystem.h"
3436
#include "llvm/CAS/HierarchicalTreeBuilder.h"
@@ -39,6 +41,7 @@
3941
#include "llvm/Support/Compiler.h"
4042
#include "llvm/Support/Error.h"
4143
#include "llvm/Support/ErrorHandling.h"
44+
#include "llvm/Support/FileSystem.h"
4245
#include "llvm/Support/ManagedStatic.h"
4346
#include "llvm/Support/Path.h"
4447
#include "llvm/Support/PrefixMapper.h"
@@ -50,6 +53,7 @@
5053
#include "llvm/Support/raw_ostream.h"
5154
#include <cstdio>
5255
#include <mutex>
56+
#include <optional>
5357
#include <shared_mutex>
5458

5559
#if LLVM_ON_UNIX
@@ -630,8 +634,8 @@ namespace {
630634
struct ScanServer {
631635
const char *Argv0 = nullptr;
632636
SmallString<128> BasePath;
633-
/// List of cas options.
634-
ArrayRef<const char *> CASArgs;
637+
CASOptions CASOpts;
638+
bool ProduceIncludeTree = true;
635639
int PidFD = -1;
636640
int ListenSocket = -1;
637641
/// \p std::nullopt means it runs indefinitely.
@@ -640,7 +644,7 @@ struct ScanServer {
640644

641645
~ScanServer() { shutdown(); }
642646

643-
void start(bool Exclusive);
647+
void start(bool Exclusive, ArrayRef<const char *> CASArgs);
644648
int listen();
645649

646650
/// Tear down the socket and bind file immediately but wait till all existing
@@ -705,13 +709,13 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
705709
// particular "build session", to shutdown, then have it stay alive until the
706710
// session is finished.
707711
bool LongRunning = false;
708-
712+
ArrayRef<const char *> CASArgs;
709713
for (const auto *A = Argv.begin() + 2; A != Argv.end(); ++A) {
710714
StringRef Arg(*A);
711715
if (Arg == "-long-running")
712716
LongRunning = true;
713717
else if (Arg == "-cas-args") {
714-
Server.CASArgs = ArrayRef(A + 1, Argv.end());
718+
CASArgs = ArrayRef(A + 1, Argv.end());
715719
break;
716720
}
717721
}
@@ -722,7 +726,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
722726
reportError(Twine("cannot create basedir: ") + EC.message());
723727

724728
if (Command == "-serve") {
725-
Server.start(/*Exclusive*/ true);
729+
Server.start(/*Exclusive*/ true, CASArgs);
726730
return Server.listen();
727731

728732
} else if (Command == "-execute") {
@@ -733,7 +737,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
733737
}
734738

735739
// Make sure to start the server before executing the command.
736-
Server.start(/*Exclusive*/ true);
740+
Server.start(/*Exclusive*/ true, CASArgs);
737741
std::thread ServerThread([&Server]() { Server.listen(); });
738742

739743
setenv("CLANG_CACHE_SCAN_DAEMON_SOCKET_PATH", Server.BasePath.c_str(),
@@ -784,11 +788,61 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
784788
openAndReplaceFD(1, LogOutPath);
785789
openAndReplaceFD(2, LogErrPath);
786790

787-
Server.start(/*Exclusive*/ false);
791+
Server.start(/*Exclusive*/ false, CASArgs);
788792
return Server.listen();
789793
}
790794

791-
void ScanServer::start(bool Exclusive) {
795+
/// Locate an \c llvm-cas executable relative to the running binary.
///
/// Checks for \c llvm-cas next to the main executable first. If that file does
/// not exist and the executable's directory is named "bin", checks
/// \c <parent-of-bin>/local/bin/llvm-cas instead.
///
/// \param Argv0 forwarded to \c getMainExecutable to find the main executable.
/// \param Storage buffer used to build the candidate path. The returned
/// \c StringRef points into this buffer, so it must outlive the result.
/// \returns the path of an existing \c llvm-cas, or \c std::nullopt if neither
/// candidate exists.
static std::optional<StringRef>
796+
findLLVMCasBinary(const char *Argv0, llvm::SmallVectorImpl<char> &Storage) {
797+
using namespace llvm::sys;
798+
std::string Path = fs::getMainExecutable(Argv0, (void *)cc1depscan_main);
799+
Storage.assign(Path.begin(), Path.end());
800+
// Replace the executable's own filename with "llvm-cas".
path::remove_filename(Storage);
801+
path::append(Storage, "llvm-cas");
802+
StringRef PathStr(Storage.data(), Storage.size());
803+
if (fs::exists(PathStr))
804+
return PathStr;
805+
// Fall back to a sibling local/bin/llvm-cas (e.g. usr/bin -> usr/local/bin).
806+
PathStr = path::parent_path(PathStr);
807+
// Only attempt the fallback when the executable lives in a "bin" directory.
if (path::filename(PathStr) != "bin")
808+
return std::nullopt;
809+
PathStr = path::parent_path(PathStr);
810+
// PathStr is a prefix view of Storage, so truncating to its length keeps
// exactly the parent-of-bin directory in the buffer.
Storage.truncate(PathStr.size());
811+
path::append(Storage, "local", "bin", "llvm-cas");
812+
PathStr = StringRef{Storage.data(), Storage.size()};
813+
if (fs::exists(PathStr))
814+
return PathStr;
815+
return std::nullopt;
816+
}
817+
818+
void ScanServer::start(bool Exclusive, ArrayRef<const char *> CASArgs) {
819+
// Parse CAS options and validate if needed.
820+
DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());
821+
822+
const OptTable &Opts = clang::driver::getDriverOptTable();
823+
unsigned MissingArgIndex, MissingArgCount;
824+
auto ParsedCASArgs =
825+
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
826+
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
827+
CASOpts.ensurePersistentCAS();
828+
ProduceIncludeTree =
829+
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);
830+
831+
static std::once_flag ValidateOnce;
832+
std::call_once(ValidateOnce, [&] {
833+
if (getenv("LLVM_CAS_DISABLE_VALIDATION"))
834+
return;
835+
if (CASOpts.CASPath.empty() || !CASOpts.PluginPath.empty())
836+
return;
837+
SmallString<64> LLVMCasStorage;
838+
SmallString<64> CASPath;
839+
CASOpts.getResolvedCASPath(CASPath);
840+
ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
841+
CASPath, /*CheckHash=*/true,
842+
/*AllowRecovery=*/true,
843+
/*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage)));
844+
});
845+
792846
// Check the pidfile.
793847
SmallString<128> PidPath;
794848
(BasePath + ".pid").toVector(PidPath);
@@ -827,16 +881,6 @@ int ScanServer::listen() {
827881
llvm::DefaultThreadPool Pool;
828882

829883
DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());
830-
CASOptions CASOpts;
831-
const OptTable &Opts = clang::driver::getDriverOptTable();
832-
unsigned MissingArgIndex, MissingArgCount;
833-
auto ParsedCASArgs =
834-
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
835-
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
836-
CASOpts.ensurePersistentCAS();
837-
bool ProduceIncludeTree =
838-
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);
839-
840884
std::shared_ptr<llvm::cas::ObjectStore> CAS;
841885
std::shared_ptr<llvm::cas::ActionCache> Cache;
842886
std::tie(CAS, Cache) = CASOpts.getOrCreateDatabases(Diags);

llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,39 @@ class ObjectStore;
2121
Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
2222
createOnDiskUnifiedCASDatabases(StringRef Path);
2323

24+
/// Represents the result of validating the contents using
25+
/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
26+
///
27+
/// Note: there is no enumerator for data that is left invalid; that outcome
/// is reported as an \c Error instead.
28+
enum class ValidationResult {
29+
/// The data is already valid.
30+
Valid,
31+
/// The data was invalid, but was recovered.
32+
Recovered,
33+
/// Validation was skipped, as it was not needed.
34+
Skipped,
35+
};
36+
37+
/// Validate the data in \p Path, if needed to ensure correctness.
38+
///
39+
/// \param Path directory for the on-disk database.
40+
/// \param CheckHash Whether to validate hashes match the data.
41+
/// \param AllowRecovery Whether to automatically recover from invalid data by
42+
/// marking the files for garbage collection.
43+
/// \param ForceValidation Whether to force validation to occur even if it
44+
/// should not be necessary.
45+
/// \param LLVMCasBinary If provided, validation is performed out-of-process
46+
/// using the given \c llvm-cas executable which protects against crashes
47+
/// during validation. Otherwise validation is performed in-process.
48+
///
49+
/// \returns \c Valid if the data is already valid, \c Recovered if data
50+
/// was invalid but has been cleared, \c Skipped if validation is not needed,
51+
/// or an \c Error if validation cannot be performed or if the data is left
52+
/// in an invalid state because \p AllowRecovery is false.
53+
///
/// Note: when \c LLVM_ENABLE_ONDISK_CAS is off this always returns an
/// \c Error ("OnDiskCache is disabled") without inspecting \p Path.
Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
54+
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
55+
std::optional<StringRef> LLVMCasBinary);
56+
2457
} // namespace llvm::cas
2558

2659
#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H

llvm/include/llvm/CAS/OnDiskCASLogger.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ class OnDiskCASLogger {
6262
void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off,
6363
size_t Size);
6464
void log_UnifiedOnDiskCache_collectGarbage(StringRef Path);
65+
/// Log one "validate-if-needed" line for \p Path.
///
/// The line records \p BootTime, \p ValidationTime (logged as "last-valid"),
/// the \p CheckHash / \p AllowRecovery / \p Force flags, the \p LLVMCas path
/// when one is provided, whether validation was \p Skipped or the data was
/// \p Recovered, and \p ValidationError when it is non-empty.
void log_UnifiedOnDiskCache_validateIfNeeded(
66+
StringRef Path, uint64_t BootTime, uint64_t ValidationTime,
67+
bool CheckHash, bool AllowRecovery, bool Force,
68+
std::optional<StringRef> LLVMCas, StringRef ValidationError, bool Skipped,
69+
bool Recovered);
6570
void log_TempFile_create(StringRef Name);
6671
void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC);
6772
void log_TempFile_remove(StringRef TmpName, std::error_code EC);

llvm/include/llvm/CAS/UnifiedOnDiskCache.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
1010
#define LLVM_CAS_UNIFIEDONDISKCACHE_H
1111

12+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
1213
#include "llvm/CAS/OnDiskGraphDB.h"
1314

1415
namespace llvm::cas::ondisk {
@@ -82,6 +83,34 @@ class UnifiedOnDiskCache {
8283
OnDiskGraphDB::FaultInPolicy FaultInPolicy =
8384
OnDiskGraphDB::FaultInPolicy::FullTree);
8485

86+
/// Validate the data in \p Path, if needed to ensure correctness.
87+
///
88+
/// Note: if invalid data is detected and \p AllowRecovery is true, then
89+
/// recovery requires exclusive access to the CAS and it is an error to
90+
/// attempt recovery if there is concurrent use of the CAS.
91+
///
92+
/// \param Path directory for the on-disk database.
93+
/// \param HashName Identifier name for the hashing algorithm that is going to
94+
/// be used.
95+
/// \param HashByteSize Size for the object digest hash bytes.
96+
/// \param CheckHash Whether to validate hashes match the data.
97+
/// \param AllowRecovery Whether to automatically recover from invalid data by
98+
/// marking the files for garbage collection.
99+
/// \param ForceValidation Whether to force validation to occur even if it
100+
/// should not be necessary.
101+
/// \param LLVMCasBinary If provided, validation is performed out-of-process
102+
/// using the given \c llvm-cas executable which protects against crashes
103+
/// during validation. Otherwise validation is performed in-process.
104+
///
105+
/// \returns \c Valid if the data is already valid, \c Recovered if data
106+
/// was invalid but has been cleared, \c Skipped if validation is not needed,
107+
/// or an \c Error if validation cannot be performed or if the data is left
108+
/// in an invalid state because \p AllowRecovery is false.
109+
static Expected<ValidationResult>
110+
validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
111+
bool CheckHash, bool AllowRecovery, bool ForceValidation,
112+
std::optional<StringRef> LLVMCasBinary);
113+
85114
/// This is called implicitly at destruction time, so it is not required for a
86115
/// client to call this. After calling \p close the only method that is valid
87116
/// to call is \p needsGarbageCollection.

llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,16 @@ cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
2323
auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
2424
return std::make_pair(std::move(CAS), std::move(AC));
2525
}
26+
27+
/// Validate the unified on-disk CAS at \p Path using the builtin CAS hash.
///
/// Forwards to \c ondisk::UnifiedOnDiskCache::validateIfNeeded, supplying the
/// builtin hash name and \c sizeof(builtin::HashType) as the hash size; all
/// other arguments are passed through unchanged. When
/// \c LLVM_ENABLE_ONDISK_CAS is off, no validation is attempted and an
/// \c Error is returned instead.
Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
28+
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
29+
std::optional<StringRef> LLVMCasBinary) {
30+
#if LLVM_ENABLE_ONDISK_CAS
31+
return ondisk::UnifiedOnDiskCache::validateIfNeeded(
32+
Path, builtin::BuiltinCASContext::getHashName(),
33+
sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation,
34+
LLVMCasBinary);
35+
#else
36+
// On-disk CAS support is compiled out, so there is nothing to validate.
return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
37+
#endif
38+
}

llvm/lib/CAS/OnDiskCASLogger.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,25 @@ void OnDiskCASLogger::log_UnifiedOnDiskCache_collectGarbage(StringRef Path) {
211211
Log << "collect garbage '" << Path << "'";
212212
}
213213

214+
/// Write a single "validate-if-needed" log line describing one validation
/// attempt on \p Path.
///
/// The path, boot time, last-valid time and the three boolean options are
/// always written. The llvm-cas path, the "skipped" / "recovered" markers and
/// the validation error text are appended only when present or set.
void OnDiskCASLogger::log_UnifiedOnDiskCache_validateIfNeeded(
215+
StringRef Path, uint64_t BootTime, uint64_t ValidationTime, bool CheckHash,
216+
bool AllowRecovery, bool Force, std::optional<StringRef> LLVMCas,
217+
StringRef ValidationError, bool Skipped, bool Recovered) {
218+
TextLogLine Log(OS);
219+
Log << "validate-if-needed '" << Path << "'";
220+
Log << " boot=" << BootTime << " last-valid=" << ValidationTime;
221+
Log << " check-hash=" << CheckHash << " allow-recovery=" << AllowRecovery;
222+
Log << " force=" << Force;
223+
// Optional fields: emitted only when they carry information.
if (LLVMCas)
224+
Log << " llvm-cas=" << *LLVMCas;
225+
if (Skipped)
226+
Log << " skipped";
227+
if (Recovered)
228+
Log << " recovered";
229+
// A non-empty ValidationError means invalid data was detected.
if (!ValidationError.empty())
230+
Log << " data was invalid " << ValidationError;
231+
}
232+
214233
void OnDiskCASLogger::log_TempFile_create(StringRef Name) {
215234
TextLogLine Log(OS);
216235
Log << "standalone file create '" << Name << "'";

0 commit comments

Comments
 (0)