Skip to content

Commit 17aa9cf

Browse files
committed
[llvm][cas] Add validate-if-needed to recover from invalid data
Introduce a new validate-if-needed API to the UnifiedOnDiskCache and llvm-cas tool that triggers out-of-process validation of the CAS once for every machine boot, and optionally recovers from invalid data by marking it for garbage collection. This fixes a hole in the CAS data coherence when a power loss or similar failure causes the OS to not flush all of the pages in the mmapped on-disk CAS files. Clients such as the clang scanning daemon or a build system are expected to trigger this validation at least once before using the CAS. rdar://123542312
1 parent 49dea8c commit 17aa9cf

File tree

9 files changed

+495
-11
lines changed

9 files changed

+495
-11
lines changed

llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,39 @@ class ObjectStore;
2121
Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
2222
createOnDiskUnifiedCASDatabases(StringRef Path);
2323

24+
/// Non-error outcome of a call to
25+
/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
26+
///
27+
/// Note: there is no enumerator for data that is left invalid; that outcome is
/// reported as an \c Error instead.
28+
enum class ValidationResult {
29+
/// Validation ran and the data was already valid.
30+
Valid,
31+
/// The data was found to be invalid, but was recovered.
/// NOTE(review): the API documentation describes recovery as marking the
/// invalid files for garbage collection.
32+
RecoveredValid,
33+
/// Validation was skipped, as it was not needed.
34+
Skipped,
35+
};
36+
37+
/// Validate the data in \p Path, if needed to ensure correctness.
/// NOTE(review): the commit description says validation is triggered once per
/// machine boot; presumably that is what "if needed" means here -- confirm
/// against the implementation.
38+
///
39+
/// \param Path directory for the on-disk database.
40+
/// \param CheckHash Whether to validate that hashes match the data.
41+
/// \param AllowRecovery Whether to automatically recover from invalid data by
42+
/// marking the files for garbage collection.
43+
/// \param ForceValidation Whether to force validation to occur even if it
44+
/// should not be necessary.
45+
/// \param LLVMCasBinary If provided, validation is performed out-of-process
46+
/// using the given \c llvm-cas executable which protects against crashes
47+
/// during validation. Otherwise validation is performed in-process.
48+
///
49+
/// \returns \c Valid if the data is already valid, \c RecoveredValid if data
50+
/// was invalid but has been cleared, \c Skipped if validation is not needed,
51+
/// or an \c Error if validation cannot be performed or if the data is left
52+
/// in an invalid state because \p AllowRecovery is false.
53+
Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
54+
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
55+
std::optional<StringRef> LLVMCasBinary);
56+
2457
} // namespace llvm::cas
2558

2659
#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H

llvm/include/llvm/CAS/OnDiskCASLogger.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ class OnDiskCASLogger {
6262
void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off,
6363
size_t Size);
6464
void log_UnifiedOnDiskCache_collectGarbage(StringRef Path);
65+
/// Log a "validate-if-needed" line for the database at \p Path.
///
/// The line records \p BootTime (as "boot"), \p ValidationTime (as
/// "last-valid"), the \p CheckHash, \p AllowRecovery and \p Force flags, the
/// \p LLVMCas binary when one is provided, the "skipped" / "recovered" markers
/// when \p Skipped / \p Recovered are set, and \p ValidationError when it is
/// non-empty.
void log_UnifiedOnDiskCache_validateIfNeeded(
66+
StringRef Path, uint64_t BootTime, uint64_t ValidationTime,
67+
bool CheckHash, bool AllowRecovery, bool Force,
68+
std::optional<StringRef> LLVMCas, StringRef ValidationError, bool Skipped,
69+
bool Recovered);
6570
void log_TempFile_create(StringRef Name);
6671
void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC);
6772
void log_TempFile_remove(StringRef TmpName, std::error_code EC);

llvm/include/llvm/CAS/UnifiedOnDiskCache.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
1010
#define LLVM_CAS_UNIFIEDONDISKCACHE_H
1111

12+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
1213
#include "llvm/CAS/OnDiskGraphDB.h"
1314

1415
namespace llvm::cas::ondisk {
@@ -82,6 +83,34 @@ class UnifiedOnDiskCache {
8283
OnDiskGraphDB::FaultInPolicy FaultInPolicy =
8384
OnDiskGraphDB::FaultInPolicy::FullTree);
8485

86+
/// Validate the data in \p Path, if needed to ensure correctness.
87+
///
88+
/// Note: if invalid data is detected and \p AllowRecovery is true, then
89+
/// recovery requires exclusive access to the CAS and it is an error to
90+
/// attempt recovery if there is concurrent use of the CAS.
91+
///
92+
/// \param Path directory for the on-disk database.
93+
/// \param HashName Identifier name for the hashing algorithm that is going to
94+
/// be used.
95+
/// \param HashByteSize Size, in bytes, of the object digest hash.
96+
/// \param CheckHash Whether to validate that hashes match the data.
97+
/// \param AllowRecovery Whether to automatically recover from invalid data by
98+
/// marking the files for garbage collection.
99+
/// \param ForceValidation Whether to force validation to occur even if it
100+
/// should not be necessary.
101+
/// \param LLVMCasBinary If provided, validation is performed out-of-process
102+
/// using the given \c llvm-cas executable which protects against crashes
103+
/// during validation. Otherwise validation is performed in-process.
104+
///
105+
/// \returns \c Valid if the data is already valid, \c RecoveredValid if data
106+
/// was invalid but has been cleared, \c Skipped if validation is not needed,
107+
/// or an \c Error if validation cannot be performed or if the data is left
108+
/// in an invalid state because \p AllowRecovery is false.
109+
static Expected<ValidationResult>
110+
validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
111+
bool CheckHash, bool AllowRecovery, bool ForceValidation,
112+
std::optional<StringRef> LLVMCasBinary);
113+
85114
/// This is called implicitly at destruction time, so it is not required for a
86115
/// client to call this. After calling \p close the only method that is valid
87116
/// to call is \p needsGarbageCollection.

llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,16 @@ cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
2323
auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
2424
return std::make_pair(std::move(CAS), std::move(AC));
2525
}
26+
27+
/// Validate the on-disk unified CAS databases at \p Path using the builtin CAS
/// hash configuration.
///
/// When the on-disk CAS is compiled in, this forwards every argument to
/// \c ondisk::UnifiedOnDiskCache::validateIfNeeded, supplying the builtin hash
/// name and the size of \c builtin::HashType. Otherwise it returns an error
/// stating that the on-disk cache is disabled.
Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
    StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
    std::optional<StringRef> LLVMCasBinary) {
#if !LLVM_ENABLE_ONDISK_CAS
  // No on-disk CAS support in this build: nothing can be validated.
  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
#else
  // The digest size is fixed by the builtin hash type.
  constexpr unsigned HashByteSize = sizeof(builtin::HashType);
  return ondisk::UnifiedOnDiskCache::validateIfNeeded(
      Path, builtin::BuiltinCASContext::getHashName(), HashByteSize, CheckHash,
      AllowRecovery, ForceValidation, LLVMCasBinary);
#endif
}

llvm/lib/CAS/OnDiskCASLogger.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,25 @@ void OnDiskCASLogger::log_UnifiedOnDiskCache_collectGarbage(StringRef Path) {
211211
Log << "collect garbage '" << Path << "'";
212212
}
213213

214+
/// Write one "validate-if-needed" log line describing the request and its
/// outcome.
///
/// The unconditional fields (path, boot time, last validation time and the
/// three request flags) come first, followed by the optional pieces in a fixed
/// order: the llvm-cas binary, the "skipped" marker, the "recovered" marker
/// and finally the validation error text.
void OnDiskCASLogger::log_UnifiedOnDiskCache_validateIfNeeded(
    StringRef Path, uint64_t BootTime, uint64_t ValidationTime, bool CheckHash,
    bool AllowRecovery, bool Force, std::optional<StringRef> LLVMCas,
    StringRef ValidationError, bool Skipped, bool Recovered) {
  TextLogLine Line(OS);

  // Fields that are always present.
  Line << "validate-if-needed '" << Path << "'"
       << " boot=" << BootTime << " last-valid=" << ValidationTime
       << " check-hash=" << CheckHash << " allow-recovery=" << AllowRecovery
       << " force=" << Force;

  // Only mention the external binary when one was supplied.
  if (LLVMCas.has_value())
    Line << " llvm-cas=" << *LLVMCas;

  // Outcome markers.
  if (Skipped)
    Line << " skipped";
  if (Recovered)
    Line << " recovered";

  // An empty error string means no invalid data was reported.
  if (!ValidationError.empty())
    Line << " data was invalid " << ValidationError;
}
232+
214233
void OnDiskCASLogger::log_TempFile_create(StringRef Name) {
215234
TextLogLine Log(OS);
216235
Log << "standalone file create '" << Name << "'";

0 commit comments

Comments
 (0)