Skip to content

Commit fe9a485

Browse files
[Caching][ThinLTO] Avoid aborting for CAS error during ThinLTO caching
Treat most CAS errors during ThinLTO cache replay as cache misses, unless the environment variable `LLVM_THINLTO_STRICT_CAS_ERRORS` is set, in which case ThinLTO aborts immediately. rdar://118475647
1 parent 614137b commit fe9a485

File tree

1 file changed

+53
-14
lines changed

1 file changed

+53
-14
lines changed

llvm/lib/LTO/ThinLTOCodeGenerator.cpp

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,11 @@ static cl::opt<bool> DeterministicCheck(
106106
cl::init((bool)sys::Process::GetEnv(
107107
"LLVM_CACHE_CHECK_REPRODUCIBLE_CACHING_ISSUES")),
108108
cl::Hidden);
109+
static cl::opt<bool> StrictCASErrors(
110+
"thinlto-strict-cas-errors",
111+
cl::desc("Treat CAS errors during ThinLTO as fatal errors"),
112+
cl::init((bool)sys::Process::GetEnv("LLVM_THINLTO_STRICT_CAS_ERRORS")),
113+
cl::Hidden);
109114

110115
class LoggingStream {
111116
public:
@@ -474,6 +479,25 @@ class FileModuleCacheEntry : public ModuleCacheEntry {
474479
SmallString<128> EntryPath;
475480
};
476481

482+
static void handleCASError(
483+
Error E, llvm::function_ref<void(llvm::function_ref<void(raw_ostream &OS)>)>
484+
Logger) {
485+
if (!E)
486+
return;
487+
488+
// If strict CAS error, abort.
489+
if (StrictCASErrors)
490+
report_fatal_error(std::move(E));
491+
492+
// Otherwise, log the error message, or consume it if there is no logger.
493+
if (Logger)
494+
Logger([&](raw_ostream &OS) {
495+
OS << "LTO CAS Error: '" << toString(std::move(E)) << "'\n";
496+
});
497+
else
498+
consumeError(std::move(E));
499+
}
500+
477501
class CASModuleCacheEntry : public ModuleCacheEntry {
478502
public:
479503
// Create a cache entry. This computes a unique hash for the Module considering
@@ -501,8 +525,11 @@ class CASModuleCacheEntry : public ModuleCacheEntry {
501525
// TODO: We can have an alternative hashing function that doesn't
502526
// need to store the key into CAS to get the CacheKey.
503527
auto CASKey = CAS.createProxy(std::nullopt, *Key);
504-
if (!CASKey)
505-
report_fatal_error(CASKey.takeError());
528+
if (!CASKey) {
529+
handleCASError(CASKey.takeError(), this->Logger);
530+
// return as if the key doesn't exist, which will be treated as miss.
531+
return;
532+
}
506533

507534
ID = CASKey->getID();
508535
}
@@ -530,8 +557,11 @@ class CASModuleCacheEntry : public ModuleCacheEntry {
530557
}
531558
});
532559

533-
if (Error E = Cache.get(*ID, /*Globally=*/true).moveInto(MaybeKeyID))
534-
return errorToErrorCode(std::move(E));
560+
if (Error E = Cache.get(*ID, /*Globally=*/true).moveInto(MaybeKeyID)) {
561+
handleCASError(std::move(E), Logger);
562+
// If handleCASError didn't abort, treat as miss.
563+
return std::error_code();
564+
}
535565
}
536566

537567
if (!MaybeKeyID)
@@ -547,8 +577,11 @@ class CASModuleCacheEntry : public ModuleCacheEntry {
547577
});
548578

549579
auto MaybeObject = CAS.getProxy(*MaybeKeyID);
550-
if (!MaybeObject)
551-
return errorToErrorCode(MaybeObject.takeError());
580+
if (!MaybeObject) {
581+
handleCASError(MaybeObject.takeError(), Logger);
582+
// If handleCASError didn't abort, treat as miss.
583+
return std::error_code();
584+
}
552585

553586
return MaybeObject->getMemoryBuffer("", /*NullTerminated=*/true);
554587
}
@@ -571,7 +604,7 @@ class CASModuleCacheEntry : public ModuleCacheEntry {
571604

572605
if (Error E = CAS.createProxy(std::nullopt, OutputBuffer.getBuffer())
573606
.moveInto(Proxy))
574-
report_fatal_error(std::move(E));
607+
return handleCASError(std::move(E), Logger);
575608
}
576609

577610
ScopedDurationTimer ScopedTime([&](double Seconds) {
@@ -584,7 +617,7 @@ class CASModuleCacheEntry : public ModuleCacheEntry {
584617
});
585618

586619
if (auto Err = Cache.put(*ID, Proxy->getID(), /*Globally=*/true))
587-
report_fatal_error(std::move(Err));
620+
handleCASError(std::move(Err), Logger);
588621
}
589622

590623
private:
@@ -639,8 +672,11 @@ class RemoteModuleCacheEntry : public ModuleCacheEntry {
639672
}
640673
});
641674

642-
if (Error E = Service.KVDB->getValueSync(ID).moveInto(GetResponse))
643-
return errorToErrorCode(std::move(E));
675+
if (Error E = Service.KVDB->getValueSync(ID).moveInto(GetResponse)) {
676+
handleCASError(std::move(E), Logger);
677+
// If handleCASError didn't abort, treat as miss.
678+
return std::error_code();
679+
}
644680
}
645681

646682
// Cache Miss.
@@ -666,8 +702,11 @@ class RemoteModuleCacheEntry : public ModuleCacheEntry {
666702

667703
// Request the output buffer.
668704
auto LoadResponse = Service.CASDB->loadSync(Result->getValue(), OutputPath);
669-
if (!LoadResponse)
670-
return errorToErrorCode(LoadResponse.takeError());
705+
if (!LoadResponse) {
706+
handleCASError(LoadResponse.takeError(), Logger);
707+
// If handleCASError didn't abort, treat as miss.
708+
return std::error_code();
709+
}
671710

672711
// Object not found. Treat it as a miss.
673712
if (LoadResponse->KeyNotFound)
@@ -699,7 +738,7 @@ class RemoteModuleCacheEntry : public ModuleCacheEntry {
699738

700739
if (Error E =
701740
Service.CASDB->saveFileSync(OutputPath).moveInto(SaveResponse))
702-
report_fatal_error(std::move(E));
741+
return handleCASError(std::move(E), Logger);
703742
}
704743

705744
// Only check determinism when the cache lookup succeeded before.
@@ -722,7 +761,7 @@ class RemoteModuleCacheEntry : public ModuleCacheEntry {
722761
cas::remote::KeyValueDBClient::ValueTy CompResult;
723762
CompResult["Output"] = *SaveResponse;
724763
if (auto Err = Service.KVDB->putValueSync(ID, CompResult))
725-
report_fatal_error(std::move(Err));
764+
handleCASError(std::move(Err), Logger);
726765
}
727766

728767
Error writeObject(const MemoryBuffer &OutputBuffer,

0 commit comments

Comments
 (0)