Skip to content

Commit 4ceab20

Browse files
committed
Make the compilation cache key and the CAS outputs data output path independent
Instead of storing CAS outputs by the specific path that was used at the time of invocation, associate them with an "output kind name" so that they are re-usable even with different output paths. The practical benefit of this is that we can get cache hits when source files are part of multiple products (e.g. when source files are included in multiple Xcode test targets) or when PCH files end up with the same caching key and output but the build system is not in a position to identify this.
1 parent 982629f commit 4ceab20

File tree

7 files changed

+259
-35
lines changed

7 files changed

+259
-35
lines changed

clang/lib/Frontend/CompileJobCacheKey.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,29 @@ clang::createCompileJobCacheKey(CASDB &CAS, ArrayRef<const char *> CC1Args,
5656

5757
Optional<llvm::cas::CASID>
5858
clang::createCompileJobCacheKey(CASDB &CAS, DiagnosticsEngine &Diags,
59-
const CompilerInvocation &Invocation) {
59+
const CompilerInvocation &OriginalInvocation) {
60+
CompilerInvocation InvocationForCacheKey(OriginalInvocation);
61+
FrontendOptions &FrontendOpts = InvocationForCacheKey.getFrontendOpts();
62+
DiagnosticOptions &DiagOpts = InvocationForCacheKey.getDiagnosticOpts();
63+
// Keep the key independent of the paths of these outputs.
64+
FrontendOpts.OutputFile = "-";
65+
InvocationForCacheKey.getDependencyOutputOpts().OutputFile = "-";
66+
// We always generate the serialized diagnostics so the key is independent of
67+
// the presence of '--serialize-diagnostics'.
68+
DiagOpts.DiagnosticSerializationFile.clear();
69+
6070
// Generate a new command-line in case Invocation has been canonicalized.
6171
llvm::BumpPtrAllocator Alloc;
6272
llvm::StringSaver Saver(Alloc);
6373
llvm::SmallVector<const char *> Argv;
6474
Argv.push_back("-cc1");
65-
Invocation.generateCC1CommandLine(
75+
InvocationForCacheKey.generateCC1CommandLine(
6676
Argv, [&Saver](const llvm::Twine &T) { return Saver.save(T).data(); });
6777

6878
// FIXME: currently correct since the main executable is always in the root
6979
// from scanning, but we should probably make it explicit here...
70-
StringRef RootIDString = Invocation.getFileSystemOpts().CASFileSystemRootID;
80+
StringRef RootIDString =
81+
InvocationForCacheKey.getFileSystemOpts().CASFileSystemRootID;
7182
Expected<llvm::cas::CASID> RootID = CAS.parseID(RootIDString);
7283
if (!RootID) {
7384
llvm::consumeError(RootID.takeError());
Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,33 @@
11
// RUN: rm -rf %t && mkdir -p %t
2-
// RUN: llvm-cas --cas %t/cas --ingest --data %s > %t/casid
2+
// RUN: split-file %s %t
3+
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
34
//
45
// RUN: %clang -cc1 -triple x86_64-apple-macos11 \
56
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid -fcache-compile-job \
6-
// RUN: -Rcompile-job-cache -emit-obj -o %t/output.o \
7-
// RUN: -dependency-file %t/deps.d -MT %t/output.o 2>&1 \
7+
// RUN: -Rcompile-job-cache %t/main.c -emit-obj -o %t/output.o \
8+
// RUN: -dependency-file %t/deps1.d -MT depends 2>&1 \
89
// RUN: | FileCheck %s --allow-empty --check-prefix=CACHE-MISS
910
//
11+
// RUN: FileCheck %s --input-file=%t/deps1.d --check-prefix=DEPS
12+
// DEPS: depends:
13+
// DEPS: main.c
14+
// DEPS: my_header.h
15+
1016
// RUN: ls %t/output.o && rm %t/output.o
11-
// RUN: ls %t/deps.d && mv %t/deps.d %t/deps.d.orig
1217
//
1318
// RUN: %clang -cc1 -triple x86_64-apple-macos11 \
1419
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid -fcache-compile-job \
15-
// RUN: -Rcompile-job-cache -emit-obj -o %t/output.o \
16-
// RUN: -dependency-file %t/deps.d -MT %t/output.o 2>&1 \
20+
// RUN: -Rcompile-job-cache %t/main.c -emit-obj -o %t/output.o \
21+
// RUN: -dependency-file %t/deps2.d -MT depends 2>&1 \
1722
// RUN: | FileCheck %s --check-prefix=CACHE-HIT
1823
//
1924
// RUN: ls %t/output.o
20-
// RUN: diff -u %t/deps.d %t/deps.d.orig
25+
// RUN: diff -u %t/deps1.d %t/deps2.d
2126
//
2227
// CACHE-HIT: remark: compile job cache hit
2328
// CACHE-MISS-NOT: remark: compile job cache hit
29+
30+
//--- main.c
31+
#include "my_header.h"
32+
33+
//--- my_header.h

clang/test/CAS/fcache-compile-job.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,7 @@
1515
// RUN: %clang -cc1 -triple x86_64-apple-macos11 \
1616
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid -fcache-compile-job \
1717
// RUN: -Rcompile-job-cache-hit -emit-obj -o output.o 2>&1 \
18-
// RUN: | FileCheck %s --allow-empty --check-prefix=CACHE-MISS
19-
// RUN: ls %t/output.o && rm %t/output.o
20-
// RUN: %clang -cc1 -triple x86_64-apple-macos11 \
21-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid -fcache-compile-job \
22-
// RUN: -Rcompile-job-cache-hit -emit-obj -o output.o 2>&1 \
23-
// RUN: | FileCheck %s --check-prefix=CACHE-HIT
18+
// RUN: | FileCheck %s --allow-empty --check-prefix=CACHE-HIT
2419
// RUN: ls %t/output.o
2520
//
2621
// Check for a cache hit if the CAS moves:
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// RUN: rm -rf %t && mkdir -p %t/a %t/b
2+
3+
// Check that we got a cache hit even though the output paths are different.
4+
5+
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas %clang-cache \
6+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/a/t1.o -MMD -MT dependencies -MF %t/a/t1.d --serialize-diagnostics %t/a/t1.dia -Rcompile-job-cache \
7+
// RUN: 2>&1 | FileCheck %s --check-prefix=CACHE-MISS
8+
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas %clang-cache \
9+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/b/t2.o -MMD -MT dependencies -MF %t/b/t2.d --serialize-diagnostics %t/b/t2.dia -Rcompile-job-cache \
10+
// RUN: 2>&1 | FileCheck %s --check-prefix=CACHE-HIT
11+
12+
// Check PCH output
13+
14+
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas %clang-cache \
15+
// RUN: %clang -target x86_64-apple-macos11 -x c-header %s -o %t/a/t1.pch -Rcompile-job-cache 2>&1 | FileCheck %s --check-prefix=CACHE-MISS
16+
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas %clang-cache \
17+
// RUN: %clang -target x86_64-apple-macos11 -x c-header %s -o %t/b/t2.pch -Rcompile-job-cache 2>&1 | FileCheck %s --check-prefix=CACHE-HIT
18+
19+
// CACHE-MISS: remark: compile job cache miss
20+
// CACHE-HIT: remark: compile job cache hit
21+
22+
// Repeat to diff outputs produced from each invocation. CAS path is different to avoid cache hits.
23+
24+
// RUN: rm -rf %t && mkdir -p %t
25+
26+
// Baseline to check we got expected outputs.
27+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/t.o -MMD -MT dependencies -MF %t/t.d --serialize-diagnostics %t/t.dia
28+
// RUN: env LLVM_CACHE_CAS_PATH=%t/a/cas %clang-cache \
29+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/a/t1.o -MMD -MT dependencies -MF %t/a/t1.d --serialize-diagnostics %t/a/t1.dia
30+
// RUN: env LLVM_CACHE_CAS_PATH=%t/b/cas %clang-cache \
31+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/b/t2.o -MMD -MT dependencies -MF %t/b/t2.d --serialize-diagnostics %t/b/t2.dia
32+
33+
// RUN: diff %t/a/t1.o %t/b/t2.o
34+
// RUN: diff %t/t.o %t/a/t1.o
35+
36+
// RUN: diff %t/a/t1.dia %t/b/t2.dia
37+
// RUN: diff %t/t.dia %t/a/t1.dia
38+
39+
// RUN: diff %t/a/t1.d %t/b/t2.d
40+
// RUN: diff %t/t.d %t/a/t1.d
41+
42+
// Baseline to check we got expected output.
43+
// RUN: %clang -target x86_64-apple-macos11 -x c-header %s -o %t/t.pch -Xclang -fno-pch-timestamp
44+
// RUN: env LLVM_CACHE_CAS_PATH=%t/a/cas %clang-cache \
45+
// RUN: %clang -target x86_64-apple-macos11 -x c-header %s -o %t/a/t1.pch
46+
// RUN: env LLVM_CACHE_CAS_PATH=%t/b/cas %clang-cache \
47+
// RUN: %clang -target x86_64-apple-macos11 -x c-header %s -o %t/b/t2.pch
48+
49+
// RUN: diff %t/a/t1.pch %t/b/t2.pch
50+
// RUN: diff %t/t.pch %t/a/t1.pch
51+
52+
// Check that caching is independent of whether '--serialize-diagnostics' exists or not.
53+
54+
// Check with the option missing then present.
55+
// RUN: env LLVM_CACHE_CAS_PATH=%t/d1/cas %clang-cache \
56+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/t1.o -Rcompile-job-cache \
57+
// RUN: 2>&1 | FileCheck %s --check-prefix=CACHE-MISS
58+
// RUN: env LLVM_CACHE_CAS_PATH=%t/d1/cas %clang-cache \
59+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/t2.o --serialize-diagnostics %t/t1.dia -Rcompile-job-cache \
60+
// RUN: 2>&1 | FileCheck %s --check-prefix=CACHE-HIT
61+
62+
// Check with the option present then missing.
63+
// RUN: env LLVM_CACHE_CAS_PATH=%t/d2/cas %clang-cache \
64+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/t1.o --serialize-diagnostics %t/t2.dia -Rcompile-job-cache \
65+
// RUN: 2>&1 | FileCheck %s --check-prefix=CACHE-MISS
66+
// RUN: env LLVM_CACHE_CAS_PATH=%t/d2/cas %clang-cache \
67+
// RUN: %clang -target x86_64-apple-macos11 -c %s -o %t/t2.o -Rcompile-job-cache \
68+
// RUN: 2>&1 | FileCheck %s --check-prefix=CACHE-HIT
69+
70+
// RUN: diff %t/t1.dia %t/t2.dia
71+
// RUN: diff %t/t.dia %t/t1.dia
72+
73+
#warning some warning
74+
void test() {}

clang/tools/driver/cc1_main.cpp

Lines changed: 109 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,27 @@ namespace {
236236
// handled.
237237
class CompileJobCache {
238238
public:
239+
/// Categorization for the output kinds that is used to decouple the
240+
/// compilation cache key from the specific output paths.
241+
enum class OutputKind {
242+
MainOutput,
243+
SerializedDiagnostics,
244+
Dependencies,
245+
};
246+
static ArrayRef<OutputKind> getAllOutputKinds() {
247+
static const OutputKind AllOutputKinds[] = {
248+
OutputKind::MainOutput, OutputKind::SerializedDiagnostics,
249+
OutputKind::Dependencies};
250+
return llvm::makeArrayRef(AllOutputKinds);
251+
}
252+
253+
static StringRef getOutputKindName(OutputKind Kind);
254+
255+
/// \returns \p None if \p Name doesn't match one of the output kind names.
256+
static Optional<OutputKind> getOutputKindForName(StringRef Name);
257+
258+
StringRef getPathForOutputKind(OutputKind Kind);
259+
239260
/// Canonicalize \p Clang.
240261
///
241262
/// Return status if should exit immediately, otherwise None.
@@ -267,19 +288,60 @@ class CompileJobCache {
267288
SmallString<256> ResultDiags;
268289
Optional<llvm::cas::CASID> ResultCacheKey;
269290
std::unique_ptr<llvm::raw_ostream> ResultDiagsOS;
291+
SmallString<256> SerialDiagsBuf;
270292
IntrusiveRefCntPtr<llvm::cas::CASOutputBackend> CASOutputs;
271293
std::string OutputFile;
294+
std::string SerialDiagsFile;
295+
std::string DependenciesFile;
272296
Optional<llvm::vfs::OutputFile> SerialDiagsOutput;
273297
};
274298
} // end anonymous namespace
275299

300+
static constexpr llvm::StringLiteral MainOutputKindName = "<output>";
301+
static constexpr llvm::StringLiteral SerializedDiagnosticsKindName =
302+
"<serial-diags>";
303+
static constexpr llvm::StringLiteral DependenciesOutputKindName =
304+
"<dependencies>";
305+
306+
StringRef CompileJobCache::getOutputKindName(OutputKind Kind) {
307+
switch (Kind) {
308+
case OutputKind::MainOutput:
309+
return MainOutputKindName;
310+
case OutputKind::SerializedDiagnostics:
311+
return SerializedDiagnosticsKindName;
312+
case OutputKind::Dependencies:
313+
return DependenciesOutputKindName;
314+
}
315+
}
316+
317+
Optional<CompileJobCache::OutputKind>
318+
CompileJobCache::getOutputKindForName(StringRef Name) {
319+
return llvm::StringSwitch<Optional<OutputKind>>(Name)
320+
.Case(MainOutputKindName, OutputKind::MainOutput)
321+
.Case(SerializedDiagnosticsKindName, OutputKind::SerializedDiagnostics)
322+
.Case(DependenciesOutputKindName, OutputKind::Dependencies)
323+
.Default(None);
324+
}
325+
326+
StringRef CompileJobCache::getPathForOutputKind(OutputKind Kind) {
327+
switch (Kind) {
328+
case OutputKind::MainOutput:
329+
return OutputFile;
330+
case OutputKind::SerializedDiagnostics:
331+
return SerialDiagsFile;
332+
case OutputKind::Dependencies:
333+
return DependenciesFile;
334+
}
335+
}
336+
276337
Optional<int> CompileJobCache::initialize(CompilerInstance &Clang) {
277338
CompilerInvocation &Invocation = Clang.getInvocation();
278339
DiagnosticsEngine &Diags = Clang.getDiagnostics();
340+
FrontendOptions &FrontendOpts = Invocation.getFrontendOpts();
279341

280342
// Extract whether caching is on (and canonicalize setting).
281-
CacheCompileJob = Invocation.getFrontendOpts().CacheCompileJob;
282-
Invocation.getFrontendOpts().CacheCompileJob = false;
343+
CacheCompileJob = FrontendOpts.CacheCompileJob;
344+
FrontendOpts.CacheCompileJob = false;
283345

284346
// Nothing else to do if we're not caching.
285347
if (!CacheCompileJob)
@@ -300,10 +362,11 @@ Optional<int> CompileJobCache::initialize(CompilerInstance &Clang) {
300362
// TODO: Canonicalize DiagnosticOptions here to be "serialized" only. Pass in
301363
// a hook to mirror diagnostics to stderr (when writing there), and handle
302364
// other outputs during replay.
365+
FrontendOpts.IncludeTimestamps = false;
303366

304-
// TODO: Canonicalize OutputFile to "-" here. During replay, move it and
305-
// derived outputs to the right place.
306-
OutputFile = Invocation.getFrontendOpts().OutputFile;
367+
OutputFile = FrontendOpts.OutputFile;
368+
SerialDiagsFile = Invocation.getDiagnosticOpts().DiagnosticSerializationFile;
369+
DependenciesFile = Invocation.getDependencyOutputOpts().OutputFile;
307370
return None;
308371
}
309372

@@ -387,6 +450,11 @@ Optional<int> CompileJobCache::tryReplayCachedResult(CompilerInstance &Clang) {
387450

388451
// Set up the output backend so we can save / cache the result after.
389452
CASOutputs = llvm::makeIntrusiveRefCnt<llvm::cas::CASOutputBackend>(*CAS);
453+
for (OutputKind K : getAllOutputKinds()) {
454+
StringRef OutPath = getPathForOutputKind(K);
455+
if (!OutPath.empty())
456+
CASOutputs->addKindMap(getOutputKindName(K), OutPath);
457+
}
390458

391459
Clang.setOutputBackend(llvm::vfs::makeMirroringOutputBackend(
392460
CASOutputs, std::move(OnDiskOutputs)));
@@ -442,6 +510,14 @@ Optional<int> CompileJobCache::tryReplayCachedResult(CompilerInstance &Clang) {
442510
OutputFile, &DiagOpts, /*MergeChildRecords*/ false, std::move(*OS));
443511
Diags.setClient(new ChainedDiagnosticConsumer(
444512
Diags.takeClient(), std::move(SerializedConsumer)));
513+
} else {
514+
// We always generate the serialized diagnostics so the key is independent
515+
// of the presence of '--serialize-diagnostics'.
516+
auto OS = std::make_unique<llvm::raw_svector_ostream>(SerialDiagsBuf);
517+
auto SerializedConsumer = clang::serialized_diags::create(
518+
StringRef(), &DiagOpts, /*MergeChildRecords*/ false, std::move(OS));
519+
Diags.setClient(new ChainedDiagnosticConsumer(
520+
Diags.takeClient(), std::move(SerializedConsumer)));
445521
}
446522

447523
return None;
@@ -476,6 +552,21 @@ void CompileJobCache::finishComputedResult(CompilerInstance &Clang,
476552
return;
477553

478554
// FIXME: Stop calling report_fatal_error().
555+
if (!SerialDiagsOutput) {
556+
// Not requested to get a serialized diagnostics file but we generated it
557+
// and will store it regardless so that the key is independent of the
558+
// presence of '--serialize-diagnostics'.
559+
Expected<llvm::cas::ObjectProxy> SerialDiags =
560+
CAS->createProxy(None, SerialDiagsBuf);
561+
// FIXME: Stop calling report_fatal_error().
562+
if (!SerialDiags)
563+
llvm::report_fatal_error(SerialDiags.takeError());
564+
if (Error E = CASOutputs->addObject(
565+
getOutputKindName(OutputKind::SerializedDiagnostics),
566+
SerialDiags->getRef()))
567+
llvm::report_fatal_error(std::move(E));
568+
}
569+
479570
Expected<llvm::cas::ObjectProxy> Outputs = CASOutputs->getCASProxy();
480571
if (!Outputs)
481572
llvm::report_fatal_error(Outputs.takeError());
@@ -557,20 +648,28 @@ Optional<int> CompileJobCache::replayCachedResult(CompilerInstance &Clang,
557648
llvm::cas::CASID PathID = Outputs->getReferenceID(I);
558649
llvm::cas::CASID BytesID = Outputs->getReferenceID(I + 1);
559650

560-
Optional<llvm::cas::ObjectProxy> Path;
561-
if (Error E = CAS->getProxy(PathID).moveInto(Path))
651+
Optional<llvm::cas::ObjectProxy> PathProxy;
652+
if (Error E = CAS->getProxy(PathID).moveInto(PathProxy))
562653
llvm::report_fatal_error(std::move(E));
563654

655+
Optional<OutputKind> OutKind = getOutputKindForName(PathProxy->getData());
656+
StringRef Path =
657+
OutKind ? getPathForOutputKind(*OutKind) : PathProxy->getData();
658+
if (Path.empty()) {
659+
// The output may be always generated but not needed with this invocation,
660+
// like the serialized diagnostics file.
661+
continue;
662+
}
663+
564664
Optional<StringRef> Contents;
565665
SmallString<50> ContentsStorage;
566666
Optional<llvm::cas::ObjectProxy> Bytes;
567667
if (Error E = CAS->getProxy(BytesID).moveInto(Bytes))
568668
llvm::report_fatal_error(std::move(E));
569669
Contents = Bytes->getData();
570670
std::unique_ptr<llvm::FileOutputBuffer> Output;
571-
if (Error E =
572-
llvm::FileOutputBuffer::create(Path->getData(), Contents->size())
573-
.moveInto(Output))
671+
if (Error E = llvm::FileOutputBuffer::create(Path, Contents->size())
672+
.moveInto(Output))
574673
llvm::report_fatal_error(std::move(E));
575674
llvm::copy(*Contents, Output->getBufferStart());
576675
if (llvm::Error E = Output->commit())

llvm/include/llvm/CAS/CASOutputBackend.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,14 @@ class CASOutputBackend : public vfs::OutputBackend {
2727
/// Create a top-level tree for all created files. This will contain all files
2828
Expected<ObjectProxy> getCASProxy();
2929

30-
/// Add a CAS object to the path in the output backend.
31-
Error addObject(StringRef Path, ObjectRef Object);
30+
/// Add a CAS object in the output backend associated with the given name,
31+
/// which can be a path or a "kind" string.
32+
Error addObject(StringRef Name, ObjectRef Object);
33+
34+
/// Add an association of a "kind" string with a particular output path.
35+
/// When the output for \p Path is encountered it will be associated with
36+
/// the \p Kind string instead of its path.
37+
void addKindMap(StringRef Kind, StringRef Path);
3238

3339
private:
3440
Expected<std::unique_ptr<vfs::OutputFileImpl>>

0 commit comments

Comments
 (0)