Skip to content

Commit c132f74

Browse files
[Caching] Don't rely on FileSystem when replaying diagnostics
Stop relying on the file system to provide source buffers for diagnostics when replaying. This avoids initializing CASFS, which is quite expensive. Cached diagnostics now contain the CASID for each file buffer, so replay can initialize its own source manager without relying on the underlying file system. rdar://128423393
1 parent 7a57bd8 commit c132f74

File tree

2 files changed

+148
-46
lines changed

2 files changed

+148
-46
lines changed

lib/Frontend/CachedDiagnostics.cpp

Lines changed: 97 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,14 @@
2424
#include "swift/Frontend/FrontendInputsAndOutputs.h"
2525
#include "llvm/ADT/IntrusiveRefCntPtr.h"
2626
#include "llvm/ADT/STLExtras.h"
27+
#include "llvm/ADT/StringExtras.h"
2728
#include "llvm/ADT/StringRef.h"
2829
#include "llvm/ADT/TinyPtrVector.h"
30+
#include "llvm/CAS/ObjectStore.h"
2931
#include "llvm/Support/Compression.h"
3032
#include "llvm/Support/Debug.h"
3133
#include "llvm/Support/Error.h"
34+
#include "llvm/Support/MemoryBuffer.h"
3235
#include "llvm/Support/PrefixMapper.h"
3336
#include "llvm/Support/SMLoc.h"
3437
#include "llvm/Support/VirtualFileSystem.h"
@@ -82,6 +85,7 @@ struct SerializedDiagnosticInfo {
8285
struct SerializedFile {
8386
std::string FileName;
8487
SerializedSourceLoc IncludeLoc = SerializedSourceLoc();
88+
std::string ContentCASID;
8589
StringRef Content;
8690
};
8791

@@ -100,8 +104,8 @@ struct SerializedGeneratedFileInfo {
100104

101105
struct DiagnosticSerializer {
102106
DiagnosticSerializer(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
103-
llvm::PrefixMapper &Mapper)
104-
: SrcMgr(FS), Mapper(Mapper) {}
107+
llvm::PrefixMapper &Mapper, llvm::cas::ObjectStore &CAS)
108+
: SrcMgr(FS), Mapper(Mapper), CAS(CAS) {}
105109

106110
using ReplayFunc = llvm::function_ref<llvm::Error(const DiagnosticInfo &)>;
107111

@@ -111,31 +115,40 @@ struct DiagnosticSerializer {
111115
llvm::Error serializeEmittedDiagnostics(llvm::raw_ostream &os);
112116

113117
static llvm::Error
114-
emitDiagnosticsFromCached(llvm::StringRef Buffer, SourceManager &SrcMgr,
115-
DiagnosticEngine &Diags,
116-
llvm::PrefixMapper &Mapper,
118+
emitDiagnosticsFromCached(llvm::StringRef Buffer,
119+
DiagnosticEngine &Diags, llvm::PrefixMapper &Mapper,
120+
llvm::cas::ObjectStore &CAS,
117121
const FrontendInputsAndOutputs &InAndOut) {
118122
// Create a new DiagnosticSerializer since this cannot be shared with a
119-
// serialization instance.
120-
DiagnosticSerializer DS(SrcMgr.getFileSystem(), Mapper);
121-
DS.addInputsToSourceMgr(InAndOut);
123+
// serialization instance. Using an empty in-memory file system as
124+
// underlying file system because the replay logic should not touch file
125+
// system.
126+
auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
127+
DiagnosticSerializer DS(std::move(FS), Mapper, CAS);
122128
return DS.doEmitFromCached(Buffer, Diags);
123129
}
124130

125131
SourceManager &getSourceMgr() { return SrcMgr; }
126132

127-
void addInputsToSourceMgr(const FrontendInputsAndOutputs &InAndOut) {
133+
void addInputsToSourceMgr(SourceManager &SM,
134+
const FrontendInputsAndOutputs &InAndOut) {
128135
// Extract all the input file names so they can be added to the source
129136
// manager when replaying the diagnostics. All input files are needed even
130137
// if they don't contain diagnostics because FileSpecificDiagConsumer needs
131138
// references to input files to find the subconsumer.
132139
auto addInputToSourceMgr = [&](const InputFile &Input) {
133-
if (Input.getFileName() != "-")
134-
SrcMgr.getExternalSourceBufferID(remapFilePath(Input.getFileName()));
140+
auto Path = remapFilePath(Input.getFileName());
141+
SrcMgr.getExternalSourceBufferID(Path);
142+
143+
// Fetch the source buffer from original SourceManager and create a
144+
// serialized file from it.
145+
auto Idx = SM.getExternalSourceBufferID(Path);
146+
if (Idx != 0)
147+
getFileIDFromBufferID(SM, Idx);
148+
135149
return false;
136150
};
137-
InAndOut.forEachInputProducingSupplementaryOutput(addInputToSourceMgr);
138-
InAndOut.forEachNonPrimaryInput(addInputToSourceMgr);
151+
InAndOut.forEachInput(addInputToSourceMgr);
139152
}
140153

141154
private:
@@ -169,7 +182,7 @@ struct DiagnosticSerializer {
169182
DiagnosticStorage &, ReplayFunc);
170183

171184
// Deserialize File and return the bufferID in serializing SourceManager.
172-
unsigned deserializeFile(const SerializedFile &File);
185+
llvm::Expected<unsigned> deserializeFile(const SerializedFile &File);
173186
llvm::Error deserializeVirtualFile(const SerializedVirtualFile &VF);
174187
llvm::Error deserializeGeneratedFileInfo(const SerializedGeneratedFileInfo &Info);
175188
std::string remapFilePath(StringRef Path) {
@@ -190,6 +203,9 @@ struct DiagnosticSerializer {
190203
SourceManager SrcMgr;
191204
llvm::PrefixMapper &Mapper;
192205

206+
// CAS for file system backing.
207+
llvm::cas::ObjectStore &CAS;
208+
193209
// Mapping of the FileID between SourceManager from CompilerInstance vs.
194210
// the serialized FileID in cached diagnostics. Lookup tables are
195211
// per-SourceManager to handle diagnostics from all sub-instances which
@@ -248,6 +264,7 @@ struct MappingTraits<SerializedFile> {
248264
io.mapRequired("Name", F.FileName);
249265
io.mapOptional("IncludeLoc", F.IncludeLoc, SerializedSourceLoc());
250266
io.mapOptional("Content", F.Content, StringRef());
267+
io.mapOptional("CASID", F.ContentCASID, "");
251268
}
252269
};
253270

@@ -302,25 +319,29 @@ void DiagnosticSerializer::handleDiagnostic(SourceManager &SM,
302319

303320
unsigned DiagnosticSerializer::getFileIDFromBufferID(SourceManager &SM,
304321
unsigned Idx) {
305-
auto &Buf = SM.getLLVMSourceMgr().getBufferInfo(Idx);
306-
auto Filename = Buf.Buffer->getBufferIdentifier();
307-
bool IsFSBacked = SM.getFileSystem()->exists(Filename);
308-
309322
// See if the file is already constructed.
310323
auto &Allocated = FileMapper[&SM];
311324
auto ID = Allocated.find(Idx);
312325
if (ID != Allocated.end())
313326
return ID->second;
314327

328+
auto &Buf = SM.getLLVMSourceMgr().getBufferInfo(Idx);
329+
auto Filename = Buf.Buffer->getBufferIdentifier();
330+
bool IsFileBacked = SM.getFileSystem()->exists(Filename);
331+
315332
// Construct and add to files. If there is an IncludeLoc, the file from
316333
// IncludeLoc is added before current file.
317334
assert(CurrentFileID == Files.size() && "File index mismatch");
318-
StringRef FileContent = IsFSBacked ? StringRef() : Buf.Buffer->getBuffer();
335+
336+
StringRef FileContent = Buf.Buffer->getBuffer();
319337
SerializedFile File = {Filename.str(),
320338
convertSourceLoc(SM, SourceLoc(Buf.IncludeLoc)),
321-
FileContent};
339+
{},
340+
IsFileBacked ? "" : FileContent};
341+
322342
// Add file to serializing source manager.
323-
FileMapper[&SrcMgr].insert({CurrentFileID, deserializeFile(File)});
343+
unsigned NewIdx = SrcMgr.addMemBufferCopy(Buf.Buffer.get());
344+
FileMapper[&SrcMgr].insert({CurrentFileID, NewIdx});
324345

325346
Files.emplace_back(std::move(File));
326347
Allocated.insert({Idx, ++CurrentFileID});
@@ -495,21 +516,26 @@ DiagnosticSerializer::deserializeFixIt(const SerializedFixIt &FI) {
495516
return DiagnosticInfo::FixIt(*Range, FI.Text, {});
496517
}
497518

498-
unsigned DiagnosticSerializer::deserializeFile(const SerializedFile &File) {
519+
llvm::Expected<unsigned>
520+
DiagnosticSerializer::deserializeFile(const SerializedFile &File) {
499521
assert(File.IncludeLoc.FileID == 0 && "IncludeLoc not supported yet");
500522
auto FileName = remapFilePath(File.FileName);
501-
if (File.Content.empty() && FileName == File.FileName)
502-
return SrcMgr.getExternalSourceBufferID(FileName);
503-
504-
std::unique_ptr<llvm::MemoryBuffer> Content;
505-
if (!File.Content.empty())
506-
Content = llvm::MemoryBuffer::getMemBufferCopy(File.Content, FileName);
507-
else if (auto InputFileOrErr = swift::vfs::getFileOrSTDIN(
508-
*SrcMgr.getFileSystem(), File.FileName))
509-
Content = llvm::MemoryBuffer::getMemBufferCopy(
510-
(*InputFileOrErr)->getBuffer(), FileName);
511-
512-
return Content ? SrcMgr.addNewSourceBuffer(std::move(Content)) : 0u;
523+
524+
if (!File.ContentCASID.empty()) {
525+
auto ID = CAS.parseID(File.ContentCASID);
526+
if (!ID)
527+
return ID.takeError();
528+
529+
auto Proxy = CAS.getProxy(*ID);
530+
if (!Proxy)
531+
return Proxy.takeError();
532+
533+
auto Content = Proxy->getMemoryBuffer(FileName);
534+
return SrcMgr.addNewSourceBuffer(std::move(Content));
535+
}
536+
537+
auto Content = llvm::MemoryBuffer::getMemBufferCopy(File.Content, FileName);
538+
return SrcMgr.addNewSourceBuffer(std::move(Content));
513539
}
514540

515541
llvm::Error
@@ -599,6 +625,32 @@ llvm::Error DiagnosticSerializer::deserializeDiagnosticInfo(
599625

600626
llvm::Error
601627
DiagnosticSerializer::serializeEmittedDiagnostics(llvm::raw_ostream &os) {
628+
// Convert all file-backed source files into CASIDs.
629+
for (auto &File : Files) {
630+
if (!File.Content.empty() || !File.ContentCASID.empty())
631+
continue;
632+
633+
auto Ref =
634+
SrcMgr.getFileSystem()->getObjectRefForFileContent(File.FileName);
635+
if (!Ref)
636+
return llvm::createFileError(File.FileName, Ref.getError());
637+
638+
if (*Ref) {
639+
File.ContentCASID = CAS.getID(**Ref).toString();
640+
continue;
641+
}
642+
643+
// Probably a file system that is not CAS based. Ingest the buffer.
644+
auto Buf = SrcMgr.getFileSystem()->getBufferForFile(File.FileName);
645+
if (!Buf)
646+
return llvm::createFileError(File.FileName, Buf.getError());
647+
648+
auto BufRef = CAS.storeFromString({}, (*Buf)->getBuffer());
649+
if (!BufRef)
650+
return llvm::createFileError(File.FileName, BufRef.takeError());
651+
File.ContentCASID = CAS.getID(*BufRef).toString();
652+
}
653+
602654
llvm::yaml::Output yout(os);
603655
yout << *this;
604656
return llvm::Error::success();
@@ -616,8 +668,10 @@ llvm::Error DiagnosticSerializer::doEmitFromCached(llvm::StringRef Buffer,
616668
unsigned ID = 0;
617669
for (auto &File : Files) {
618670
assert(File.IncludeLoc.FileID == 0 && "IncludeLoc not supported yet");
619-
unsigned Idx = deserializeFile(File);
620-
FileMapper[&SrcMgr].insert({ID++, Idx});
671+
auto Idx = deserializeFile(File);
672+
if (!Idx)
673+
return Idx.takeError();
674+
FileMapper[&SrcMgr].insert({ID++, *Idx});
621675
}
622676

623677
for (auto &VF : VFiles) {
@@ -651,7 +705,7 @@ class CachingDiagnosticsProcessor::Implementation
651705
: InstanceSourceMgr(Instance.getSourceMgr()),
652706
InAndOut(
653707
Instance.getInvocation().getFrontendOptions().InputsAndOutputs),
654-
Diags(Instance.getDiags()) {
708+
Diags(Instance.getDiags()), CAS(*Instance.getSharedCASInstance()) {
655709
SmallVector<llvm::MappedPrefix, 4> Prefixes;
656710
llvm::MappedPrefix::transformJoinedIfValid(
657711
Instance.getInvocation().getFrontendOptions().CacheReplayPrefixMap,
@@ -681,7 +735,7 @@ class CachingDiagnosticsProcessor::Implementation
681735

682736
llvm::Error replayCachedDiagnostics(llvm::StringRef Buffer) {
683737
return DiagnosticSerializer::emitDiagnosticsFromCached(
684-
Buffer, getDiagnosticSourceMgr(), Diags, Mapper, InAndOut);
738+
Buffer, Diags, Mapper, CAS, InAndOut);
685739
}
686740

687741
void handleDiagnostic(SourceManager &SM,
@@ -691,7 +745,7 @@ class CachingDiagnosticsProcessor::Implementation
691745
"Caching for a different file system");
692746
Serializer.handleDiagnostic(SM, Info, [&](const DiagnosticInfo &Info) {
693747
for (auto *Diag : OrigConsumers)
694-
Diag->handleDiagnostic(getDiagnosticSourceMgr(), Info);
748+
Diag->handleDiagnostic(Serializer.getSourceMgr(), Info);
695749
return llvm::Error::success();
696750
});
697751
}
@@ -718,10 +772,6 @@ class CachingDiagnosticsProcessor::Implementation
718772
}
719773

720774
private:
721-
SourceManager &getDiagnosticSourceMgr() {
722-
return getSerializer().getSourceMgr();
723-
}
724-
725775
DiagnosticSerializer &getSerializer() {
726776
// If the DiagnosticSerializer is not setup, create it. It cannot
727777
// be created on the creation of CachingDiagnosticsProcessor because the
@@ -730,9 +780,9 @@ class CachingDiagnosticsProcessor::Implementation
730780
// compiler instance on the first diagnostics and assert if the underlying
731781
// file system changes on later diagnostics.
732782
if (!Serializer) {
733-
Serializer.reset(
734-
new DiagnosticSerializer(InstanceSourceMgr.getFileSystem(), Mapper));
735-
Serializer->addInputsToSourceMgr(InAndOut);
783+
Serializer.reset(new DiagnosticSerializer(
784+
InstanceSourceMgr.getFileSystem(), Mapper, CAS));
785+
Serializer->addInputsToSourceMgr(InstanceSourceMgr, InAndOut);
736786
}
737787

738788
return *Serializer;
@@ -751,6 +801,7 @@ class CachingDiagnosticsProcessor::Implementation
751801
const FrontendInputsAndOutputs &InAndOut;
752802
DiagnosticEngine &Diags;
753803
llvm::PrefixMapper Mapper;
804+
llvm::cas::ObjectStore &CAS;
754805

755806
llvm::unique_function<bool(StringRef)> serializedOutputCallback;
756807

test/CAS/path_remap.swift

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// RUN: %empty-directory(%t)
2+
// RUN: split-file %s %t
3+
4+
// RUN: %target-swift-frontend -scan-dependencies -module-name Test -module-cache-path %t/clang-module-cache -O \
5+
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
6+
// RUN: %t/main.swift -o %t/deps.json -swift-version 5 -cache-compile-job -cas-path %t/cas -I %t/include \
7+
// RUN: -scanner-prefix-map %swift_src_root=/^src -scanner-prefix-map %t=/^tmp
8+
9+
// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json A > %t/A.cmd
10+
// RUN: %swift_frontend_plain @%t/A.cmd
11+
12+
// RUN: %{python} %S/Inputs/GenerateExplicitModuleMap.py %t/deps.json > %t/map.json
13+
// RUN: llvm-cas --cas %t/cas --make-blob --data %t/map.json > %t/map.casid
14+
// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json Test > %t/MyApp.cmd
15+
16+
// RUN: %target-swift-frontend \
17+
// RUN: -c -o %t/main.o -cache-compile-job -cas-path %t/cas \
18+
// RUN: -swift-version 5 -disable-implicit-swift-modules \
19+
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
20+
// RUN: -module-name Test -explicit-swift-module-map-file @%t/map.casid \
21+
// RUN: -cache-replay-prefix-map /^src=%swift_src_root -cache-replay-prefix-map /^tmp=%t \
22+
// RUN: /^tmp/main.swift @%t/MyApp.cmd
23+
24+
// RUN: %swift-scan-test -action compute_cache_key_from_index -cas-path %t/cas -input 0 -- \
25+
// RUN: %target-swift-frontend \
26+
// RUN: -c -o %t/main.o -cache-compile-job -cas-path %t/cas \
27+
// RUN: -swift-version 5 -disable-implicit-swift-modules \
28+
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
29+
// RUN: -module-name Test -explicit-swift-module-map-file @%t/map.casid \
30+
// RUN: -cache-replay-prefix-map /^src=%swift_src_root -cache-replay-prefix-map /^tmp=%t \
31+
// RUN: /^tmp/main.swift @%t/MyApp.cmd > %t/key.casid
32+
33+
// RUN: %swift-scan-test -action replay_result -cas-path %t/cas -id @%t/key.casid -- \
34+
// RUN: %target-swift-frontend \
35+
// RUN: -c -o %t/main.o -cache-compile-job -cas-path %t/cas \
36+
// RUN: -swift-version 5 -disable-implicit-swift-modules \
37+
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
38+
// RUN: -module-name Test -explicit-swift-module-map-file @%t/map.casid \
39+
// RUN: -cache-replay-prefix-map /^src=%swift_src_root -cache-replay-prefix-map /^tmp=%t \
40+
// RUN: /^tmp/main.swift @%t/MyApp.cmd
41+
42+
//--- main.swift
43+
import A
44+
45+
#warning("This is a warning")
46+
47+
//--- include/A.swiftinterface
48+
// swift-interface-format-version: 1.0
49+
// swift-module-flags: -module-name A -O -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib -user-module-version 1.0
50+
public func b() { }
51+

0 commit comments

Comments
 (0)