Skip to content

[Caching] Don't rely on FileSystem when replaying diagnostics #73822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 97 additions & 46 deletions lib/Frontend/CachedDiagnostics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@
#include "swift/Frontend/FrontendInputsAndOutputs.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/CAS/ObjectStore.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrefixMapper.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VirtualFileSystem.h"
Expand Down Expand Up @@ -82,6 +85,7 @@ struct SerializedDiagnosticInfo {
struct SerializedFile {
std::string FileName;
SerializedSourceLoc IncludeLoc = SerializedSourceLoc();
std::string ContentCASID;
StringRef Content;
};

Expand All @@ -100,8 +104,8 @@ struct SerializedGeneratedFileInfo {

struct DiagnosticSerializer {
DiagnosticSerializer(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
llvm::PrefixMapper &Mapper)
: SrcMgr(FS), Mapper(Mapper) {}
llvm::PrefixMapper &Mapper, llvm::cas::ObjectStore &CAS)
: SrcMgr(FS), Mapper(Mapper), CAS(CAS) {}

using ReplayFunc = llvm::function_ref<llvm::Error(const DiagnosticInfo &)>;

Expand All @@ -111,31 +115,40 @@ struct DiagnosticSerializer {
llvm::Error serializeEmittedDiagnostics(llvm::raw_ostream &os);

static llvm::Error
emitDiagnosticsFromCached(llvm::StringRef Buffer, SourceManager &SrcMgr,
DiagnosticEngine &Diags,
llvm::PrefixMapper &Mapper,
emitDiagnosticsFromCached(llvm::StringRef Buffer,
DiagnosticEngine &Diags, llvm::PrefixMapper &Mapper,
llvm::cas::ObjectStore &CAS,
const FrontendInputsAndOutputs &InAndOut) {
// Create a new DiagnosticSerializer since this cannot be shared with a
// serialization instance.
DiagnosticSerializer DS(SrcMgr.getFileSystem(), Mapper);
DS.addInputsToSourceMgr(InAndOut);
// serialization instance. Using an empty in-memory file system as
// underlying file system because the replay logic should not touch file
// system.
auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
DiagnosticSerializer DS(std::move(FS), Mapper, CAS);
return DS.doEmitFromCached(Buffer, Diags);
}

SourceManager &getSourceMgr() { return SrcMgr; }

void addInputsToSourceMgr(const FrontendInputsAndOutputs &InAndOut) {
void addInputsToSourceMgr(SourceManager &SM,
const FrontendInputsAndOutputs &InAndOut) {
// Extract all the input file names so they can be added to the source
// manager when replaying the diagnostics. All input files are needed even
// they don't contain diagnostics because FileSpecificDiagConsumer need
// has references to input files to find subconsumer.
auto addInputToSourceMgr = [&](const InputFile &Input) {
if (Input.getFileName() != "-")
SrcMgr.getExternalSourceBufferID(remapFilePath(Input.getFileName()));
auto Path = remapFilePath(Input.getFileName());
SrcMgr.getExternalSourceBufferID(Path);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated with next line

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those are two different SourceMgr. The first one is the one used to replay, and the second one is the actual SourceMgr from the instance.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This deserves a comment or else rename SM so it's clear the difference


// Fetch the source buffer from original SourceManager and create a
// serialized file from it.
auto Idx = SM.getExternalSourceBufferID(Path);
if (Idx != 0)
getFileIDFromBufferID(SM, Idx);

return false;
};
InAndOut.forEachInputProducingSupplementaryOutput(addInputToSourceMgr);
InAndOut.forEachNonPrimaryInput(addInputToSourceMgr);
InAndOut.forEachInput(addInputToSourceMgr);
}

private:
Expand Down Expand Up @@ -169,7 +182,7 @@ struct DiagnosticSerializer {
DiagnosticStorage &, ReplayFunc);

// Deserialize File and return the bufferID in serializing SourceManager.
unsigned deserializeFile(const SerializedFile &File);
llvm::Expected<unsigned> deserializeFile(const SerializedFile &File);
llvm::Error deserializeVirtualFile(const SerializedVirtualFile &VF);
llvm::Error deserializeGeneratedFileInfo(const SerializedGeneratedFileInfo &Info);
std::string remapFilePath(StringRef Path) {
Expand All @@ -190,6 +203,9 @@ struct DiagnosticSerializer {
SourceManager SrcMgr;
llvm::PrefixMapper &Mapper;

// CAS for file system backing.
llvm::cas::ObjectStore &CAS;

// Mapping of the FileID between SourceManager from CompilerInstance vs.
// the serialized FileID in cached diagnostics. Lookup tables are
// per-SourceManager to handle diagnostics from all sub-instances which
Expand Down Expand Up @@ -248,6 +264,7 @@ struct MappingTraits<SerializedFile> {
io.mapRequired("Name", F.FileName);
io.mapOptional("IncludeLoc", F.IncludeLoc, SerializedSourceLoc());
io.mapOptional("Content", F.Content, StringRef());
io.mapOptional("CASID", F.ContentCASID, "");
}
};

Expand Down Expand Up @@ -302,25 +319,29 @@ void DiagnosticSerializer::handleDiagnostic(SourceManager &SM,

unsigned DiagnosticSerializer::getFileIDFromBufferID(SourceManager &SM,
unsigned Idx) {
auto &Buf = SM.getLLVMSourceMgr().getBufferInfo(Idx);
auto Filename = Buf.Buffer->getBufferIdentifier();
bool IsFSBacked = SM.getFileSystem()->exists(Filename);

// See if the file is already constructed.
auto &Allocated = FileMapper[&SM];
auto ID = Allocated.find(Idx);
if (ID != Allocated.end())
return ID->second;

auto &Buf = SM.getLLVMSourceMgr().getBufferInfo(Idx);
auto Filename = Buf.Buffer->getBufferIdentifier();
bool IsFileBacked = SM.getFileSystem()->exists(Filename);

// Construct and add to files. If there is an IncludeLoc, the file from
// IncludeLoc is added before current file.
assert(CurrentFileID == Files.size() && "File index mismatch");
StringRef FileContent = IsFSBacked ? StringRef() : Buf.Buffer->getBuffer();

StringRef FileContent = Buf.Buffer->getBuffer();
SerializedFile File = {Filename.str(),
convertSourceLoc(SM, SourceLoc(Buf.IncludeLoc)),
FileContent};
{},
IsFileBacked ? "" : FileContent};

// Add file to serializing source manager.
FileMapper[&SrcMgr].insert({CurrentFileID, deserializeFile(File)});
unsigned NewIdx = SrcMgr.addMemBufferCopy(Buf.Buffer.get());
FileMapper[&SrcMgr].insert({CurrentFileID, NewIdx});

Files.emplace_back(std::move(File));
Allocated.insert({Idx, ++CurrentFileID});
Expand Down Expand Up @@ -495,21 +516,26 @@ DiagnosticSerializer::deserializeFixIt(const SerializedFixIt &FI) {
return DiagnosticInfo::FixIt(*Range, FI.Text, {});
}

unsigned DiagnosticSerializer::deserializeFile(const SerializedFile &File) {
llvm::Expected<unsigned>
DiagnosticSerializer::deserializeFile(const SerializedFile &File) {
assert(File.IncludeLoc.FileID == 0 && "IncludeLoc not supported yet");
auto FileName = remapFilePath(File.FileName);
if (File.Content.empty() && FileName == File.FileName)
return SrcMgr.getExternalSourceBufferID(FileName);

std::unique_ptr<llvm::MemoryBuffer> Content;
if (!File.Content.empty())
Content = llvm::MemoryBuffer::getMemBufferCopy(File.Content, FileName);
else if (auto InputFileOrErr = swift::vfs::getFileOrSTDIN(
*SrcMgr.getFileSystem(), File.FileName))
Content = llvm::MemoryBuffer::getMemBufferCopy(
(*InputFileOrErr)->getBuffer(), FileName);

return Content ? SrcMgr.addNewSourceBuffer(std::move(Content)) : 0u;

if (!File.ContentCASID.empty()) {
auto ID = CAS.parseID(File.ContentCASID);
if (!ID)
return ID.takeError();

auto Proxy = CAS.getProxy(*ID);
if (!Proxy)
return Proxy.takeError();

auto Content = Proxy->getMemoryBuffer(FileName);
return SrcMgr.addNewSourceBuffer(std::move(Content));
}

auto Content = llvm::MemoryBuffer::getMemBufferCopy(File.Content, FileName);
return SrcMgr.addNewSourceBuffer(std::move(Content));
}

llvm::Error
Expand Down Expand Up @@ -599,6 +625,32 @@ llvm::Error DiagnosticSerializer::deserializeDiagnosticInfo(

llvm::Error
DiagnosticSerializer::serializeEmittedDiagnostics(llvm::raw_ostream &os) {
// Convert all file backed source file into CASIDs.
for (auto &File : Files) {
if (!File.Content.empty() || !File.ContentCASID.empty())
continue;

auto Ref =
SrcMgr.getFileSystem()->getObjectRefForFileContent(File.FileName);
if (!Ref)
return llvm::createFileError(File.FileName, Ref.getError());

if (*Ref) {
File.ContentCASID = CAS.getID(**Ref).toString();
continue;
}

// Probably a file system that is not CAS based. Ingest the buffer.
auto Buf = SrcMgr.getFileSystem()->getBufferForFile(File.FileName);
if (!Buf)
return llvm::createFileError(File.FileName, Buf.getError());

auto BufRef = CAS.storeFromString({}, (*Buf)->getBuffer());
if (!BufRef)
return llvm::createFileError(File.FileName, BufRef.takeError());
File.ContentCASID = CAS.getID(*BufRef).toString();
}

llvm::yaml::Output yout(os);
yout << *this;
return llvm::Error::success();
Expand All @@ -616,8 +668,10 @@ llvm::Error DiagnosticSerializer::doEmitFromCached(llvm::StringRef Buffer,
unsigned ID = 0;
for (auto &File : Files) {
assert(File.IncludeLoc.FileID == 0 && "IncludeLoc not supported yet");
unsigned Idx = deserializeFile(File);
FileMapper[&SrcMgr].insert({ID++, Idx});
auto Idx = deserializeFile(File);
if (!Idx)
return Idx.takeError();
FileMapper[&SrcMgr].insert({ID++, *Idx});
}

for (auto &VF : VFiles) {
Expand Down Expand Up @@ -651,7 +705,7 @@ class CachingDiagnosticsProcessor::Implementation
: InstanceSourceMgr(Instance.getSourceMgr()),
InAndOut(
Instance.getInvocation().getFrontendOptions().InputsAndOutputs),
Diags(Instance.getDiags()) {
Diags(Instance.getDiags()), CAS(*Instance.getSharedCASInstance()) {
SmallVector<llvm::MappedPrefix, 4> Prefixes;
llvm::MappedPrefix::transformJoinedIfValid(
Instance.getInvocation().getFrontendOptions().CacheReplayPrefixMap,
Expand Down Expand Up @@ -681,7 +735,7 @@ class CachingDiagnosticsProcessor::Implementation

llvm::Error replayCachedDiagnostics(llvm::StringRef Buffer) {
return DiagnosticSerializer::emitDiagnosticsFromCached(
Buffer, getDiagnosticSourceMgr(), Diags, Mapper, InAndOut);
Buffer, Diags, Mapper, CAS, InAndOut);
}

void handleDiagnostic(SourceManager &SM,
Expand All @@ -691,7 +745,7 @@ class CachingDiagnosticsProcessor::Implementation
"Caching for a different file system");
Serializer.handleDiagnostic(SM, Info, [&](const DiagnosticInfo &Info) {
for (auto *Diag : OrigConsumers)
Diag->handleDiagnostic(getDiagnosticSourceMgr(), Info);
Diag->handleDiagnostic(Serializer.getSourceMgr(), Info);
return llvm::Error::success();
});
}
Expand All @@ -718,10 +772,6 @@ class CachingDiagnosticsProcessor::Implementation
}

private:
SourceManager &getDiagnosticSourceMgr() {
return getSerializer().getSourceMgr();
}

DiagnosticSerializer &getSerializer() {
// If the DiagnosticSerializer is not setup, create it. It cannot
// be created on the creation of CachingDiagnosticsProcessor because the
Expand All @@ -730,9 +780,9 @@ class CachingDiagnosticsProcessor::Implementation
// compiler instance on the first diagnostics and assert if the underlying
// file system changes on later diagnostics.
if (!Serializer) {
Serializer.reset(
new DiagnosticSerializer(InstanceSourceMgr.getFileSystem(), Mapper));
Serializer->addInputsToSourceMgr(InAndOut);
Serializer.reset(new DiagnosticSerializer(
InstanceSourceMgr.getFileSystem(), Mapper, CAS));
Serializer->addInputsToSourceMgr(InstanceSourceMgr, InAndOut);
}

return *Serializer;
Expand All @@ -751,6 +801,7 @@ class CachingDiagnosticsProcessor::Implementation
const FrontendInputsAndOutputs &InAndOut;
DiagnosticEngine &Diags;
llvm::PrefixMapper Mapper;
llvm::cas::ObjectStore &CAS;

llvm::unique_function<bool(StringRef)> serializedOutputCallback;

Expand Down
51 changes: 51 additions & 0 deletions test/CAS/path_remap.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// RUN: %empty-directory(%t)
// RUN: split-file %s %t

// RUN: %target-swift-frontend -scan-dependencies -module-name Test -module-cache-path %t/clang-module-cache -O \
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
// RUN: %t/main.swift -o %t/deps.json -swift-version 5 -cache-compile-job -cas-path %t/cas -I %t/include \
// RUN: -scanner-prefix-map %swift_src_root=/^src -scanner-prefix-map %t=/^tmp

// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json A > %t/A.cmd
// RUN: %swift_frontend_plain @%t/A.cmd

// RUN: %{python} %S/Inputs/GenerateExplicitModuleMap.py %t/deps.json > %t/map.json
// RUN: llvm-cas --cas %t/cas --make-blob --data %t/map.json > %t/map.casid
// RUN: %{python} %S/Inputs/BuildCommandExtractor.py %t/deps.json Test > %t/MyApp.cmd

// RUN: %target-swift-frontend \
// RUN: -c -o %t/main.o -cache-compile-job -cas-path %t/cas \
// RUN: -swift-version 5 -disable-implicit-swift-modules \
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
// RUN: -module-name Test -explicit-swift-module-map-file @%t/map.casid \
// RUN: -cache-replay-prefix-map /^src=%swift_src_root -cache-replay-prefix-map /^tmp=%t \
// RUN: /^tmp/main.swift @%t/MyApp.cmd

// RUN: %swift-scan-test -action compute_cache_key_from_index -cas-path %t/cas -input 0 -- \
// RUN: %target-swift-frontend \
// RUN: -c -o %t/main.o -cache-compile-job -cas-path %t/cas \
// RUN: -swift-version 5 -disable-implicit-swift-modules \
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
// RUN: -module-name Test -explicit-swift-module-map-file @%t/map.casid \
// RUN: -cache-replay-prefix-map /^src=%swift_src_root -cache-replay-prefix-map /^tmp=%t \
// RUN: /^tmp/main.swift @%t/MyApp.cmd > %t/key.casid

// RUN: %swift-scan-test -action replay_result -cas-path %t/cas -id @%t/key.casid -- \
// RUN: %target-swift-frontend \
// RUN: -c -o %t/main.o -cache-compile-job -cas-path %t/cas \
// RUN: -swift-version 5 -disable-implicit-swift-modules \
// RUN: -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib \
// RUN: -module-name Test -explicit-swift-module-map-file @%t/map.casid \
// RUN: -cache-replay-prefix-map /^src=%swift_src_root -cache-replay-prefix-map /^tmp=%t \
// RUN: /^tmp/main.swift @%t/MyApp.cmd

//--- main.swift
import A

#warning("This is a warning")

//--- include/A.swiftinterface
// swift-interface-format-version: 1.0
// swift-module-flags: -module-name A -O -disable-implicit-string-processing-module-import -disable-implicit-concurrency-module-import -parse-stdlib -user-module-version 1.0
public func b() { }