Skip to content

Commit 9a93a3b

Browse files
authored
Merge pull request swiftlang#35394 from rintaro/serialization-source-file-list
[Serialization] Serialize source file information to '.swiftsourceinfo' file
2 parents d121d7d + 5331763 commit 9a93a3b

19 files changed

+251
-46
lines changed

include/swift/AST/FileUnit.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,9 @@ class LoadedFile : public FileUnit {
402402
return false;
403403
}
404404

405+
/// Reports basic source file information to \p callback.
///
/// This default implementation does nothing and never invokes \p callback;
/// subclasses that have such information override it.
virtual void collectBasicSourceFileInfo(
406+
llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const {}
407+
405408
static bool classof(const FileUnit *file) {
406409
return file->getKind() == FileUnitKind::SerializedAST ||
407410
file->getKind() == FileUnitKind::ClangModule ||

include/swift/AST/Module.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,10 @@ class ModuleDecl : public DeclContext, public TypeDecl {
728728
return ReverseFullNameIterator(this);
729729
}
730730

731+
/// Calls \p callback for each source file of the module.
732+
void collectBasicSourceFileInfo(
733+
llvm::function_ref<void(const BasicSourceFileInfo &)> callback);
734+
731735
SourceRange getSourceRange() const { return SourceRange(); }
732736

733737
static bool classof(const DeclContext *DC) {

include/swift/AST/RawComment.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,15 @@
1313
#ifndef SWIFT_AST_RAW_COMMENT_H
1414
#define SWIFT_AST_RAW_COMMENT_H
1515

16+
#include "swift/Basic/Fingerprint.h"
1617
#include "swift/Basic/LLVM.h"
1718
#include "swift/Basic/SourceLoc.h"
1819
#include "swift/Basic/SourceManager.h"
1920

2021
namespace swift {
22+
23+
class SourceFile;
24+
2125
struct SingleRawComment {
2226
enum class CommentKind {
2327
OrdinaryLine, ///< Any normal // comments
@@ -92,6 +96,17 @@ struct BasicDeclLocs {
9296
LineColumn EndLoc;
9397
};
9498

99+
/// Basic metadata describing a single source file: its path, interface hash,
/// last modification time, and size.
struct BasicSourceFileInfo {
100+
/// Path of the source file. This is a non-owning reference; the underlying
/// storage must outlive this object.
StringRef FilePath;
101+
/// Interface hash of the file; all-zero when no hash is available.
Fingerprint InterfaceHash = Fingerprint::ZERO();
102+
/// Last modification time of the file.
llvm::sys::TimePoint<> LastModified = {};
103+
/// Size of the file in bytes.
uint64_t FileSize = 0;
104+
105+
/// Creates an empty record: empty path, zero hash, default time, zero size.
BasicSourceFileInfo() {}
106+
107+
/// Fills this record from \p SF.
///
/// \returns true on FAILURE (the record must not be used), false on success.
bool populate(SourceFile *SF);
108+
};
109+
95110
} // namespace swift
96111

97112
#endif // SWIFT_AST_RAW_COMMENT_H

include/swift/AST/SourceFile.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,9 @@ class SourceFile final : public FileUnit {
531531
out << getInterfaceHash() << '\n';
532532
}
533533

534+
/// Get this file's interface hash including the type members in the file.
535+
Fingerprint getInterfaceHashIncludingTypeMembers() const;
536+
534537
/// If this source file has been told to collect its parsed tokens, retrieve
535538
/// those tokens.
536539
ArrayRef<Token> getAllTokens() const;

include/swift/Serialization/SerializedModuleLoader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,9 @@ class SerializedASTFile final : public LoadedFile {
446446

447447
StringRef getTargetTriple() const;
448448

449+
virtual void collectBasicSourceFileInfo(
450+
llvm::function_ref<void(const BasicSourceFileInfo &)>) const override;
451+
449452
static bool classof(const FileUnit *file) {
450453
return file->getKind() == FileUnitKind::SerializedAST;
451454
}

lib/AST/Module.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,30 @@ Fingerprint SourceFile::getInterfaceHash() const {
10991099
return Fingerprint{std::move(result)};
11001100
}
11011101

1102+
Fingerprint SourceFile::getInterfaceHashIncludingTypeMembers() const {
  // FIXME: Gross. Hashing multiple "hash" values.
  llvm::MD5 combined;
  combined.update(getInterfaceHash().getRawValue());

  // Pre-order, depth-first walk over iterable decl contexts. For each context
  // the body fingerprint (when present) is folded into the running hash
  // before any of the contexts nested among its parsed members.
  struct BodyFingerprintCollector {
    llvm::MD5 &Hash;

    void visit(IterableDeclContext *context) {
      if (auto bodyFP = context->getBodyFingerprint())
        Hash.update(bodyFP->getRawValue());

      for (auto *member : context->getParsedMembers()) {
        if (auto *nested = dyn_cast<IterableDeclContext>(member))
          visit(nested);
      }
    }
  } collector{combined};

  // Only top-level declarations that are iterable decl contexts contribute.
  for (auto *topLevel : getTopLevelDecls()) {
    auto *context = dyn_cast<IterableDeclContext>(topLevel);
    if (!context)
      continue;
    collector.visit(context);
  }

  llvm::MD5::MD5Result digest;
  combined.final(digest);
  return Fingerprint{std::move(digest)};
}
1125+
11021126
syntax::SourceFileSyntax SourceFile::getSyntaxRoot() const {
11031127
assert(shouldBuildSyntaxTree() && "Syntax tree disabled");
11041128
auto &eval = getASTContext().evaluator;
@@ -1514,6 +1538,20 @@ const clang::Module *ModuleDecl::findUnderlyingClangModule() const {
15141538
return nullptr;
15151539
}
15161540

1541+
void ModuleDecl::collectBasicSourceFileInfo(
    llvm::function_ref<void(const BasicSourceFileInfo &)> callback) {
  for (FileUnit *unit : getFiles()) {
    // Source files are described directly from the file itself.
    if (auto *sourceFile = dyn_cast<SourceFile>(unit)) {
      BasicSourceFileInfo sourceInfo;
      // populate() returns true on failure; such files are silently skipped.
      if (!sourceInfo.populate(sourceFile))
        callback(sourceInfo);
      continue;
    }

    // Loaded files report whatever source file information they carry.
    if (auto *loaded = dyn_cast<LoadedFile>(unit))
      loaded->collectBasicSourceFileInfo(callback);
  }
}
1554+
15171555
//===----------------------------------------------------------------------===//
15181556
// Cross-Import Overlays
15191557
//===----------------------------------------------------------------------===//

lib/AST/RawComment.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "swift/AST/FileUnit.h"
2323
#include "swift/AST/Module.h"
2424
#include "swift/AST/PrettyStackTrace.h"
25+
#include "swift/AST/SourceFile.h"
2526
#include "swift/AST/Types.h"
2627
#include "swift/Basic/PrimitiveParsing.h"
2728
#include "swift/Basic/SourceManager.h"
@@ -238,3 +239,27 @@ CharSourceRange RawComment::getCharSourceRange() {
238239
static_cast<const char *>(Start.getOpaquePointerValue());
239240
return CharSourceRange(Start, Length);
240241
}
242+
243+
/// Fills this record with the path, modification time, size and interface
/// hash of \p SF.
///
/// \returns true on failure (the file has no name, or its status cannot be
/// queried), false on success.
bool BasicSourceFileInfo::populate(SourceFile *SF) {
  const auto path = SF->getFilename();
  if (path.empty())
    return true;

  SourceManager &sourceMgr = SF->getASTContext().SourceMgr;
  auto fileStatus = sourceMgr.getFileSystem()->status(path);
  if (!fileStatus)
    return true;

  FilePath = path;
  LastModified = fileStatus->getLastModificationTime();
  FileSize = fileStatus->getSize();

  // FIXME: Parse the file with EnableInterfaceHash option.
  // Until then, a file without an interface hash is recorded with a zero hash.
  InterfaceHash = SF->hasInterfaceHash()
                      ? SF->getInterfaceHashIncludingTypeMembers()
                      : Fingerprint::ZERO();

  return false;
}

lib/IDE/CompletionInstance.cpp

Lines changed: 20 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -277,49 +277,6 @@ static bool areAnyDependentFilesInvalidated(
277277
});
278278
}
279279

280-
/// Get interface hash of \p SF including the type members in the file.
281-
///
282-
/// See if the interface of the function and types visible from a function body
283-
/// has changed since the last completion. If they haven't changed, completion
284-
/// can reuse the existing AST of the source file. \c SF->getInterfaceHash() is
285-
/// not enough because it doesn't take the interface of the type members into
286-
/// account. For example:
287-
///
288-
/// struct S {
289-
/// func foo() {}
290-
/// }
291-
/// func main(val: S) {
292-
/// val.<HERE>
293-
/// }
294-
///
295-
/// In this case, we need to ensure that the interface of \c S hasn't changed.
296-
/// Note that we don't care about local types (i.e. type declarations inside
297-
/// function bodies, closures, or top level statement bodies) because they are
298-
/// not visible from other functions where the completion is happening.
299-
static Fingerprint getInterfaceHashIncludingTypeMembers(const SourceFile *SF) {
300-
/// FIXME: Gross. Hashing multiple "hash" values.
301-
llvm::MD5 hash;
302-
hash.update(SF->getInterfaceHash().getRawValue());
303-
304-
std::function<void(IterableDeclContext *)> hashTypeBodyFingerprints =
305-
[&](IterableDeclContext *IDC) {
306-
if (auto fp = IDC->getBodyFingerprint())
307-
hash.update(fp->getRawValue());
308-
for (auto *member : IDC->getParsedMembers())
309-
if (auto *childIDC = dyn_cast<IterableDeclContext>(member))
310-
hashTypeBodyFingerprints(childIDC);
311-
};
312-
313-
for (auto *D : SF->getTopLevelDecls()) {
314-
if (auto IDC = dyn_cast<IterableDeclContext>(D))
315-
hashTypeBodyFingerprints(IDC);
316-
}
317-
318-
llvm::MD5::MD5Result result;
319-
hash.final(result);
320-
return Fingerprint{std::move(result)};
321-
}
322-
323280
} // namespace
324281

325282
bool CompletionInstance::performCachedOperationIfPossible(
@@ -396,8 +353,26 @@ bool CompletionInstance::performCachedOperationIfPossible(
396353
switch (newInfo.Kind) {
397354
case CodeCompletionDelayedDeclKind::FunctionBody: {
398355
// If the interface has changed, AST must be refreshed.
399-
const auto oldInterfaceHash = getInterfaceHashIncludingTypeMembers(oldSF);
400-
const auto newInterfaceHash = getInterfaceHashIncludingTypeMembers(tmpSF);
356+
// See if the interface of the function and types visible from a function
357+
// body has changed since the last completion. If they haven't changed,
358+
// completion can reuse the existing AST of the source file.
359+
// \c getInterfaceHash() is not enough because it doesn't take the interface
360+
// of the type members into account. For example:
361+
//
362+
// struct S {
363+
// func foo() {}
364+
// }
365+
// func main(val: S) {
366+
// val.<HERE>
367+
// }
368+
//
369+
// In this case, we need to ensure that the interface of \c S hasn't
370+
// changed. Note that we don't care about local types (i.e. type
371+
// declarations inside function bodies, closures, or top level statement
372+
// bodies) because they are not visible from other functions where the
373+
// completion is happening.
374+
const auto oldInterfaceHash = oldSF->getInterfaceHashIncludingTypeMembers();
375+
const auto newInterfaceHash = tmpSF->getInterfaceHashIncludingTypeMembers();
401376
if (oldInterfaceHash != newInterfaceHash)
402377
return false;
403378

lib/Serialization/ModuleFile.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,41 @@ Optional<CommentInfo> ModuleFile::getCommentForDecl(const Decl *D) const {
959959
return getCommentForDeclByUSR(USRBuffer.str());
960960
}
961961

962+
/// Decodes the serialized source file list and calls \p callback once per
/// record.
///
/// Each record is fixed-size and laid out as:
///   - uint32_t  byte offset of the file path in 'SourceLocsTextData'
///   - char[Fingerprint::DIGEST_LENGTH]  interface hash string
///   - uint64_t  last modification time (nanoseconds since epoch)
///   - uint64_t  file size (bytes)
/// Integers are little-endian and unaligned. Does nothing when the list is
/// empty.
void ModuleFile::collectBasicSourceFileInfo(
    llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const {
  if (Core->SourceFileListData.empty())
    return;
  assert(!Core->SourceLocsTextData.empty());

  const size_t RecordSize = sizeof(uint32_t) + Fingerprint::DIGEST_LENGTH +
                            sizeof(uint64_t) + sizeof(uint64_t);

  auto *Cursor = Core->SourceFileListData.bytes_begin();
  auto *End = Core->SourceFileListData.bytes_end();
  while (Cursor < End) {
    // Never decode a partial record: a truncated blob would otherwise make
    // the reads below run past the end of the buffer.
    if (static_cast<size_t>(End - Cursor) < RecordSize)
      break;

    // FilePath (byte offset in 'SourceLocsTextData').
    auto fileID = endian::readNext<uint32_t, little, unaligned>(Cursor);
    // InterfaceHash (fixed length string).
    auto fpStr = StringRef{reinterpret_cast<const char *>(Cursor),
                           Fingerprint::DIGEST_LENGTH};
    Cursor += Fingerprint::DIGEST_LENGTH;
    // LastModified (nanoseconds since epoch).
    auto timestamp = endian::readNext<uint64_t, little, unaligned>(Cursor);
    // FileSize (num of bytes).
    auto fileSize = endian::readNext<uint64_t, little, unaligned>(Cursor);

    // The path is the 0-terminated segment starting at offset 'fileID'.
    assert(fileID < Core->SourceLocsTextData.size());
    auto filePath = Core->SourceLocsTextData.substr(fileID);
    size_t terminatorOffset = filePath.find('\0');
    filePath = filePath.slice(0, terminatorOffset);

    BasicSourceFileInfo info;
    info.FilePath = filePath;
    info.InterfaceHash = Fingerprint::fromString(fpStr);
    info.LastModified =
        llvm::sys::TimePoint<>(std::chrono::nanoseconds(timestamp));
    info.FileSize = fileSize;
    callback(info);
  }
}
996+
962997
Optional<BasicDeclLocs>
963998
ModuleFile::getBasicDeclLocsForDecl(const Decl *D) const {
964999
assert(D);

lib/Serialization/ModuleFile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,8 @@ class ModuleFile
694694
Optional<BasicDeclLocs> getBasicDeclLocsForDecl(const Decl *D) const;
695695
Identifier getDiscriminatorForPrivateValue(const ValueDecl *D);
696696
Optional<Fingerprint> loadFingerprint(const IterableDeclContext *IDC) const;
697+
void collectBasicSourceFileInfo(
698+
llvm::function_ref<void(const BasicSourceFileInfo &)> callback) const;
697699

698700

699701
// MARK: Deserialization interface

lib/Serialization/ModuleFileSharedCore.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,9 @@ bool ModuleFileSharedCore::readDeclLocsBlock(llvm::BitstreamCursor &cursor) {
974974
return false;
975975
}
976976
switch (*kind) {
977+
case decl_locs_block::SOURCE_FILE_LIST:
978+
SourceFileListData = blobData;
979+
break;
977980
case decl_locs_block::BASIC_DECL_LOCS:
978981
BasicDeclLocsData = blobData;
979982
break;

lib/Serialization/ModuleFileSharedCore.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,9 @@ class ModuleFileSharedCore {
289289
/// A blob of 0 terminated string segments referenced in \c SourceLocsTextData
290290
StringRef SourceLocsTextData;
291291

292+
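/// A blob holding the source file list: a sequence of fixed-size records, each
/// with a file path offset into \c SourceLocsTextData, an interface hash, a
/// last modification time, and a file size.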
293+
StringRef SourceFileListData;
294+
292295
/// An array of fixed size source location data for each USR appearing in
293296
/// \c DeclUSRsTable.
294297
StringRef BasicDeclLocsData;

lib/Serialization/SerializeDoc.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,66 @@ static void emitBasicLocsRecord(llvm::BitstreamWriter &Out,
783783
DeclLocsList.emit(scratch, Writer.Buffer);
784784
}
785785

786+
/// Emits the SOURCE_FILE_LIST record describing the source file(s) of \p MSF.
///
/// Each file contributes one fixed-size entry: the text offset of its
/// absolute path (obtained from \p FWriter), its interface hash string, its
/// last modification time in nanoseconds since epoch, and its size in bytes.
/// Integers are written little-endian.
///
/// When \p MSF is a single source file whose info cannot be populated, no
/// record is emitted at all.
static void emitFileListRecord(llvm::BitstreamWriter &Out,
                               ModuleOrSourceFile MSF, StringWriter &FWriter) {
  assert(MSF);

  struct SourceFileListWriter {
    StringWriter &FWriter;

    llvm::SmallString<0> Buffer;
    llvm::StringSet<> seenFilenames;

    void emitSourceFileInfo(const BasicSourceFileInfo &info) {
      // Make 'FilePath' absolute for serialization.
      SmallString<128> absolutePath = info.FilePath;
      llvm::sys::fs::make_absolute(absolutePath);

      // Don't emit duplicated files. The check uses the absolute path, which
      // is the form actually serialized, so one file reached through both a
      // relative and an absolute spelling is written only once.
      if (!seenFilenames.insert(absolutePath.str()).second)
        return;

      auto fileID = FWriter.getTextOffset(absolutePath);
      auto fingerprintStr = info.InterfaceHash.getRawValue();
      auto timestamp = std::chrono::duration_cast<std::chrono::nanoseconds>(
                           info.LastModified.time_since_epoch())
                           .count();

      llvm::raw_svector_ostream out(Buffer);
      endian::Writer writer(out, little);
      // FilePath.
      writer.write<uint32_t>(fileID);
      // InterfaceHash (fixed length string).
      assert(fingerprintStr.size() == Fingerprint::DIGEST_LENGTH);
      out << fingerprintStr;
      // LastModified (nanoseconds since epoch).
      writer.write<uint64_t>(timestamp);
      // FileSize (num of bytes).
      writer.write<uint64_t>(info.FileSize);
    }

    SourceFileListWriter(StringWriter &FWriter) : FWriter(FWriter) {
      Buffer.reserve(1024);
    }
  } writer(FWriter);

  if (SourceFile *SF = MSF.dyn_cast<SourceFile *>()) {
    BasicSourceFileInfo info;
    // populate() returns true on failure.
    if (info.populate(SF))
      return;
    writer.emitSourceFileInfo(info);
  } else {
    auto *M = MSF.get<ModuleDecl *>();
    M->collectBasicSourceFileInfo([&](const BasicSourceFileInfo &info) {
      writer.emitSourceFileInfo(info);
    });
  }

  const decl_locs_block::SourceFileListLayout FileList(Out);
  SmallVector<uint64_t, 8> scratch;
  FileList.emit(scratch, writer.Buffer);
}
845+
786846
class SourceInfoSerializer : public SerializerBase {
787847
public:
788848
using SerializerBase::SerializerBase;
@@ -807,6 +867,7 @@ class SourceInfoSerializer : public SerializerBase {
807867
BLOCK_RECORD(control_block, TARGET);
808868

809869
BLOCK(DECL_LOCS_BLOCK);
870+
BLOCK_RECORD(decl_locs_block, SOURCE_FILE_LIST);
810871
BLOCK_RECORD(decl_locs_block, BASIC_DECL_LOCS);
811872
BLOCK_RECORD(decl_locs_block, DECL_USRS);
812873
BLOCK_RECORD(decl_locs_block, TEXT_DATA);
@@ -849,6 +910,7 @@ void serialization::writeSourceInfoToStream(raw_ostream &os,
849910
DeclUSRsTableWriter USRWriter;
850911
StringWriter FPWriter;
851912
DocRangeWriter DocWriter;
913+
emitFileListRecord(S.Out, DC, FPWriter);
852914
emitBasicLocsRecord(S.Out, DC, USRWriter, FPWriter, DocWriter);
853915
// Emit USR table mapping from a USR to USR Id.
854916
// The basic locs record uses USR Id instead of actual USR, so that we

0 commit comments

Comments
 (0)