Skip to content

Commit a5d11e2

Browse files
committed
[Dependency Scanning] Add functionality to validate contents of a loaded scanner cache state
Check whether each module dependency info is up-to-date with respect to when the cache contents were serialized in a prior scan. - Add a timestamp field to the serialization format for the dependency scanner cache. - Add a flag "-validate-prior-dependency-scan-cache" which, when combined with "-load-dependency-scan-cache", will have the scanner prune dependencies from the deserialized cache which have inputs that are newer than the prior scan itself. With the above in place, the scan otherwise proceeds as-is, getting cache hits for entries still valid since the prior scan.
1 parent eb1ea4e commit a5d11e2

File tree

13 files changed

+382
-48
lines changed

13 files changed

+382
-48
lines changed

include/swift/AST/DiagnosticsCommon.def

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,17 +212,25 @@ ERROR(scanner_arguments_invalid, none,
212212
ERROR(error_scanner_extra, none,
213213
"failed inside dependency scanner: '%0'", (StringRef))
214214

215-
WARNING(warn_scanner_deserialize_failed, none,
216-
"Failed to load module scanning dependency cache from: '%0', re-building scanner cache from scratch.", (StringRef))
215+
REMARK(warn_scanner_deserialize_failed, none,
216+
"Incremental module scan: Failed to load module scanning dependency cache from: '%0', re-building scanner cache from scratch.", (StringRef))
217217

218218
REMARK(remark_reuse_cache, none,
219-
"Re-using serialized module scanning dependency cache from: '%0'.", (StringRef))
219+
"Incremental module scan: Re-using serialized module scanning dependency cache from: '%0'.", (StringRef))
220220

221221
REMARK(remark_scanner_uncached_lookups, none,
222222
"Module Dependency Scanner queries executed: '%0'.", (unsigned))
223223

224224
REMARK(remark_save_cache, none,
225-
"Serializing module scanning dependency cache to: '%0'.", (StringRef))
225+
"Incremental module scan: Serializing module scanning dependency cache to: '%0'.", (StringRef))
226+
227+
REMARK(remark_scanner_stale_result_invalidate, none,
228+
"Incremental module scan: Dependency info for module '%0' invalidated due to a modified input"
229+
" since last scan: '%1'.", (StringRef, StringRef))
230+
231+
REMARK(remark_scanner_invalidate_upstream, none,
232+
"Incremental module scan: Dependency info for module '%0' invalidated due to an out-of-date"
233+
" dependency.", (StringRef))
226234

227235
//------------------------------------------------------------------------------
228236
// MARK: custom attribute diagnostics

include/swift/AST/ModuleDependencies.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,9 @@ class ModuleDependenciesCache {
12521252
/// Update stored dependencies for the given module.
12531253
void updateDependency(ModuleDependencyID moduleID,
12541254
ModuleDependencyInfo dependencyInfo);
1255+
1256+
/// Remove a given dependency info from the cache.
1257+
void removeDependency(ModuleDependencyID moduleID);
12551258

12561259
/// Resolve this module's set of directly-imported Swift module
12571260
/// dependencies

include/swift/Basic/Statistics.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ FRONTEND_STATISTIC(AST, ModuleVisibilityCacheMiss)
165165
FRONTEND_STATISTIC(AST, ModuleShadowCacheHit)
166166
FRONTEND_STATISTIC(AST, ModuleShadowCacheMiss)
167167

168+
/// Number of filesystem lookups performed by the dependency scanner.
169+
FRONTEND_STATISTIC(AST, NumDepScanFilesystemLookups)
170+
168171
/// Number of full function bodies parsed.
169172
FRONTEND_STATISTIC(Parse, NumFunctionsParsed)
170173

include/swift/DependencyScan/ScanDependencies.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ namespace swift {
2727
class CompilerInvocation;
2828
class CompilerInstance;
2929
class ModuleDependenciesCache;
30+
struct ModuleDependencyID;
31+
struct ModuleDependencyIDHash;
32+
using ModuleDependencyIDSet =
33+
std::unordered_set<ModuleDependencyID, ModuleDependencyIDHash>;
3034
class SwiftDependencyScanningService;
3135

3236
namespace dependencies {
@@ -77,6 +81,36 @@ performBatchModuleScan(CompilerInstance &invocationInstance,
7781
CompilerArgInstanceCacheMap *versionedPCMInstanceCache,
7882
llvm::StringSaver &saver,
7983
const std::vector<BatchScanInput> &BatchInput);
84+
85+
namespace incremental {
86+
/// For the given module dependency graph captured in the 'cache',
87+
/// validate whether each dependency node is up-to-date w.r.t. the serialization
88+
/// time-stamp, i.e. if any of the input files of a module dependency are newer
89+
/// than the serialized dependency graph, it is considered invalidated and must
90+
/// be re-scanned.
91+
void validateInterModuleDependenciesCache(
92+
const ModuleDependencyID &rootModuleID, ModuleDependenciesCache &cache,
93+
const llvm::sys::TimePoint<> &cacheTimeStamp, llvm::vfs::FileSystem &fs,
94+
DiagnosticEngine &diags, bool emitRemarks = false);
95+
96+
/// Perform a postorder DFS to locate modules whose build recipe is out-of-date
97+
/// with respect to their inputs. Upon encountering such a module, add it to the
98+
/// set of invalidated modules, along with the path from the root to this
99+
/// module.
100+
void outOfDateModuleScan(const ModuleDependencyID &sourceModuleID,
101+
const ModuleDependenciesCache &cache,
102+
const llvm::sys::TimePoint<> &cacheTimeStamp,
103+
llvm::vfs::FileSystem &fs, DiagnosticEngine &diags,
104+
bool emitRemarks, ModuleDependencyIDSet &visited,
105+
ModuleDependencyIDSet &modulesRequiringRescan);
106+
107+
/// Validate whether all inputs of a given module dependency
108+
/// are older than the cache serialization time.
109+
bool verifyModuleDependencyUpToDate(
110+
const ModuleDependencyID &moduleID, const ModuleDependenciesCache &cache,
111+
const llvm::sys::TimePoint<> &cacheTimeStamp, llvm::vfs::FileSystem &fs,
112+
DiagnosticEngine &diags, bool emitRemarks);
113+
} // end namespace incremental
80114
} // end namespace dependencies
81115
} // end namespace swift
82116

include/swift/DependencyScan/SerializedModuleDependencyCacheFormat.h

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ namespace graph_block {
9595
enum {
9696
METADATA = 1,
9797
MODULE_NODE,
98+
TIME_NODE,
9899
LINK_LIBRARY_NODE,
99100
LINK_LIBRARY_ARRAY_NODE,
100101
MACRO_DEPENDENCY_NODE,
@@ -113,13 +114,19 @@ enum {
113114

114115
// Always the first record in the file.
115116
using MetadataLayout = BCRecordLayout<
116-
METADATA, // ID
117-
BCFixed<16>, // Inter-Module Dependency graph format major version
118-
BCFixed<16>, // Inter-Module Dependency graph format minor version
119-
BCBlob // Scanner Invocation Context Hash
117+
METADATA, // ID
118+
BCFixed<16>, // Inter-Module Dependency graph format major version
119+
BCFixed<16>, // Inter-Module Dependency graph format minor version
120+
BCBlob // Scanner Invocation Context Hash
120121
>;
121122

122-
// After the metadata record, we have zero or more identifier records,
123+
// After the metadata record, emit serialization time-stamp.
124+
using TimeLayout = BCRecordLayout<
125+
TIME_NODE, // ID
126+
BCBlob // Nanoseconds since epoch as a string
127+
>;
128+
129+
// After the time stamp record, we have zero or more identifier records,
123130
// for each unique string that is referenced in the graph.
124131
//
125132
// Identifiers are referenced by their sequence number, starting from 1.
@@ -138,29 +145,31 @@ using IdentifierNodeLayout = BCRecordLayout<IDENTIFIER_NODE, BCBlob>;
138145
using IdentifierArrayLayout =
139146
BCRecordLayout<IDENTIFIER_ARRAY_NODE, IdentifierIDArryField>;
140147

141-
// ACTODO: Comment
148+
// A record for a given link library node containing information
149+
// required for the build system client to capture a requirement
150+
// to link a given dependency library.
142151
using LinkLibraryLayout =
143152
BCRecordLayout<LINK_LIBRARY_NODE, // ID
144153
IdentifierIDField, // libraryName
145154
IsFrameworkField, // isFramework
146155
IsForceLoadField // forceLoad
147156
>;
148-
// ACTODO: Comment
149157
using LinkLibraryArrayLayout =
150158
BCRecordLayout<LINK_LIBRARY_ARRAY_NODE, IdentifierIDArryField>;
151159

152-
// ACTODO: Comment
160+
// A record for a Macro module dependency of a given dependency
161+
// node.
153162
using MacroDependencyLayout =
154163
BCRecordLayout<MACRO_DEPENDENCY_NODE, // ID
155164
IdentifierIDField, // macroModuleName
156165
IdentifierIDField, // libraryPath
157166
IdentifierIDField // executablePath
158167
>;
159-
// ACTODO: Comment
160168
using MacroDependencyArrayLayout =
161169
BCRecordLayout<MACRO_DEPENDENCY_ARRAY_NODE, IdentifierIDArryField>;
162170

163-
// ACTODO: Comment
171+
// A record capturing information about a given 'import' statement
172+
// captured in a dependency node, including its source location.
164173
using ImportStatementLayout =
165174
BCRecordLayout<IMPORT_STATEMENT_NODE, // ID
166175
IdentifierIDField, // importIdentifier
@@ -169,7 +178,6 @@ using ImportStatementLayout =
169178
ColumnNumberField, // columnNumber
170179
IsOptionalImport // isOptional
171180
>;
172-
// ACTODO: Comment
173181
using ImportStatementArrayLayout =
174182
BCRecordLayout<IMPORT_STATEMENT_ARRAY_NODE, IdentifierIDArryField>;
175183
using OptionalImportStatementArrayLayout =
@@ -271,12 +279,14 @@ using ClangModuleDetailsLayout =
271279
/// Tries to read the dependency graph from the given buffer.
272280
/// Returns \c true if there was an error.
273281
bool readInterModuleDependenciesCache(llvm::MemoryBuffer &buffer,
274-
ModuleDependenciesCache &cache);
282+
ModuleDependenciesCache &cache,
283+
llvm::sys::TimePoint<> &serializedCacheTimeStamp);
275284

276285
/// Tries to read the dependency graph from the given path name.
277286
/// Returns true if there was an error.
278287
bool readInterModuleDependenciesCache(llvm::StringRef path,
279-
ModuleDependenciesCache &cache);
288+
ModuleDependenciesCache &cache,
289+
llvm::sys::TimePoint<> &serializedCacheTimeStamp);
280290

281291
/// Tries to write the dependency graph to the given path name.
282292
/// Returns true if there was an error.

include/swift/Frontend/FrontendOptions.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,11 @@ class FrontendOptions {
363363
/// Load and re-use a prior serialized dependency scanner cache.
364364
bool ReuseDependencyScannerCache = false;
365365

366+
/// Upon loading a prior serialized dependency scanner cache, filter out
367+
/// dependency module information which is no longer up-to-date with respect
368+
/// to input files of every given module.
369+
bool ValidatePriorDependencyScannerCache = false;
370+
366371
/// The path at which to either serialize or deserialize the dependency scanner cache.
367372
std::string SerializedDependencyScannerCachePath;
368373

include/swift/Option/FrontendOptions.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,10 @@ def serialize_dependency_scan_cache : Flag<["-"], "serialize-dependency-scan-cac
270270
HelpText<"After performing a dependency scan, serialize the scanner's internal state.">;
271271

272272
def reuse_dependency_scan_cache : Flag<["-"], "load-dependency-scan-cache">,
273-
HelpText<"After performing a dependency scan, serialize the scanner's internal state.">;
273+
HelpText<"For performing a dependency scan, deserialize the scanner's internal state from a prior scan.">;
274+
275+
def validate_prior_dependency_scan_cache : Flag<["-"], "validate-prior-dependency-scan-cache">,
276+
HelpText<"For performing a dependency scan with a prior scanner state, validate module dependencies.">;
274277

275278
def dependency_scan_cache_path : Separate<["-"], "dependency-scan-cache-path">,
276279
HelpText<"The path to output the dependency scanner's internal state.">;

lib/AST/ModuleDependencies.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,7 @@ ModuleDependenciesCache::findSwiftDependency(StringRef moduleName) const {
807807

808808
const ModuleDependencyInfo &ModuleDependenciesCache::findKnownDependency(
809809
const ModuleDependencyID &moduleID) const {
810+
810811
auto dep = findDependency(moduleID);
811812
assert(dep && "dependency unknown");
812813
return **dep;
@@ -860,6 +861,11 @@ void ModuleDependenciesCache::updateDependency(
860861
map.insert_or_assign(moduleID.ModuleName, std::move(dependencyInfo));
861862
}
862863

864+
/// Remove the dependency info recorded for \p moduleID, if any.
///
/// The entry is erased, by module name, from the dependencies map that
/// corresponds to the module's kind.
void ModuleDependenciesCache::removeDependency(ModuleDependencyID moduleID) {
  getDependenciesMap(moduleID.Kind).erase(moduleID.ModuleName);
}
868+
863869
void
864870
ModuleDependenciesCache::setImportedSwiftDependencies(ModuleDependencyID moduleID,
865871
const ArrayRef<ModuleDependencyID> dependencyIDs) {

lib/DependencyScan/ModuleDependencyCacheSerialization.cpp

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class ModuleDependenciesCacheDeserializer {
5858
bool readSignature();
5959
bool enterGraphBlock();
6060
bool readMetadata(StringRef scannerContextHash);
61+
bool readSerializationTime(llvm::sys::TimePoint<> &SerializationTimeStamp);
6162
bool readGraph(ModuleDependenciesCache &cache);
6263

6364
std::optional<std::string> getIdentifier(unsigned n);
@@ -76,7 +77,8 @@ class ModuleDependenciesCacheDeserializer {
7677
public:
7778
ModuleDependenciesCacheDeserializer(llvm::MemoryBufferRef Data)
7879
: Cursor(Data) {}
79-
bool readInterModuleDependenciesCache(ModuleDependenciesCache &cache);
80+
bool readInterModuleDependenciesCache(ModuleDependenciesCache &cache,
81+
llvm::sys::TimePoint<> &serializedCacheTimeStamp);
8082
};
8183

8284
} // namespace swift
@@ -177,6 +179,35 @@ bool ModuleDependenciesCacheDeserializer::readMetadata(StringRef scannerContextH
177179
return false;
178180
}
179181

182+
/// Read the serialization time-stamp record, which is expected to be the next
/// entry in the stream (it is read right after the metadata record).
///
/// \param SerializationTimeStamp set to the decoded time-stamp when the record
/// is well-formed.
///
/// \returns \c true if there was an error: the cursor could not be advanced,
/// the next entry is not a \c TIME_NODE record, the blob is not a valid decimal
/// integer, or the decoded time-stamp equals the default (epoch) time point.
/// Returns \c false on success.
bool ModuleDependenciesCacheDeserializer::readSerializationTime(
    llvm::sys::TimePoint<> &SerializationTimeStamp) {
  using namespace graph_block;

  auto entry = Cursor.advance();
  if (!entry) {
    consumeError(entry.takeError());
    return true;
  }

  if (entry->Kind != llvm::BitstreamEntry::Record)
    return true;

  auto recordID = Cursor.readRecord(entry->ID, Scratch, &BlobData);
  if (!recordID) {
    consumeError(recordID.takeError());
    return true;
  }

  if (*recordID != TIME_NODE)
    return true;

  TimeLayout::readRecord(Scratch);

  // The blob holds the duration tick count as decimal text. Parse it with
  // StringRef::getAsInteger rather than std::stoll: std::stoll reports
  // malformed or out-of-range input by throwing, whereas a corrupted cache file
  // should simply be reported through this function's error return, like every
  // other failure above.
  long long ticksSinceEpoch = 0;
  if (BlobData.getAsInteger(10, ticksSinceEpoch))
    return true;

  SerializationTimeStamp = llvm::sys::TimePoint<>(
      llvm::sys::TimePoint<>::duration(ticksSinceEpoch));

  // A default-constructed (epoch) time point is treated as "no valid
  // time-stamp" and reported as an error.
  return SerializationTimeStamp == llvm::sys::TimePoint<>();
}
210+
180211
/// Read in the top-level block's graph structure by first reading in
181212
/// all of the file's identifiers and arrays of identifiers, followed by
182213
/// consuming individual module info records and registering them into the
@@ -808,7 +839,8 @@ bool ModuleDependenciesCacheDeserializer::readGraph(
808839
}
809840

810841
bool ModuleDependenciesCacheDeserializer::readInterModuleDependenciesCache(
811-
ModuleDependenciesCache &cache) {
842+
ModuleDependenciesCache &cache,
843+
llvm::sys::TimePoint<> &serializedCacheTimeStamp) {
812844
using namespace graph_block;
813845

814846
if (readSignature())
@@ -819,6 +851,9 @@ bool ModuleDependenciesCacheDeserializer::readInterModuleDependenciesCache(
819851

820852
if (readMetadata(cache.scannerContextHash))
821853
return true;
854+
855+
if (readSerializationTime(serializedCacheTimeStamp))
856+
return true;
822857

823858
if (readGraph(cache))
824859
return true;
@@ -992,21 +1027,23 @@ ModuleDependenciesCacheDeserializer::getModuleDependencyIDArray(unsigned n) {
9921027

9931028
bool swift::dependencies::module_dependency_cache_serialization::
9941029
readInterModuleDependenciesCache(llvm::MemoryBuffer &buffer,
995-
ModuleDependenciesCache &cache) {
1030+
ModuleDependenciesCache &cache,
1031+
llvm::sys::TimePoint<> &serializedCacheTimeStamp) {
9961032
ModuleDependenciesCacheDeserializer deserializer(buffer.getMemBufferRef());
997-
return deserializer.readInterModuleDependenciesCache(cache);
1033+
return deserializer.readInterModuleDependenciesCache(cache, serializedCacheTimeStamp);
9981034
}
9991035

10001036
bool swift::dependencies::module_dependency_cache_serialization::
10011037
readInterModuleDependenciesCache(StringRef path,
1002-
ModuleDependenciesCache &cache) {
1038+
ModuleDependenciesCache &cache,
1039+
llvm::sys::TimePoint<> &serializedCacheTimeStamp) {
10031040
PrettyStackTraceStringAction stackTrace(
10041041
"loading inter-module dependency graph", path);
10051042
auto buffer = llvm::MemoryBuffer::getFile(path);
10061043
if (!buffer)
10071044
return true;
10081045

1009-
return readInterModuleDependenciesCache(*buffer.get(), cache);
1046+
return readInterModuleDependenciesCache(*buffer.get(), cache, serializedCacheTimeStamp);
10101047
}
10111048

10121049
// MARK: Serialization
@@ -1126,6 +1163,7 @@ class ModuleDependenciesCacheSerializer {
11261163
void writeBlockInfoBlock();
11271164

11281165
void writeMetadata(StringRef scanningContextHash);
1166+
void writeSerializationTime();
11291167
void writeIdentifiers();
11301168
void writeArraysOfIdentifiers();
11311169

@@ -1189,6 +1227,7 @@ void ModuleDependenciesCacheSerializer::writeBlockInfoBlock() {
11891227

11901228
BLOCK(GRAPH_BLOCK);
11911229
BLOCK_RECORD(graph_block, METADATA);
1230+
BLOCK_RECORD(graph_block, TIME_NODE);
11921231
BLOCK_RECORD(graph_block, IDENTIFIER_NODE);
11931232
BLOCK_RECORD(graph_block, IDENTIFIER_ARRAY_NODE);
11941233

@@ -1221,6 +1260,16 @@ void ModuleDependenciesCacheSerializer::writeMetadata(StringRef scanningContextH
12211260
scanningContextHash);
12221261
}
12231262

1263+
void ModuleDependenciesCacheSerializer::writeSerializationTime() {
1264+
using namespace graph_block;
1265+
llvm::sys::TimePoint<> now = std::chrono::system_clock::now();
1266+
auto timeSinceEpoch = now.time_since_epoch().count();
1267+
std::string serializationData = std::to_string(timeSinceEpoch);
1268+
TimeLayout::emitRecord(Out, ScratchRecord,
1269+
AbbrCodes[TimeLayout::Code],
1270+
serializationData);
1271+
}
1272+
12241273
void ModuleDependenciesCacheSerializer::writeIdentifiers() {
12251274
using namespace graph_block;
12261275
for (auto str : Identifiers) {
@@ -1795,6 +1844,7 @@ void ModuleDependenciesCacheSerializer::collectStringsAndArrays(
17951844
addIdentifier(swiftBinDeps->sourceInfoPath);
17961845
addIdentifier(swiftBinDeps->moduleCacheKey);
17971846
addIdentifier(swiftBinDeps->headerImport);
1847+
addIdentifier(swiftBinDeps->definingModuleInterfacePath);
17981848
addIdentifier(swiftBinDeps->userModuleVersion);
17991849
addStringArray(moduleID,
18001850
ModuleIdentifierArrayKind::HeaderInputModuleDependencies,
@@ -1877,6 +1927,7 @@ void ModuleDependenciesCacheSerializer::writeInterModuleDependenciesCache(
18771927
using namespace graph_block;
18781928

18791929
registerRecordAbbr<MetadataLayout>();
1930+
registerRecordAbbr<TimeLayout>();
18801931
registerRecordAbbr<IdentifierNodeLayout>();
18811932
registerRecordAbbr<IdentifierArrayLayout>();
18821933
registerRecordAbbr<LinkLibraryLayout>();
@@ -1899,6 +1950,9 @@ void ModuleDependenciesCacheSerializer::writeInterModuleDependenciesCache(
18991950
// Write the version information
19001951
writeMetadata(cache.scannerContextHash);
19011952

1953+
// The current time-stamp
1954+
writeSerializationTime();
1955+
19021956
// Write the strings
19031957
writeIdentifiers();
19041958

0 commit comments

Comments
 (0)