Skip to content

Commit c58fa70

Browse files
committed
Don’t re-index all files that include a header when a header is modified
When a header is modified, we don’t we want to re-index all main files that include it. Instead, we just want to index one main to effectively re-index the header itself. I originally implemented re-indexing of all files that include the header but on second thought, headers are like Swift modules, where we also don’t re-index all dependencies either. And if you change a low-level header that’s included by the entire project, you probably don’t want the indexer to go off and re-index the entire project.
1 parent 597932c commit c58fa70

File tree

4 files changed

+123
-195
lines changed

4 files changed

+123
-195
lines changed

Sources/SemanticIndex/CheckedIndex.swift

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ public final class CheckedIndex {
122122
public func mainFilesContainingFile(uri: DocumentURI, crossLanguage: Bool = false) -> [DocumentURI] {
123123
return index.mainFilesContainingFile(path: uri.pseudoPath, crossLanguage: crossLanguage).compactMap {
124124
let url = URL(fileURLWithPath: $0)
125-
guard checker.indexHasUpToDateUnit(for: url, index: self.index) else {
125+
guard checker.indexHasUpToDateUnit(for: url, mainFile: nil, index: self.index) else {
126126
return nil
127127
}
128128
return DocumentURI(url)
@@ -137,8 +137,12 @@ public final class CheckedIndex {
137137
/// Return `true` if a unit file has been indexed for the given file path after its last modification date.
138138
///
139139
/// This means that at least a single build configuration of this file has been indexed since its last modification.
140-
public func hasUpToDateUnit(for url: URL) -> Bool {
141-
return checker.indexHasUpToDateUnit(for: url, index: index)
140+
///
141+
/// If `mainFile` is passed, then `url` is a header file that won't have a unit associated with it. `mainFile` is
142+
/// assumed to be a file that imports `url`. To check that `url` has an up-to-date unit, check that the latest unit
143+
/// for `mainFile` is newer than the mtime of the header file at `url`.
144+
public func hasUpToDateUnit(for url: URL, mainFile: URL? = nil) -> Bool {
145+
return checker.indexHasUpToDateUnit(for: url, mainFile: mainFile, index: index)
142146
}
143147

144148
/// Returns true if the file at the given URL has a different content in the document manager than on-disk. This is
@@ -254,7 +258,11 @@ private struct IndexOutOfDateChecker {
254258
/// Return `true` if a unit file has been indexed for the given file path after its last modification date.
255259
///
256260
/// This means that at least a single build configuration of this file has been indexed since its last modification.
257-
mutating func indexHasUpToDateUnit(for filePath: URL, index: IndexStoreDB) -> Bool {
261+
///
262+
/// If `mainFile` is passed, then `filePath` is a header file that won't have a unit associated with it. `mainFile` is
263+
/// assumed to be a file that imports `url`. To check that `url` has an up-to-date unit, check that the latest unit
264+
/// for `mainFile` is newer than the mtime of the header file at `url`.
265+
mutating func indexHasUpToDateUnit(for filePath: URL, mainFile: URL?, index: IndexStoreDB) -> Bool {
258266
switch checkLevel {
259267
case .inMemoryModifiedFiles(let documentManager):
260268
if fileHasInMemoryModifications(filePath, documentManager: documentManager) {
@@ -265,7 +273,7 @@ private struct IndexOutOfDateChecker {
265273
// If there are no in-memory modifications check if there are on-disk modifications.
266274
fallthrough
267275
case .modifiedFiles:
268-
guard let lastUnitDate = index.dateOfLatestUnitFor(filePath: filePath.path) else {
276+
guard let lastUnitDate = index.dateOfLatestUnitFor(filePath: (mainFile ?? filePath).path) else {
269277
return false
270278
}
271279
do {

Sources/SemanticIndex/SemanticIndexManager.swift

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ public final actor SemanticIndexManager {
115115
/// Returns immediately after scheduling that task.
116116
///
117117
/// Indexing is being performed with a low priority.
118-
public func scheduleBackgroundIndex(files: some Collection<DocumentURI>) async {
118+
private func scheduleBackgroundIndex(files: some Collection<DocumentURI>) async {
119119
await self.index(files: files, priority: .low)
120120
}
121121

@@ -127,7 +127,7 @@ public final actor SemanticIndexManager {
127127
generateBuildGraphTask = Task(priority: .low) {
128128
await orLog("Generating build graph") { try await self.buildSystemManager.generateBuildGraph() }
129129
let index = index.checked(for: .modifiedFiles)
130-
let filesToIndex = await self.buildSystemManager.sourceFiles().map(\.uri)
130+
let filesToIndex = await self.buildSystemManager.sourceFiles().lazy.map(\.uri)
131131
.filter { uri in
132132
guard let url = uri.fileURL else {
133133
// The URI is not a file, so there's nothing we can index.
@@ -185,31 +185,41 @@ public final actor SemanticIndexManager {
185185
}
186186

187187
public func filesDidChange(_ events: [FileEvent]) async {
188-
let filesToReIndex = await filesToReIndex(changedFiles: events.map(\.uri))
189-
188+
// We only re-index the files that were changed and don't re-index any of their dependencies. See the
189+
// `Documentation/Files_To_Reindex.md` file.
190+
let changedFiles = events.map(\.uri)
190191
// Reset the index status for these files so they get re-indexed by `index(files:priority:)`
191-
for uri in filesToReIndex {
192+
for uri in changedFiles {
192193
indexStatus[uri] = nil
193194
}
194-
await scheduleBackgroundIndex(files: filesToReIndex)
195+
await scheduleBackgroundIndex(files: changedFiles)
195196
}
196197

197-
/// Returns the files that should be re-indexed if the given files have been modified.
198+
/// Returns the files that should be indexed to get up-to-date index information for the given files.
198199
///
199-
/// - SeeAlso: The `Documentation/Files_To_Reindex.md` file.
200-
private func filesToReIndex(changedFiles: [DocumentURI]) async -> Set<DocumentURI> {
200+
/// If `files` contains a header file, this will return a `FileToIndex` that re-indexes a main file which includes the
201+
/// header file to update the header file's index.
202+
private func filesToIndex(toCover files: some Collection<DocumentURI>) async -> [FileToIndex] {
201203
let sourceFiles = Set(await buildSystemManager.sourceFiles().map(\.uri))
202-
let filesToReIndex = await changedFiles.asyncMap { (uri) -> [DocumentURI] in
204+
let filesToReIndex = await files.asyncCompactMap { (uri) -> FileToIndex? in
203205
if sourceFiles.contains(uri) {
204-
// If this is a source file, re-index it
205-
return [uri]
206+
// If this is a source file, just index it.
207+
return FileToIndex(uri: uri, mainFile: nil)
208+
}
209+
// Otherwise, see if it is a header file. If so, index a main file that that imports it to update header file's
210+
// index.
211+
// Deterministically pick a main file. This ensures that we always pick the same main file for a header. This way,
212+
// if we request the same header to be indexed twice, we'll pick the same unit file the second time around,
213+
// realize that its timestamp is later than the modification date of the header and we don't need to re-index.
214+
let mainFile = index.checked(for: .deletedFiles)
215+
.mainFilesContainingFile(uri: uri, crossLanguage: false)
216+
.sorted(by: { $0.stringValue < $1.stringValue }).first
217+
guard let mainFile else {
218+
return nil
206219
}
207-
// Otherwise, see if it is a header file. If so, re-index all the files that import it.
208-
// We don't want to re-index `.swift` files that depend on a header file, similar to how we don't re-index a Swift
209-
// module if one of its dependent modules has changed.
210-
return index.checked(for: .deletedFiles).mainFilesContainingFile(uri: uri, crossLanguage: false)
211-
}.flatMap { $0 }
212-
return Set(filesToReIndex)
220+
return FileToIndex(uri: uri, mainFile: mainFile)
221+
}
222+
return filesToReIndex
213223
}
214224

215225
// MARK: - Helper functions
@@ -227,46 +237,47 @@ public final actor SemanticIndexManager {
227237
}
228238

229239
/// Update the index store for the given files, assuming that their targets have already been prepared.
230-
private func updateIndexStore(for files: [DocumentURI], priority: TaskPriority?) async {
240+
private func updateIndexStore(for files: [FileToIndex], priority: TaskPriority?) async {
231241
let taskDescription = AnyIndexTaskDescription(
232242
UpdateIndexStoreTaskDescription(
233243
filesToIndex: Set(files),
234-
buildSystemManager: self.buildSystemManager
244+
buildSystemManager: self.buildSystemManager,
245+
index: index
235246
)
236247
)
237248
let updateIndexStoreTask = await self.indexTaskScheduler.schedule(priority: priority, taskDescription) { newState in
238249
switch newState {
239250
case .executing:
240251
for file in files {
241-
if case .scheduled(let task) = self.indexStatus[file] {
242-
self.indexStatus[file] = .executing(task)
252+
if case .scheduled(let task) = self.indexStatus[file.uri] {
253+
self.indexStatus[file.uri] = .executing(task)
243254
} else {
244255
logger.fault(
245256
"""
246-
Index status of \(file) is in an unexpected state \
247-
'\(self.indexStatus[file]?.description ?? "<nil>", privacy: .public)' when update index store task \
257+
Index status of \(file.uri) is in an unexpected state \
258+
'\(self.indexStatus[file.uri]?.description ?? "<nil>", privacy: .public)' when update index store task \
248259
started executing
249260
"""
250261
)
251262
}
252263
}
253264
case .cancelledToBeRescheduled:
254265
for file in files {
255-
if case .executing(let task) = self.indexStatus[file] {
256-
self.indexStatus[file] = .scheduled(task)
266+
if case .executing(let task) = self.indexStatus[file.uri] {
267+
self.indexStatus[file.uri] = .scheduled(task)
257268
} else {
258269
logger.fault(
259270
"""
260-
Index status of \(file) is in an unexpected state \
261-
'\(self.indexStatus[file]?.description ?? "<nil>", privacy: .public)' when update index store task \
271+
Index status of \(file.uri) is in an unexpected state \
272+
'\(self.indexStatus[file.uri]?.description ?? "<nil>", privacy: .public)' when update index store task \
262273
is cancelled to be rescheduled.
263274
"""
264275
)
265276
}
266277
}
267278
case .finished:
268279
for file in files {
269-
self.indexStatus[file] = .upToDate
280+
self.indexStatus[file.uri] = .upToDate
270281
}
271282
self.indexTaskDidFinish()
272283
}
@@ -279,20 +290,20 @@ public final actor SemanticIndexManager {
279290
/// The returned task finishes when all files are indexed.
280291
@discardableResult
281292
private func index(files: some Collection<DocumentURI>, priority: TaskPriority?) async -> Task<Void, Never> {
282-
let outOfDateFiles = files.filter {
283-
if case .upToDate = indexStatus[$0] {
293+
let outOfDateFiles = await filesToIndex(toCover: files).filter {
294+
if case .upToDate = indexStatus[$0.uri] {
284295
return false
285296
}
286297
return true
287298
}
288-
.sorted(by: { $0.stringValue < $1.stringValue }) // sort files to get deterministic indexing order
299+
.sorted(by: { $0.uri.stringValue < $1.uri.stringValue }) // sort files to get deterministic indexing order
289300

290301
// Sort the targets in topological order so that low-level targets get built before high-level targets, allowing us
291302
// to index the low-level targets ASAP.
292-
var filesByTarget: [ConfiguredTarget: [DocumentURI]] = [:]
303+
var filesByTarget: [ConfiguredTarget: [FileToIndex]] = [:]
293304
for file in outOfDateFiles {
294-
guard let target = await buildSystemManager.canonicalConfiguredTarget(for: file) else {
295-
logger.error("Not indexing \(file.forLogging) because the target could not be determined")
305+
guard let target = await buildSystemManager.canonicalConfiguredTarget(for: file.uri) else {
306+
logger.error("Not indexing \(file.uri.forLogging) because the target could not be determined")
296307
continue
297308
}
298309
filesByTarget[target, default: []].append(file)
@@ -349,7 +360,7 @@ public final actor SemanticIndexManager {
349360
// setting it to `.scheduled` because we don't have an `await` call between the creation of `indexTask` and
350361
// this loop, so we still have exclusive access to the `SemanticIndexManager` actor and hence `updateIndexStore`
351362
// can't execute until we have set all index statuses to `.scheduled`.
352-
indexStatus[file] = .scheduled(indexTask)
363+
indexStatus[file.uri] = .scheduled(indexTask)
353364
}
354365
indexTasksWereScheduled(filesToIndex.count)
355366
}

Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,16 @@ import class TSCBasic.Process
2222

2323
private nonisolated(unsafe) var updateIndexStoreIDForLogging = AtomicUInt32(initialValue: 1)
2424

25+
/// Information about a file that should be indexed.
26+
///
27+
/// The URI of the file whose index should be updated. This could be a header file that can't actually be indexed on
28+
/// its own. In that case `mainFile` is the file that should be indexed, which will effectively update the index of
29+
/// `uri`.
30+
struct FileToIndex: Hashable {
31+
let uri: DocumentURI
32+
let mainFile: DocumentURI?
33+
}
34+
2535
/// Describes a task to index a set of source files.
2636
///
2737
/// This task description can be scheduled in a `TaskScheduler`.
@@ -30,11 +40,15 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
3040
public let id = updateIndexStoreIDForLogging.fetchAndIncrement()
3141

3242
/// The files that should be indexed.
33-
private let filesToIndex: Set<DocumentURI>
43+
private let filesToIndex: Set<FileToIndex>
3444

3545
/// The build system manager that is used to get the toolchain and build settings for the files to index.
3646
private let buildSystemManager: BuildSystemManager
3747

48+
/// A reference to the underlying index store. Used to check if the index is already up-to-date for a file, in which
49+
/// case we don't need to index it again.
50+
private let index: UncheckedIndex
51+
3852
/// The task is idempotent because indexing the same file twice produces the same result as indexing it once.
3953
public var isIdempotent: Bool { true }
4054

@@ -49,11 +63,13 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
4963
}
5064

5165
init(
52-
filesToIndex: Set<DocumentURI>,
53-
buildSystemManager: BuildSystemManager
66+
filesToIndex: Set<FileToIndex>,
67+
buildSystemManager: BuildSystemManager,
68+
index: UncheckedIndex
5469
) {
5570
self.filesToIndex = filesToIndex
5671
self.buildSystemManager = buildSystemManager
72+
self.index = index
5773
}
5874

5975
public func execute() async {
@@ -66,12 +82,12 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
6682
) {
6783
let startDate = Date()
6884

69-
let filesToIndexDescription = filesToIndex.map { $0.fileURL?.lastPathComponent ?? $0.stringValue }
85+
let filesToIndexDescription = filesToIndex.map { $0.uri.fileURL?.lastPathComponent ?? $0.uri.stringValue }
7086
.joined(separator: ", ")
7187
logger.log(
7288
"Starting updating index store with priority \(Task.currentPriority.rawValue, privacy: .public): \(filesToIndexDescription)"
7389
)
74-
let filesToIndex = filesToIndex.sorted(by: { $0.stringValue < $1.stringValue })
90+
let filesToIndex = filesToIndex.sorted(by: { $0.uri.stringValue < $1.uri.stringValue })
7591
// TODO (indexing): Once swiftc supports it, we should group files by target and index files within the same
7692
// target together in one swiftc invocation.
7793
// https://github.com/apple/sourcekit-lsp/issues/1268
@@ -104,44 +120,60 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
104120
}
105121
}
106122

107-
private func updateIndexStoreForSingleFile(_ uri: DocumentURI) async {
108-
guard let language = await buildSystemManager.defaultLanguage(for: uri) else {
109-
logger.error("Not indexing \(uri.forLogging) because its language could not be determined")
123+
private func updateIndexStoreForSingleFile(_ fileToIndex: FileToIndex) async {
124+
let mainFileUri = fileToIndex.mainFile ?? fileToIndex.uri
125+
guard let fileToIndexUrl = fileToIndex.uri.fileURL else {
126+
// The URI is not a file, so there's nothing we can index.
127+
return
128+
}
129+
guard
130+
!index.checked(for: .modifiedFiles).hasUpToDateUnit(for: fileToIndexUrl, mainFile: fileToIndex.mainFile?.fileURL)
131+
else {
132+
logger.debug("Not indexing \(fileToIndex.uri.forLogging) because index has an up-to-date unit")
133+
// We consider a file's index up-to-date if we have any up-to-date unit. Changing build settings does not
134+
// invalidate the up-to-date status of the index.
135+
return
136+
}
137+
if let mainFile = fileToIndex.mainFile {
138+
logger.log("Updating index store of \(fileToIndex.uri.forLogging) using main file \(mainFile.forLogging)")
139+
}
140+
guard let language = await buildSystemManager.defaultLanguage(for: mainFileUri) else {
141+
logger.error("Not indexing \(fileToIndex.uri.forLogging) because its language could not be determined")
110142
return
111143
}
112144
let buildSettings = await buildSystemManager.buildSettingsInferredFromMainFile(
113-
for: uri,
145+
for: mainFileUri,
114146
language: language,
115147
logBuildSettings: false
116148
)
117149
guard let buildSettings else {
118-
logger.error("Not indexing \(uri.forLogging) because it has no compiler arguments")
150+
logger.error("Not indexing \(fileToIndex.uri.forLogging) because it has no compiler arguments")
119151
return
120152
}
121-
guard let toolchain = await buildSystemManager.toolchain(for: uri, language) else {
153+
guard let toolchain = await buildSystemManager.toolchain(for: mainFileUri, language) else {
122154
logger.error(
123-
"Not updating index store for \(uri.forLogging) because no toolchain could be determined for the document"
155+
"Not updating index store for \(mainFileUri.forLogging) because no toolchain could be determined for the document"
124156
)
125157
return
126158
}
127159
switch language {
128160
case .swift:
129161
do {
130-
try await updateIndexStore(forSwiftFile: uri, buildSettings: buildSettings, toolchain: toolchain)
162+
try await updateIndexStore(forSwiftFile: mainFileUri, buildSettings: buildSettings, toolchain: toolchain)
131163
} catch {
132-
logger.error("Updating index store for \(uri) failed: \(error.forLogging)")
133-
BuildSettingsLogger.log(settings: buildSettings, for: uri)
164+
logger.error("Updating index store for \(fileToIndex.uri) failed: \(error.forLogging)")
165+
BuildSettingsLogger.log(settings: buildSettings, for: mainFileUri)
134166
}
135167
case .c, .cpp, .objective_c, .objective_cpp:
136168
do {
137-
try await updateIndexStore(forClangFile: uri, buildSettings: buildSettings, toolchain: toolchain)
169+
try await updateIndexStore(forClangFile: mainFileUri, buildSettings: buildSettings, toolchain: toolchain)
138170
} catch {
139-
logger.error("Updating index store for \(uri) failed: \(error.forLogging)")
140-
BuildSettingsLogger.log(settings: buildSettings, for: uri)
171+
logger.error("Updating index store for \(fileToIndex.uri) failed: \(error.forLogging)")
172+
BuildSettingsLogger.log(settings: buildSettings, for: mainFileUri)
141173
}
142174
default:
143175
logger.error(
144-
"Not updating index store for \(uri) because it is a language that is not supported by background indexing"
176+
"Not updating index store for \(fileToIndex.uri) because it is a language that is not supported by background indexing"
145177
)
146178
}
147179
}

0 commit comments

Comments
 (0)