Skip to content

Commit e5ede0f

Browse files
committed
Do not schedule re-indexing of files if they haven’t been modified since an in-progress index was requested
This fixes the following problem, which caused files to get indexed twice: You open a workspace, which schedules initial indexing. While we are waiting for the build graph generation from the build server, the build servers sends `buildTarget/didChange` notification to SourceKit-LSP (eg. because it has finished loading the build graph). This causes all files in the changed targets to be scheduled for re-indexing since new files might be present in the updated targets. And since we have already started indexing of those files, we assumed that we need to index them again because the contents might have changed. To fix this, keep track of the file’s modification time at the time at which we scheduled indexing for it. If that time hasn’t changed and we have an in-progress indexing operation for it, there is no need to schedule another one.
1 parent 1c96822 commit e5ede0f

File tree

2 files changed

+121
-75
lines changed

2 files changed

+121
-75
lines changed

Sources/SemanticIndex/CheckedIndex.swift

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#if compiler(>=6)
14-
import Foundation
14+
package import Foundation
1515
@preconcurrency package import IndexStoreDB
1616
package import LanguageServerProtocol
1717
import SKLogging
@@ -188,6 +188,20 @@ package final class CheckedIndex {
188188
return checker.fileHasInMemoryModifications(uri)
189189
}
190190

191+
/// Determine the modification date of the file at the given location or, if it is a symlink, the maximum modification
192+
/// time of any hop in the symlink change until the real file.
193+
///
194+
/// This uses the `CheckedIndex`'s mod date cache, so it doesn't require disk access if the modification date of the
195+
/// file has already been computed.
196+
///
197+
/// Returns `nil` if the modification date of the file could not be determined.
198+
package func modificationDate(of uri: DocumentURI) -> Date? {
199+
switch try? checker.modificationDate(of: uri) {
200+
case nil, .fileDoesNotExist: return nil
201+
case .date(let date): return date
202+
}
203+
}
204+
191205
/// If there are any definition occurrences of the given USR, return these.
192206
/// Otherwise return declaration occurrences.
193207
package func definitionOrDeclarationOccurrences(ofUSR usr: String) -> [SymbolOccurrence] {
@@ -327,7 +341,7 @@ private struct IndexOutOfDateChecker {
327341
private let checkLevel: IndexCheckLevel
328342

329343
/// The last modification time of a file. Can also represent the fact that the file does not exist.
330-
private enum ModificationTime {
344+
enum ModificationTime {
331345
case fileDoesNotExist
332346
case date(Date)
333347
}
@@ -495,7 +509,7 @@ private struct IndexOutOfDateChecker {
495509
}
496510
}
497511

498-
private mutating func modificationDate(of uri: DocumentURI) throws -> ModificationTime {
512+
mutating func modificationDate(of uri: DocumentURI) throws -> ModificationTime {
499513
if let cached = modTimeCache[uri] {
500514
return cached
501515
}

Sources/SemanticIndex/SemanticIndexManager.swift

Lines changed: 104 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -52,42 +52,53 @@ private struct OpaqueQueuedIndexTask: Equatable {
5252
}
5353
}
5454

55-
private enum InProgressIndexStore {
56-
/// We know that we need to index the file but and are currently gathering all information to create the `indexTask`
57-
/// that will index it.
58-
///
59-
/// This is needed to avoid the following race: We request indexing of file A. Getting the canonical target for A
60-
/// takes a bit and before that finishes, we request another index of A. In this case, we don't want to kick off
61-
/// two tasks to update the index store.
62-
case creatingIndexTask
55+
private struct InProgressIndexStore {
56+
enum State {
57+
/// We know that we need to index the file but and are currently gathering all information to create the `indexTask`
58+
/// that will index it.
59+
///
60+
/// This is needed to avoid the following race: We request indexing of file A. Getting the canonical target for A
61+
/// takes a bit and before that finishes, we request another index of A. In this case, we don't want to kick off
62+
/// two tasks to update the index store.
63+
case creatingIndexTask
64+
65+
/// We are waiting for preparation of the file's target to be scheduled. The next step is that we wait for
66+
/// preparation to finish before we can update the index store for this file.
67+
///
68+
/// `preparationTaskID` identifies the preparation task so that we can transition a file's index state to
69+
/// `updatingIndexStore` when its preparation task has finished.
70+
///
71+
/// `indexTask` is a task that finishes after both preparation and index store update are done. Whoever owns the index
72+
/// task is still the sole owner of it and responsible for its cancellation.
73+
case waitingForPreparation(preparationTaskID: UUID, indexTask: Task<Void, Never>)
74+
75+
/// We have started preparing this file and are waiting for preparation to finish before we can update the index
76+
/// store for this file.
77+
///
78+
/// `preparationTaskID` identifies the preparation task so that we can transition a file's index state to
79+
/// `updatingIndexStore` when its preparation task has finished.
80+
///
81+
/// `indexTask` is a task that finishes after both preparation and index store update are done. Whoever owns the index
82+
/// task is still the sole owner of it and responsible for its cancellation.
83+
case preparing(preparationTaskID: UUID, indexTask: Task<Void, Never>)
84+
85+
/// The file's target has been prepared and we are updating the file's index store.
86+
///
87+
/// `updateIndexStoreTask` is the task that updates the index store itself.
88+
///
89+
/// `indexTask` is a task that finishes after both preparation and index store update are done. Whoever owns the index
90+
/// task is still the sole owner of it and responsible for its cancellation.
91+
case updatingIndexStore(updateIndexStoreTask: OpaqueQueuedIndexTask, indexTask: Task<Void, Never>)
92+
}
6393

64-
/// We are waiting for preparation of the file's target to be scheduled. The next step is that we wait for
65-
/// preparation to finish before we can update the index store for this file.
66-
///
67-
/// `preparationTaskID` identifies the preparation task so that we can transition a file's index state to
68-
/// `updatingIndexStore` when its preparation task has finished.
69-
///
70-
/// `indexTask` is a task that finishes after both preparation and index store update are done. Whoever owns the index
71-
/// task is still the sole owner of it and responsible for its cancellation.
72-
case waitingForPreparation(preparationTaskID: UUID, indexTask: Task<Void, Never>)
94+
var state: State
7395

74-
/// We have started preparing this file and are waiting for preparation to finish before we can update the index
75-
/// store for this file.
76-
///
77-
/// `preparationTaskID` identifies the preparation task so that we can transition a file's index state to
78-
/// `updatingIndexStore` when its preparation task has finished.
96+
/// The modification time of the time of `FileToIndex.sourceFile` at the time that indexing was scheduled. This allows
97+
/// us to avoid scheduling another indexing operation for the file if the file hasn't been modified since an
98+
/// in-progress indexing operation was scheduled.
7999
///
80-
/// `indexTask` is a task that finishes after both preparation and index store update are done. Whoever owns the index
81-
/// task is still the sole owner of it and responsible for its cancellation.
82-
case preparing(preparationTaskID: UUID, indexTask: Task<Void, Never>)
83-
84-
/// The file's target has been prepared and we are updating the file's index store.
85-
///
86-
/// `updateIndexStoreTask` is the task that updates the index store itself.
87-
///
88-
/// `indexTask` is a task that finishes after both preparation and index store update are done. Whoever owns the index
89-
/// task is still the sole owner of it and responsible for its cancellation.
90-
case updatingIndexStore(updateIndexStoreTask: OpaqueQueuedIndexTask, indexTask: Task<Void, Never>)
100+
/// `nil` if the modification date of the file could not be determined.
101+
var fileModificationDate: Date?
91102
}
92103

93104
/// Status of document indexing / target preparation in `inProgressIndexAndPreparationTasks`.
@@ -229,8 +240,8 @@ package final actor SemanticIndexManager {
229240
let preparationTasks = inProgressPreparationTasks.mapValues { inProgressTask in
230241
return inProgressTask.task.isExecuting ? IndexTaskStatus.executing : IndexTaskStatus.scheduled
231242
}
232-
let indexTasks = inProgressIndexTasks.mapValues { status in
233-
switch status {
243+
let indexTasks = inProgressIndexTasks.mapValues { inProgress in
244+
switch inProgress.state {
234245
case .creatingIndexTask, .waitingForPreparation, .preparing:
235246
return IndexTaskStatus.scheduled
236247
case .updatingIndexStore(updateIndexStoreTask: let updateIndexStoreTask, indexTask: _):
@@ -333,8 +344,8 @@ package final actor SemanticIndexManager {
333344
// can await the index tasks below.
334345
await waitForBuildGraphGenerationTasks()
335346

336-
await inProgressIndexTasks.concurrentForEach { (_, status) in
337-
switch status {
347+
await inProgressIndexTasks.concurrentForEach { (_, inProgress) in
348+
switch inProgress.state {
338349
case .creatingIndexTask:
339350
break
340351
case .waitingForPreparation(preparationTaskID: _, indexTask: let indexTask),
@@ -413,7 +424,7 @@ package final actor SemanticIndexManager {
413424
private func filesToIndex(
414425
toCover files: some Collection<DocumentURI> & Sendable,
415426
indexFilesWithUpToDateUnits: Bool
416-
) async -> [FileToIndex] {
427+
) async -> [(file: FileToIndex, fileModificationDate: Date?)] {
417428
let sourceFiles = await orLog("Getting source files in project") {
418429
Set(try await buildSystemManager.sourceFiles(includeNonBuildableFiles: false).keys)
419430
}
@@ -422,6 +433,7 @@ package final actor SemanticIndexManager {
422433
}
423434
let deletedFilesIndex = index.checked(for: .deletedFiles)
424435
let modifiedFilesIndex = index.checked(for: .modifiedFiles)
436+
425437
let filesToReIndex =
426438
await files
427439
.asyncFilter {
@@ -431,13 +443,13 @@ package final actor SemanticIndexManager {
431443
return false
432444
}
433445
return true
434-
}.compactMap { (uri) -> FileToIndex? in
446+
}.compactMap { uri -> (FileToIndex, Date?)? in
435447
if sourceFiles.contains(uri) {
436448
if !indexFilesWithUpToDateUnits, modifiedFilesIndex.hasUpToDateUnit(for: uri) {
437449
return nil
438450
}
439451
// If this is a source file, just index it.
440-
return .indexableFile(uri)
452+
return (.indexableFile(uri), modifiedFilesIndex.modificationDate(of: uri))
441453
}
442454
// Otherwise, see if it is a header file. If so, index a main file that that imports it to update header file's
443455
// index.
@@ -454,15 +466,7 @@ package final actor SemanticIndexManager {
454466
if !indexFilesWithUpToDateUnits, modifiedFilesIndex.hasUpToDateUnit(for: uri, mainFile: mainFile) {
455467
return nil
456468
}
457-
return .headerFile(header: uri, mainFile: mainFile)
458-
}
459-
.filter {
460-
switch inProgressIndexTasks[$0] {
461-
case .waitingForPreparation, .creatingIndexTask:
462-
return false
463-
default:
464-
return true
465-
}
469+
return (.headerFile(header: uri, mainFile: mainFile), modifiedFilesIndex.modificationDate(of: uri))
466470
}
467471
return filesToReIndex
468472
}
@@ -640,7 +644,7 @@ package final actor SemanticIndexManager {
640644
return
641645
}
642646
for fileAndTarget in filesAndTargets {
643-
switch self.inProgressIndexTasks[fileAndTarget.file] {
647+
switch self.inProgressIndexTasks[fileAndTarget.file]?.state {
644648
case .updatingIndexStore(let registeredTask, _):
645649
if registeredTask == OpaqueQueuedIndexTask(task) {
646650
self.inProgressIndexTasks[fileAndTarget.file] = nil
@@ -656,9 +660,9 @@ package final actor SemanticIndexManager {
656660
self.indexProgressStatusDidChange()
657661
}
658662
for fileAndTarget in filesAndTargets {
659-
switch inProgressIndexTasks[fileAndTarget.file] {
663+
switch inProgressIndexTasks[fileAndTarget.file]?.state {
660664
case .waitingForPreparation(preparationTaskID, let indexTask), .preparing(preparationTaskID, let indexTask):
661-
inProgressIndexTasks[fileAndTarget.file] = .updatingIndexStore(
665+
inProgressIndexTasks[fileAndTarget.file]?.state = .updatingIndexStore(
662666
updateIndexStoreTask: OpaqueQueuedIndexTask(updateIndexTask),
663667
indexTask: indexTask
664668
)
@@ -683,34 +687,62 @@ package final actor SemanticIndexManager {
683687
// We will check the up-to-date status again in `IndexTaskDescription.execute`. This ensures that if we schedule
684688
// schedule two indexing jobs for the same file in quick succession, only the first one actually updates the index
685689
// store and the second one will be a no-op once it runs.
686-
let outOfDateFiles = await filesToIndex(toCover: files, indexFilesWithUpToDateUnits: indexFilesWithUpToDateUnit)
690+
var filesToIndex = await filesToIndex(toCover: files, indexFilesWithUpToDateUnits: indexFilesWithUpToDateUnit)
687691
// sort files to get deterministic indexing order
688-
.sorted(by: { $0.sourceFile.stringValue < $1.sourceFile.stringValue })
689-
690-
if outOfDateFiles.isEmpty {
691-
// Early exit if there are no files to index.
692-
return Task {}
693-
}
694-
695-
logger.debug("Scheduling indexing of \(outOfDateFiles.map(\.sourceFile.stringValue).joined(separator: ", "))")
696-
697-
// Sort the targets in topological order so that low-level targets get built before high-level targets, allowing us
698-
// to index the low-level targets ASAP.
699-
var filesByTarget: [BuildTargetIdentifier: [FileToIndex]] = [:]
692+
.sorted(by: { $0.file.sourceFile.stringValue < $1.file.sourceFile.stringValue })
700693

701694
// The number of index tasks that don't currently have an in-progress task associated with it.
702695
// The denominator in the index progress should get incremented by this amount.
703696
// We don't want to increment the denominator for tasks that already have an index in progress.
704697
var newIndexTasks = 0
705-
for file in outOfDateFiles {
706-
if inProgressIndexTasks[file] == nil {
698+
var alreadyScheduledTasks: Set<FileToIndex> = []
699+
for file in filesToIndex {
700+
let inProgress = inProgressIndexTasks[file.file]
701+
702+
let shouldScheduleIndexing: Bool
703+
switch inProgress?.state {
704+
case nil:
707705
newIndexTasks += 1
706+
shouldScheduleIndexing = true
707+
case .creatingIndexTask, .waitingForPreparation:
708+
// We already have a task that indexes the file but hasn't started preparation yet. Indexing the file again
709+
// won't produce any new results.
710+
alreadyScheduledTasks.insert(file.file)
711+
shouldScheduleIndexing = false
712+
case .preparing(_, _), .updatingIndexStore(_, _):
713+
// We have started indexing of the file and are now requesting to index it again. Unless we know that the file
714+
// hasn't been modified since the last request for indexing, we need to schedule it to get re-indexed again.
715+
if let modDate = file.fileModificationDate, inProgress?.fileModificationDate == modDate {
716+
shouldScheduleIndexing = false
717+
} else {
718+
shouldScheduleIndexing = true
719+
}
720+
}
721+
if shouldScheduleIndexing {
722+
inProgressIndexTasks[file.file] = InProgressIndexStore(
723+
state: .creatingIndexTask,
724+
fileModificationDate: file.fileModificationDate
725+
)
726+
} else {
727+
alreadyScheduledTasks.insert(file.file)
728+
708729
}
709-
inProgressIndexTasks[file] = .creatingIndexTask
710730
}
711731
indexTasksWereScheduled(newIndexTasks)
732+
filesToIndex = filesToIndex.filter { !alreadyScheduledTasks.contains($0.file) }
733+
734+
if filesToIndex.isEmpty {
735+
// Early exit if there are no files to index.
736+
return Task {}
737+
}
738+
739+
logger.debug("Scheduling indexing of \(filesToIndex.map(\.file.sourceFile.stringValue).joined(separator: ", "))")
740+
741+
// Sort the targets in topological order so that low-level targets get built before high-level targets, allowing us
742+
// to index the low-level targets ASAP.
743+
var filesByTarget: [BuildTargetIdentifier: [(FileToIndex)]] = [:]
712744

713-
for fileToIndex in outOfDateFiles {
745+
for fileToIndex in filesToIndex.map(\.file) {
714746
guard let target = await buildSystemManager.canonicalTarget(for: fileToIndex.mainFile) else {
715747
logger.error(
716748
"Not indexing \(fileToIndex.forLogging) because the target could not be determined"
@@ -757,9 +789,9 @@ package final actor SemanticIndexManager {
757789
if case .executing = newState {
758790
for file in filesToIndex {
759791
if case .waitingForPreparation(preparationTaskID: preparationTaskID, indexTask: let indexTask) =
760-
self.inProgressIndexTasks[file]
792+
self.inProgressIndexTasks[file]?.state
761793
{
762-
self.inProgressIndexTasks[file] = .preparing(
794+
self.inProgressIndexTasks[file]?.state = .preparing(
763795
preparationTaskID: preparationTaskID,
764796
indexTask: indexTask
765797
)
@@ -796,7 +828,7 @@ package final actor SemanticIndexManager {
796828
// `.waitingForPreparation` here because we don't have an `await` call between the creation of `indexTask` and
797829
// this loop, so we still have exclusive access to the `SemanticIndexManager` actor and hence `updateIndexStore`
798830
// can't execute until we have set all index statuses to `.waitingForPreparation`.
799-
inProgressIndexTasks[file] = .waitingForPreparation(
831+
inProgressIndexTasks[file]?.state = .waitingForPreparation(
800832
preparationTaskID: preparationTaskID,
801833
indexTask: indexTask
802834
)

0 commit comments

Comments
 (0)