Skip to content

Don’t re-index all files that include a header when a header is modified #1306

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions Sources/SemanticIndex/CheckedIndex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public final class CheckedIndex {
public func mainFilesContainingFile(uri: DocumentURI, crossLanguage: Bool = false) -> [DocumentURI] {
return index.mainFilesContainingFile(path: uri.pseudoPath, crossLanguage: crossLanguage).compactMap {
let url = URL(fileURLWithPath: $0)
guard checker.indexHasUpToDateUnit(for: url, index: self.index) else {
guard checker.indexHasUpToDateUnit(for: url, mainFile: nil, index: self.index) else {
return nil
}
return DocumentURI(url)
Expand All @@ -137,8 +137,12 @@ public final class CheckedIndex {
/// Return `true` if a unit file has been indexed for the given file path after its last modification date.
///
/// This means that at least a single build configuration of this file has been indexed since its last modification.
public func hasUpToDateUnit(for url: URL) -> Bool {
return checker.indexHasUpToDateUnit(for: url, index: index)
///
/// If `mainFile` is passed, then `url` is a header file that won't have a unit associated with it. `mainFile` is
/// assumed to be a file that imports `url`. To check that `url` has an up-to-date unit, check that the latest unit
/// for `mainFile` is newer than the mtime of the header file at `url`.
public func hasUpToDateUnit(for url: URL, mainFile: URL? = nil) -> Bool {
return checker.indexHasUpToDateUnit(for: url, mainFile: mainFile, index: index)
}

/// Returns true if the file at the given URL has a different content in the document manager than on-disk. This is
Expand Down Expand Up @@ -254,7 +258,11 @@ private struct IndexOutOfDateChecker {
/// Return `true` if a unit file has been indexed for the given file path after its last modification date.
///
/// This means that at least a single build configuration of this file has been indexed since its last modification.
mutating func indexHasUpToDateUnit(for filePath: URL, index: IndexStoreDB) -> Bool {
///
/// If `mainFile` is passed, then `filePath` is a header file that won't have a unit associated with it. `mainFile` is
/// assumed to be a file that imports `url`. To check that `url` has an up-to-date unit, check that the latest unit
/// for `mainFile` is newer than the mtime of the header file at `url`.
mutating func indexHasUpToDateUnit(for filePath: URL, mainFile: URL?, index: IndexStoreDB) -> Bool {
switch checkLevel {
case .inMemoryModifiedFiles(let documentManager):
if fileHasInMemoryModifications(filePath, documentManager: documentManager) {
Expand All @@ -265,7 +273,7 @@ private struct IndexOutOfDateChecker {
// If there are no in-memory modifications check if there are on-disk modifications.
fallthrough
case .modifiedFiles:
guard let lastUnitDate = index.dateOfLatestUnitFor(filePath: filePath.path) else {
guard let lastUnitDate = index.dateOfLatestUnitFor(filePath: (mainFile ?? filePath).path) else {
return false
}
do {
Expand Down
83 changes: 47 additions & 36 deletions Sources/SemanticIndex/SemanticIndexManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public final actor SemanticIndexManager {
/// Returns immediately after scheduling that task.
///
/// Indexing is being performed with a low priority.
public func scheduleBackgroundIndex(files: some Collection<DocumentURI>) async {
private func scheduleBackgroundIndex(files: some Collection<DocumentURI>) async {
await self.index(files: files, priority: .low)
}

Expand All @@ -127,7 +127,7 @@ public final actor SemanticIndexManager {
generateBuildGraphTask = Task(priority: .low) {
await orLog("Generating build graph") { try await self.buildSystemManager.generateBuildGraph() }
let index = index.checked(for: .modifiedFiles)
let filesToIndex = await self.buildSystemManager.sourceFiles().map(\.uri)
let filesToIndex = await self.buildSystemManager.sourceFiles().lazy.map(\.uri)
.filter { uri in
guard let url = uri.fileURL else {
// The URI is not a file, so there's nothing we can index.
Expand Down Expand Up @@ -185,31 +185,41 @@ public final actor SemanticIndexManager {
}

public func filesDidChange(_ events: [FileEvent]) async {
let filesToReIndex = await filesToReIndex(changedFiles: events.map(\.uri))

// We only re-index the files that were changed and don't re-index any of their dependencies. See the
// `Documentation/Files_To_Reindex.md` file.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that need updating to mention we only pick one file to reindex?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, thought about that on my way home last night as well and wanted to put up a PR to change that anyway. But well spotted 👍🏽

let changedFiles = events.map(\.uri)
// Reset the index status for these files so they get re-indexed by `index(files:priority:)`
for uri in filesToReIndex {
for uri in changedFiles {
indexStatus[uri] = nil
}
await scheduleBackgroundIndex(files: filesToReIndex)
await scheduleBackgroundIndex(files: changedFiles)
}

/// Returns the files that should be re-indexed if the given files have been modified.
/// Returns the files that should be indexed to get up-to-date index information for the given files.
///
/// - SeeAlso: The `Documentation/Files_To_Reindex.md` file.
private func filesToReIndex(changedFiles: [DocumentURI]) async -> Set<DocumentURI> {
/// If `files` contains a header file, this will return a `FileToIndex` that re-indexes a main file which includes the
/// header file to update the header file's index.
private func filesToIndex(toCover files: some Collection<DocumentURI>) async -> [FileToIndex] {
let sourceFiles = Set(await buildSystemManager.sourceFiles().map(\.uri))
let filesToReIndex = await changedFiles.asyncMap { (uri) -> [DocumentURI] in
let filesToReIndex = await files.asyncCompactMap { (uri) -> FileToIndex? in
if sourceFiles.contains(uri) {
// If this is a source file, re-index it
return [uri]
// If this is a source file, just index it.
return FileToIndex(uri: uri, mainFile: nil)
}
// Otherwise, see if it is a header file. If so, index a main file that that imports it to update header file's
// index.
// Deterministically pick a main file. This ensures that we always pick the same main file for a header. This way,
// if we request the same header to be indexed twice, we'll pick the same unit file the second time around,
// realize that its timestamp is later than the modification date of the header and we don't need to re-index.
let mainFile = index.checked(for: .deletedFiles)
.mainFilesContainingFile(uri: uri, crossLanguage: false)
.sorted(by: { $0.stringValue < $1.stringValue }).first
guard let mainFile else {
return nil
}
// Otherwise, see if it is a header file. If so, re-index all the files that import it.
// We don't want to re-index `.swift` files that depend on a header file, similar to how we don't re-index a Swift
// module if one of its dependent modules has changed.
return index.checked(for: .deletedFiles).mainFilesContainingFile(uri: uri, crossLanguage: false)
}.flatMap { $0 }
return Set(filesToReIndex)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given we're no longer uniquing, is there any concern that if e.g 5 headers get modified, and those headers are all included by a single main file, that we could end up running 5 concurrent indexing jobs for that main file?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also didn't really think about the scheduling logic here :) Looking at it again, we'll block the requests on each other, and then they should all be considered up-to-date

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don’t actually, because they all have different FileToIndex.uri and thus they would actually be executing in parallel. Fixing that now.

return FileToIndex(uri: uri, mainFile: mainFile)
}
return filesToReIndex
}

// MARK: - Helper functions
Expand All @@ -227,46 +237,47 @@ public final actor SemanticIndexManager {
}

/// Update the index store for the given files, assuming that their targets have already been prepared.
private func updateIndexStore(for files: [DocumentURI], priority: TaskPriority?) async {
private func updateIndexStore(for files: [FileToIndex], priority: TaskPriority?) async {
let taskDescription = AnyIndexTaskDescription(
UpdateIndexStoreTaskDescription(
filesToIndex: Set(files),
buildSystemManager: self.buildSystemManager
buildSystemManager: self.buildSystemManager,
index: index
)
)
let updateIndexStoreTask = await self.indexTaskScheduler.schedule(priority: priority, taskDescription) { newState in
switch newState {
case .executing:
for file in files {
if case .scheduled(let task) = self.indexStatus[file] {
self.indexStatus[file] = .executing(task)
if case .scheduled(let task) = self.indexStatus[file.uri] {
self.indexStatus[file.uri] = .executing(task)
} else {
logger.fault(
"""
Index status of \(file) is in an unexpected state \
'\(self.indexStatus[file]?.description ?? "<nil>", privacy: .public)' when update index store task \
Index status of \(file.uri) is in an unexpected state \
'\(self.indexStatus[file.uri]?.description ?? "<nil>", privacy: .public)' when update index store task \
started executing
"""
)
}
}
case .cancelledToBeRescheduled:
for file in files {
if case .executing(let task) = self.indexStatus[file] {
self.indexStatus[file] = .scheduled(task)
if case .executing(let task) = self.indexStatus[file.uri] {
self.indexStatus[file.uri] = .scheduled(task)
} else {
logger.fault(
"""
Index status of \(file) is in an unexpected state \
'\(self.indexStatus[file]?.description ?? "<nil>", privacy: .public)' when update index store task \
Index status of \(file.uri) is in an unexpected state \
'\(self.indexStatus[file.uri]?.description ?? "<nil>", privacy: .public)' when update index store task \
is cancelled to be rescheduled.
"""
)
}
}
case .finished:
for file in files {
self.indexStatus[file] = .upToDate
self.indexStatus[file.uri] = .upToDate
}
self.indexTaskDidFinish()
}
Expand All @@ -279,20 +290,20 @@ public final actor SemanticIndexManager {
/// The returned task finishes when all files are indexed.
@discardableResult
private func index(files: some Collection<DocumentURI>, priority: TaskPriority?) async -> Task<Void, Never> {
let outOfDateFiles = files.filter {
if case .upToDate = indexStatus[$0] {
let outOfDateFiles = await filesToIndex(toCover: files).filter {
if case .upToDate = indexStatus[$0.uri] {
return false
}
return true
}
.sorted(by: { $0.stringValue < $1.stringValue }) // sort files to get deterministic indexing order
.sorted(by: { $0.uri.stringValue < $1.uri.stringValue }) // sort files to get deterministic indexing order

// Sort the targets in topological order so that low-level targets get built before high-level targets, allowing us
// to index the low-level targets ASAP.
var filesByTarget: [ConfiguredTarget: [DocumentURI]] = [:]
var filesByTarget: [ConfiguredTarget: [FileToIndex]] = [:]
for file in outOfDateFiles {
guard let target = await buildSystemManager.canonicalConfiguredTarget(for: file) else {
logger.error("Not indexing \(file.forLogging) because the target could not be determined")
guard let target = await buildSystemManager.canonicalConfiguredTarget(for: file.uri) else {
logger.error("Not indexing \(file.uri.forLogging) because the target could not be determined")
Comment on lines +305 to +306
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It probably(?) doesn't matter, but IMO passing the main file here makes more sense since that's the thing we're indexing

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For canonicalConfiguredTarget(for:): Yes, you’re right, we should use file.mainFile ?? file.uri

For logger.error: Added both file.uri and file.mainFile to the logging message.

continue
}
filesByTarget[target, default: []].append(file)
Expand Down Expand Up @@ -349,7 +360,7 @@ public final actor SemanticIndexManager {
// setting it to `.scheduled` because we don't have an `await` call between the creation of `indexTask` and
// this loop, so we still have exclusive access to the `SemanticIndexManager` actor and hence `updateIndexStore`
// can't execute until we have set all index statuses to `.scheduled`.
indexStatus[file] = .scheduled(indexTask)
indexStatus[file.uri] = .scheduled(indexTask)
}
indexTasksWereScheduled(filesToIndex.count)
}
Expand Down
70 changes: 51 additions & 19 deletions Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ import class TSCBasic.Process

private nonisolated(unsafe) var updateIndexStoreIDForLogging = AtomicUInt32(initialValue: 1)

/// Information about a file that should be indexed.
///
/// The URI of the file whose index should be updated. This could be a header file that can't actually be indexed on
/// its own. In that case `mainFile` is the file that should be indexed, which will effectively update the index of
/// `uri`.
struct FileToIndex: Hashable {
let uri: DocumentURI
let mainFile: DocumentURI?
}

/// Describes a task to index a set of source files.
///
/// This task description can be scheduled in a `TaskScheduler`.
Expand All @@ -30,11 +40,15 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
public let id = updateIndexStoreIDForLogging.fetchAndIncrement()

/// The files that should be indexed.
private let filesToIndex: Set<DocumentURI>
private let filesToIndex: Set<FileToIndex>

/// The build system manager that is used to get the toolchain and build settings for the files to index.
private let buildSystemManager: BuildSystemManager

/// A reference to the underlying index store. Used to check if the index is already up-to-date for a file, in which
/// case we don't need to index it again.
private let index: UncheckedIndex

/// The task is idempotent because indexing the same file twice produces the same result as indexing it once.
public var isIdempotent: Bool { true }

Expand All @@ -49,11 +63,13 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
}

init(
filesToIndex: Set<DocumentURI>,
buildSystemManager: BuildSystemManager
filesToIndex: Set<FileToIndex>,
buildSystemManager: BuildSystemManager,
index: UncheckedIndex
) {
self.filesToIndex = filesToIndex
self.buildSystemManager = buildSystemManager
self.index = index
}

public func execute() async {
Expand All @@ -66,12 +82,12 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
) {
let startDate = Date()

let filesToIndexDescription = filesToIndex.map { $0.fileURL?.lastPathComponent ?? $0.stringValue }
let filesToIndexDescription = filesToIndex.map { $0.uri.fileURL?.lastPathComponent ?? $0.uri.stringValue }
.joined(separator: ", ")
logger.log(
"Starting updating index store with priority \(Task.currentPriority.rawValue, privacy: .public): \(filesToIndexDescription)"
)
let filesToIndex = filesToIndex.sorted(by: { $0.stringValue < $1.stringValue })
let filesToIndex = filesToIndex.sorted(by: { $0.uri.stringValue < $1.uri.stringValue })
// TODO (indexing): Once swiftc supports it, we should group files by target and index files within the same
// target together in one swiftc invocation.
// https://github.com/apple/sourcekit-lsp/issues/1268
Expand Down Expand Up @@ -104,44 +120,60 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription {
}
}

private func updateIndexStoreForSingleFile(_ uri: DocumentURI) async {
guard let language = await buildSystemManager.defaultLanguage(for: uri) else {
logger.error("Not indexing \(uri.forLogging) because its language could not be determined")
private func updateIndexStoreForSingleFile(_ fileToIndex: FileToIndex) async {
let mainFileUri = fileToIndex.mainFile ?? fileToIndex.uri
guard let fileToIndexUrl = fileToIndex.uri.fileURL else {
// The URI is not a file, so there's nothing we can index.
return
}
guard
!index.checked(for: .modifiedFiles).hasUpToDateUnit(for: fileToIndexUrl, mainFile: fileToIndex.mainFile?.fileURL)
else {
logger.debug("Not indexing \(fileToIndex.uri.forLogging) because index has an up-to-date unit")
// We consider a file's index up-to-date if we have any up-to-date unit. Changing build settings does not
// invalidate the up-to-date status of the index.
return
}
if let mainFile = fileToIndex.mainFile {
logger.log("Updating index store of \(fileToIndex.uri.forLogging) using main file \(mainFile.forLogging)")
}
guard let language = await buildSystemManager.defaultLanguage(for: mainFileUri) else {
logger.error("Not indexing \(fileToIndex.uri.forLogging) because its language could not be determined")
return
}
let buildSettings = await buildSystemManager.buildSettingsInferredFromMainFile(
for: uri,
for: mainFileUri,
language: language,
logBuildSettings: false
)
guard let buildSettings else {
logger.error("Not indexing \(uri.forLogging) because it has no compiler arguments")
logger.error("Not indexing \(fileToIndex.uri.forLogging) because it has no compiler arguments")
return
}
guard let toolchain = await buildSystemManager.toolchain(for: uri, language) else {
guard let toolchain = await buildSystemManager.toolchain(for: mainFileUri, language) else {
logger.error(
"Not updating index store for \(uri.forLogging) because no toolchain could be determined for the document"
"Not updating index store for \(mainFileUri.forLogging) because no toolchain could be determined for the document"
)
return
}
switch language {
case .swift:
do {
try await updateIndexStore(forSwiftFile: uri, buildSettings: buildSettings, toolchain: toolchain)
try await updateIndexStore(forSwiftFile: mainFileUri, buildSettings: buildSettings, toolchain: toolchain)
} catch {
logger.error("Updating index store for \(uri) failed: \(error.forLogging)")
BuildSettingsLogger.log(settings: buildSettings, for: uri)
logger.error("Updating index store for \(fileToIndex.uri) failed: \(error.forLogging)")
BuildSettingsLogger.log(settings: buildSettings, for: mainFileUri)
}
case .c, .cpp, .objective_c, .objective_cpp:
do {
try await updateIndexStore(forClangFile: uri, buildSettings: buildSettings, toolchain: toolchain)
try await updateIndexStore(forClangFile: mainFileUri, buildSettings: buildSettings, toolchain: toolchain)
} catch {
logger.error("Updating index store for \(uri) failed: \(error.forLogging)")
BuildSettingsLogger.log(settings: buildSettings, for: uri)
logger.error("Updating index store for \(fileToIndex.uri) failed: \(error.forLogging)")
BuildSettingsLogger.log(settings: buildSettings, for: mainFileUri)
}
default:
logger.error(
"Not updating index store for \(uri) because it is a language that is not supported by background indexing"
"Not updating index store for \(fileToIndex.uri) because it is a language that is not supported by background indexing"
)
}
}
Expand Down
Loading