Skip to content

Commit 2aa9dba

Browse files
committed
Speed-up lookups for incremental re-parsing
Previously the search for nodes to re-use was naively doing a lookup that always started from the top of the tree, every time the parser called back. The new mechanism takes advantage of the fact that the parser continuously calls back with ascending source positions, not random ones, and keeps a "cursor" state for re-starting the lookup search from the last node position. This commit also introduces `SyntaxNode` which is a more efficient representation for a syntax tree node, that `SyntaxParser` uses to efficiently report which syntax nodes got re-used during incremental re-parsing. Changes result in improved performance for incremental reparsing. When using the test case from rdar://48511326, performance improves by 321x.
1 parent 08bbf70 commit 2aa9dba

File tree

7 files changed

+312
-45
lines changed

7 files changed

+312
-45
lines changed

Sources/SwiftSyntax/IncrementalParseTransition.swift

Lines changed: 135 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -25,33 +25,28 @@ public protocol IncrementalParseReusedNodeDelegate {
2525
/// - range: The source region of the currently parsed source.
2626
/// - previousNode: The node from the previous tree that is associated with
2727
/// the skipped source region.
28-
func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
28+
func parserReusedNode(range: ByteSourceRange, previousNode: SyntaxNode)
2929
}
3030

3131
/// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
3232
/// the range and re-used node into an array.
3333
public final class IncrementalParseReusedNodeCollector:
3434
IncrementalParseReusedNodeDelegate {
35-
public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
35+
public var rangeAndNodes: [(ByteSourceRange, SyntaxNode)] = []
3636

3737
public init() {}
3838

39-
public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
39+
public func parserReusedNode(range: ByteSourceRange, previousNode: SyntaxNode) {
4040
rangeAndNodes.append((range, previousNode))
4141
}
4242
}
4343

4444
/// Keeps track of a previously parsed syntax tree and the source edits that
45-
/// occurred since it was created and provides a mechanism for the parser to
46-
/// skip regions of an incrementally updated source that was already parsed
47-
/// during a previous parse invocation.
45+
/// occurred since it was created.
4846
public final class IncrementalParseTransition {
49-
// The implementation is based on `SyntaxParsingCache` from the swift
50-
// repository.
51-
52-
private let previousTree: SourceFileSyntax
53-
private let edits: [SourceEdit]
54-
private let reusedDelegate: IncrementalParseReusedNodeDelegate?
47+
fileprivate let previousTree: SourceFileSyntax
48+
fileprivate let edits: [SourceEdit]
49+
fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?
5550

5651
/// - Parameters:
5752
/// - previousTree: The previous tree to do lookups on.
@@ -92,6 +87,26 @@ public final class IncrementalParseTransition {
9287
}
9388
return true
9489
}
90+
}
91+
92+
/// Provides a mechanism for the parser to skip regions of an incrementally
93+
/// updated source that was already parsed during a previous parse invocation.
94+
internal struct IncrementalParseLookup {
95+
fileprivate let transition: IncrementalParseTransition
96+
fileprivate var cursor: SyntaxCursor
97+
98+
init(transition: IncrementalParseTransition) {
99+
self.transition = transition
100+
self.cursor = .init(root: transition.previousTree.data.absoluteRaw)
101+
}
102+
103+
fileprivate var edits: [SourceEdit] {
104+
return transition.edits
105+
}
106+
107+
fileprivate var reusedDelegate: IncrementalParseReusedNodeDelegate? {
108+
return transition.reusedDelegate
109+
}
95110

96111
/// Does a lookup to see if the current source `offset` should be associated
97112
/// with a known `Syntax` node and its region skipped during parsing.
@@ -102,15 +117,15 @@ public final class IncrementalParseTransition {
102117
/// - Parameters:
103118
/// - offset: The byte offset of the source string that is currently parsed.
104119
/// - kind: The `SyntaxKind` that the parser expects at this position.
105-
/// - Returns: A `Syntax` node from the previous parse invocation,
120+
/// - Returns: A `SyntaxNode` from the previous parse invocation,
106121
/// representing the contents of this region, if it is still valid
107122
/// to re-use. `nil` otherwise.
108-
func lookUp(_ newOffset: Int, kind: SyntaxKind) -> _SyntaxBase? {
123+
mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> SyntaxNode? {
109124
guard let prevOffset = translateToPreEditOffset(newOffset) else {
110125
return nil
111126
}
112127
let prevPosition = AbsolutePosition(utf8Offset: prevOffset)
113-
let node = lookUpFrom(previousTree, prevPosition: prevPosition, kind: kind)
128+
let node = cursorLookup(prevPosition: prevPosition, kind: kind)
114129
if let delegate = reusedDelegate, let node = node {
115130
delegate.parserReusedNode(
116131
range: ByteSourceRange(offset: newOffset, length: node.byteSize),
@@ -119,46 +134,57 @@ public final class IncrementalParseTransition {
119134
return node
120135
}
121136

122-
fileprivate func lookUpFrom(
123-
_ node: _SyntaxBase, prevPosition: AbsolutePosition, kind: SyntaxKind
124-
) -> _SyntaxBase? {
125-
if nodeCanBeReused(node, prevPosition: prevPosition, kind: kind) {
126-
return node
127-
}
137+
mutating fileprivate func cursorLookup(
138+
prevPosition: AbsolutePosition, kind: SyntaxKind
139+
) -> SyntaxNode? {
140+
guard !cursor.finished else { return nil }
128141

129-
for child in node.children {
130-
if child.position <= prevPosition && prevPosition < child.endPosition {
131-
return lookUpFrom(child, prevPosition: prevPosition, kind: kind)
142+
while true {
143+
if nodeAtCursorCanBeReused(prevPosition: prevPosition, kind: kind) {
144+
return cursor.asSyntaxNode
132145
}
146+
guard cursor.advanceToNextNode(at: prevPosition) else { return nil }
133147
}
134-
return nil
135148
}
136149

137-
fileprivate func nodeCanBeReused(
138-
_ node: _SyntaxBase, prevPosition: AbsolutePosition, kind: SyntaxKind
150+
fileprivate func nodeAtCursorCanBeReused(
151+
prevPosition: AbsolutePosition, kind: SyntaxKind
139152
) -> Bool {
153+
let node = cursor.node
140154
if node.position != prevPosition {
141155
return false
142156
}
143157
if node.raw.kind != kind {
144158
return false
145159
}
146160

161+
// Fast path check: if parser is past all the edits then any matching node
162+
// can be re-used.
163+
if !edits.isEmpty && edits.last!.range.endOffset < node.position.utf8Offset {
164+
return true;
165+
}
166+
147167
// Node can also not be reused if an edit has been made in the next token's
148168
// text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
149169
// a StructDecl inside and `private struc Foo {}` parses as two
150170
// CodeBlockItems one for `private` and one for `struc Foo {}`
151-
var nextLeafNodeLength = 0
152-
if let nextToken = node.nextToken {
153-
assert(nextToken.isPresent)
154-
nextLeafNodeLength = nextToken.byteSize - nextToken.trailingTriviaLength.utf8Length
171+
var nextLeafNodeLength: SourceLength = .zero
172+
if let nextSibling = cursor.nextSibling {
173+
// Fast path check: if next sibling is before all the edits then we can
174+
// re-use the node.
175+
if !edits.isEmpty && edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
176+
return true;
177+
}
178+
if let nextToken = nextSibling.raw.firstPresentToken {
179+
nextLeafNodeLength = nextToken.totalLength - nextToken.trailingTriviaLength
180+
}
155181
}
182+
let nodeAffectRange = ByteSourceRange(offset: node.position.utf8Offset,
183+
length: (node.raw.totalLength + nextLeafNodeLength).utf8Length)
156184

157185
for edit in edits {
158186
// Check if this node or the trivia of the next node has been edited. If
159187
// it has, we cannot reuse it.
160-
let nodeAffectRange = ByteSourceRange(offset: node.position.utf8Offset,
161-
length: node.byteSize + nextLeafNodeLength)
162188
if edit.range.offset > nodeAffectRange.endOffset {
163189
// Remaining edits don't affect the node. (Edits are sorted)
164190
break
@@ -188,3 +214,79 @@ public final class IncrementalParseTransition {
188214
return offset
189215
}
190216
}
217+
218+
/// Functions as an iterator that walks the tree looking for nodes with a
219+
/// certain position.
220+
fileprivate struct SyntaxCursor {
221+
var parents: [AbsoluteRawSyntax]
222+
var node: AbsoluteRawSyntax
223+
var finished: Bool
224+
225+
init(root: AbsoluteRawSyntax) {
226+
self.node = root
227+
self.parents = []
228+
self.finished = false
229+
}
230+
231+
var asSyntaxNode: SyntaxNode {
232+
return SyntaxNode(node: node, parents: ArraySlice(parents))
233+
}
234+
235+
/// Returns the next sibling node or the parent's sibling node if this is
236+
/// the last child. The cursor state is unmodified.
237+
/// - Returns: `nil` if it ran out of nodes to walk to.
238+
var nextSibling: AbsoluteRawSyntax? {
239+
var parents = ArraySlice(self.parents)
240+
var node = self.node
241+
while !parents.isEmpty {
242+
if let sibling = node.nextSibling(parent: parents.last!) {
243+
return sibling
244+
}
245+
node = parents.removeLast()
246+
}
247+
248+
return nil
249+
}
250+
251+
/// Moves to the first child of the current node.
252+
/// - Returns: False if the node has no children.
253+
mutating func advanceToFirstChild() -> Bool {
254+
guard let child = node.firstChild else { return false }
255+
parents.append(node)
256+
node = child
257+
return true
258+
}
259+
260+
/// Moves to the next sibling node or the parent's sibling node if this is
261+
/// the last child.
262+
/// - Returns: False if it ran out of nodes to walk to.
263+
mutating func advanceToNextSibling() -> Bool {
264+
while !parents.isEmpty {
265+
if let sibling = node.nextSibling(parent: parents.last!) {
266+
node = sibling
267+
return true
268+
}
269+
node = parents.removeLast()
270+
}
271+
272+
finished = true
273+
return false
274+
}
275+
276+
/// Moves to the next node in the tree with the provided `position`.
277+
/// The caller should be calling this with `position`s in ascending order, not
278+
/// random ones.
279+
/// - Returns: True if it moved to a new node at the provided position,
280+
/// false if it moved to a node past the position or there are no more nodes.
281+
mutating func advanceToNextNode(at position: AbsolutePosition) -> Bool {
282+
repeat {
283+
// if the node is fully before the requested position we can skip its children.
284+
if node.endPosition > position {
285+
if advanceToFirstChild() { continue }
286+
}
287+
if !advanceToNextSibling() { return false }
288+
} while node.position < position
289+
290+
return node.position == position
291+
}
292+
}

Sources/SwiftSyntax/RawSyntax.swift

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -997,7 +997,7 @@ extension RawSyntax {
997997
}
998998
}
999999

1000-
extension RawSyntax: TextOutputStreamable {
1000+
extension RawSyntax: TextOutputStreamable, CustomStringConvertible {
10011001
/// Prints the RawSyntax node, and all of its children, to the provided
10021002
/// stream. This implementation must be source-accurate.
10031003
/// - Parameter stream: The stream on which to output this node.
@@ -1014,6 +1014,13 @@ extension RawSyntax: TextOutputStreamable {
10141014
}
10151015
}
10161016
}
1017+
1018+
/// A source-accurate description of this node.
1019+
var description: String {
1020+
var s = ""
1021+
self.write(to: &s)
1022+
return s
1023+
}
10171024
}
10181025

10191026
extension RawSyntax {

Sources/SwiftSyntax/Syntax.swift

Lines changed: 102 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,9 +257,7 @@ extension _SyntaxBase {
257257

258258
/// A source-accurate description of this node.
259259
public var description: String {
260-
var s = ""
261-
self.write(to: &s)
262-
return s
260+
return data.raw.description
263261
}
264262

265263
/// Prints the raw value of this node to the provided stream.
@@ -704,3 +702,104 @@ public struct ReversedTokenSequence: Sequence {
704702
return TokenSequence(node)
705703
}
706704
}
705+
706+
/// Represents a node from the syntax tree.
707+
///
708+
/// This is a more efficient representation than `Syntax` because it avoids casts
709+
/// to `Syntax` for representing the parent hierarchy.
710+
/// It provides generic information, like the node's position, range, and
711+
/// `uniqueIdentifier`, while still allowing getting the associated `Syntax`
712+
/// object if necessary.
713+
///
714+
/// `SyntaxParser` uses `SyntaxNode` to efficiently report which syntax nodes
715+
/// got re-used during incremental re-parsing.
716+
public struct SyntaxNode {
717+
let absoluteRaw: AbsoluteRawSyntax
718+
let parents: ArraySlice<AbsoluteRawSyntax>
719+
720+
internal init(node: AbsoluteRawSyntax, parents: ArraySlice<AbsoluteRawSyntax>) {
721+
self.absoluteRaw = node
722+
self.parents = parents
723+
}
724+
725+
internal var raw: RawSyntax {
726+
return absoluteRaw.raw
727+
}
728+
729+
/// Converts this node to a `SyntaxData` object.
730+
///
731+
/// This operation results in wrapping all of the node's parents into
732+
/// `_SyntaxBase` objects. There's a cost associated with it that should be taken
733+
/// into account before it is used inside performance-critical code.
734+
internal var asSyntaxData: SyntaxData {
735+
return SyntaxData(absoluteRaw, parent: parent?.asSyntaxBase)
736+
}
737+
738+
/// Converts this node to a `_SyntaxBase` object.
739+
///
740+
/// This operation results in wrapping this node and all of its parents into
741+
/// `_SyntaxBase` objects. There's a cost associated with it that should be taken
742+
/// into account before it is used inside performance-critical code.
743+
internal var asSyntaxBase: _SyntaxBase {
744+
return makeSyntax(asSyntaxData)
745+
}
746+
747+
/// Converts this node to a `Syntax` object.
748+
///
749+
/// This operation results in wrapping this node and all of its parents into
750+
/// `Syntax` objects. There's a cost associated with it that should be taken
751+
/// into account before it is used inside performance-critical code.
752+
public var asSyntax: Syntax {
753+
return asSyntaxBase
754+
}
755+
756+
/// The parent of this syntax node, or `nil` if this node is the root.
757+
public var parent: SyntaxNode? {
758+
guard !parents.isEmpty else { return nil }
759+
return SyntaxNode(node: parents.last!, parents: parents.dropLast())
760+
}
761+
762+
/// The absolute position of the starting point of this node.
763+
public var position: AbsolutePosition {
764+
return absoluteRaw.position
765+
}
766+
767+
/// The end position of this node, including its trivia.
768+
public var endPosition: AbsolutePosition {
769+
return absoluteRaw.endPosition
770+
}
771+
772+
/// The textual byte length of this node including leading and trailing trivia.
773+
public var byteSize: Int {
774+
return totalLength.utf8Length
775+
}
776+
777+
/// The byte source range of this node including leading and trailing trivia.
778+
public var byteRange: ByteSourceRange {
779+
return ByteSourceRange(offset: position.utf8Offset, length: byteSize)
780+
}
781+
782+
/// The length of this node including all of its trivia.
783+
public var totalLength: SourceLength {
784+
return raw.totalLength
785+
}
786+
787+
/// Returns a value representing the unique identity of the node.
788+
public var uniqueIdentifier: SyntaxIdentifier {
789+
return absoluteRaw.info.nodeId
790+
}
791+
}
792+
793+
extension SyntaxNode: CustomStringConvertible, TextOutputStreamable {
794+
/// A source-accurate description of this node.
795+
public var description: String {
796+
return raw.description
797+
}
798+
799+
/// Prints the raw value of this node to the provided stream.
800+
/// - Parameter stream: The stream to which to print the raw tree.
801+
public func write<Target>(to target: inout Target)
802+
where Target: TextOutputStream {
803+
raw.write(to: &target)
804+
}
805+
}

0 commit comments

Comments
 (0)