Skip to content

Speed-up lookups for incremental re-parsing #101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 135 additions & 33 deletions Sources/SwiftSyntax/IncrementalParseTransition.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,33 +25,28 @@ public protocol IncrementalParseReusedNodeDelegate {
/// - range: The source region of the currently parsed source.
/// - previousNode: The node from the previous tree that is associated with
/// the skipped source region.
func parserReusedNode(range: ByteSourceRange, previousNode: Syntax)
func parserReusedNode(range: ByteSourceRange, previousNode: SyntaxNode)
}

/// An implementation of `IncrementalParseReusedNodeDelegate` that just collects
/// the range and re-used node into an array.
public final class IncrementalParseReusedNodeCollector:
IncrementalParseReusedNodeDelegate {
public var rangeAndNodes: [(ByteSourceRange, Syntax)] = []
public var rangeAndNodes: [(ByteSourceRange, SyntaxNode)] = []

public init() {}

public func parserReusedNode(range: ByteSourceRange, previousNode: Syntax) {
public func parserReusedNode(range: ByteSourceRange, previousNode: SyntaxNode) {
rangeAndNodes.append((range, previousNode))
}
}

/// Keeps track of a previously parsed syntax tree and the source edits that
/// occurred since it was created and provides a mechanism for the parser to
/// skip regions of an incrementally updated source that was already parsed
/// during a previous parse invocation.
/// occurred since it was created.
public final class IncrementalParseTransition {
// The implementation is based on `SyntaxParsingCache` from the swift
// repository.

private let previousTree: SourceFileSyntax
private let edits: [SourceEdit]
private let reusedDelegate: IncrementalParseReusedNodeDelegate?
fileprivate let previousTree: SourceFileSyntax
fileprivate let edits: [SourceEdit]
fileprivate let reusedDelegate: IncrementalParseReusedNodeDelegate?

/// - Parameters:
/// - previousTree: The previous tree to do lookups on.
Expand Down Expand Up @@ -92,6 +87,26 @@ public final class IncrementalParseTransition {
}
return true
}
}

/// Provides a mechanism for the parser to skip regions of an incrementally
/// updated source that was already parsed during a previous parse invocation.
internal struct IncrementalParseLookup {
fileprivate let transition: IncrementalParseTransition
fileprivate var cursor: SyntaxCursor

/// Creates a lookup over the previous tree held by `transition`, with the
/// cursor initially positioned on that tree's root node.
init(transition: IncrementalParseTransition) {
  let previousRoot = transition.previousTree.data.absoluteRaw
  self.transition = transition
  self.cursor = SyntaxCursor(root: previousRoot)
}

/// The source edits stored on the underlying `transition`.
fileprivate var edits: [SourceEdit] {
return transition.edits
}

/// The delegate stored on the underlying `transition`, or `nil` if it has
/// none.
fileprivate var reusedDelegate: IncrementalParseReusedNodeDelegate? {
return transition.reusedDelegate
}

/// Does a lookup to see if the current source `offset` should be associated
/// with a known `Syntax` node and its region skipped during parsing.
Expand All @@ -102,15 +117,15 @@ public final class IncrementalParseTransition {
/// - Parameters:
/// - newOffset: The byte offset of the source string that is currently parsed.
/// - kind: The `SyntaxKind` that the parser expects at this position.
/// - Returns: A `Syntax` node from the previous parse invocation,
/// - Returns: A `SyntaxNode` from the previous parse invocation,
/// representing the contents of this region, if it is still valid
/// to re-use. `nil` otherwise.
func lookUp(_ newOffset: Int, kind: SyntaxKind) -> _SyntaxBase? {
mutating func lookUp(_ newOffset: Int, kind: SyntaxKind) -> SyntaxNode? {
guard let prevOffset = translateToPreEditOffset(newOffset) else {
return nil
}
let prevPosition = AbsolutePosition(utf8Offset: prevOffset)
let node = lookUpFrom(previousTree, prevPosition: prevPosition, kind: kind)
let node = cursorLookup(prevPosition: prevPosition, kind: kind)
if let delegate = reusedDelegate, let node = node {
delegate.parserReusedNode(
range: ByteSourceRange(offset: newOffset, length: node.byteSize),
Expand All @@ -119,46 +134,57 @@ public final class IncrementalParseTransition {
return node
}

fileprivate func lookUpFrom(
_ node: _SyntaxBase, prevPosition: AbsolutePosition, kind: SyntaxKind
) -> _SyntaxBase? {
if nodeCanBeReused(node, prevPosition: prevPosition, kind: kind) {
return node
}
mutating fileprivate func cursorLookup(
prevPosition: AbsolutePosition, kind: SyntaxKind
) -> SyntaxNode? {
guard !cursor.finished else { return nil }

for child in node.children {
if child.position <= prevPosition && prevPosition < child.endPosition {
return lookUpFrom(child, prevPosition: prevPosition, kind: kind)
while true {
if nodeAtCursorCanBeReused(prevPosition: prevPosition, kind: kind) {
return cursor.asSyntaxNode
}
guard cursor.advanceToNextNode(at: prevPosition) else { return nil }
}
return nil
}

fileprivate func nodeCanBeReused(
_ node: _SyntaxBase, prevPosition: AbsolutePosition, kind: SyntaxKind
fileprivate func nodeAtCursorCanBeReused(
prevPosition: AbsolutePosition, kind: SyntaxKind
) -> Bool {
let node = cursor.node
if node.position != prevPosition {
return false
}
if node.raw.kind != kind {
return false
}

// Fast path check: if parser is past all the edits then any matching node
// can be re-used.
if !edits.isEmpty && edits.last!.range.endOffset < node.position.utf8Offset {
return true;
}

// Node can also not be reused if an edit has been made in the next token's
// text, e.g. because `private struct Foo {}` parses as a CodeBlockItem with
// a StructDecl inside and `private struc Foo {}` parses as two
// CodeBlockItems one for `private` and one for `struc Foo {}`
var nextLeafNodeLength = 0
if let nextToken = node.nextToken {
assert(nextToken.isPresent)
nextLeafNodeLength = nextToken.byteSize - nextToken.trailingTriviaLength.utf8Length
var nextLeafNodeLength: SourceLength = .zero
if let nextSibling = cursor.nextSibling {
// Fast path check: if next sibling is before all the edits then we can
// re-use the node.
if !edits.isEmpty && edits.first!.range.offset > nextSibling.endPosition.utf8Offset {
return true;
}
if let nextToken = nextSibling.raw.firstPresentToken {
nextLeafNodeLength = nextToken.totalLength - nextToken.trailingTriviaLength
}
}
let nodeAffectRange = ByteSourceRange(offset: node.position.utf8Offset,
length: (node.raw.totalLength + nextLeafNodeLength).utf8Length)

for edit in edits {
// Check if this node or the trivia of the next node has been edited. If
// it has, we cannot reuse it.
let nodeAffectRange = ByteSourceRange(offset: node.position.utf8Offset,
length: node.byteSize + nextLeafNodeLength)
if edit.range.offset > nodeAffectRange.endOffset {
// Remaining edits don't affect the node. (Edits are sorted)
break
Expand Down Expand Up @@ -188,3 +214,79 @@ public final class IncrementalParseTransition {
return offset
}
}

/// Functions as an iterator that walks the tree looking for nodes with a
/// certain position.
fileprivate struct SyntaxCursor {
  /// The ancestors of `node`; the last element is its direct parent.
  var parents: [AbsoluteRawSyntax]
  /// The node the cursor currently points at.
  var node: AbsoluteRawSyntax
  /// Set to `true` once `advanceToNextSibling()` has run out of nodes.
  var finished: Bool

  /// Creates a cursor positioned on `root`, with no recorded ancestors.
  init(root: AbsoluteRawSyntax) {
    self.node = root
    self.parents = []
    self.finished = false
  }

  /// The current node bundled with its chain of ancestors.
  var asSyntaxNode: SyntaxNode {
    return SyntaxNode(node: node, parents: ArraySlice(parents))
  }

  /// The next sibling node, or the next sibling of the closest ancestor that
  /// has one if this is the last child. The cursor state is unmodified.
  ///
  /// `nil` if there are no more nodes to walk to.
  var nextSibling: AbsoluteRawSyntax? {
    // Work on copies so that the cursor itself does not move.
    var ancestors = ArraySlice(parents)
    var current = node
    while let parent = ancestors.last {
      if let sibling = current.nextSibling(parent: parent) {
        return sibling
      }
      // `current` was the last child; retry one level up.
      current = ancestors.removeLast()
    }

    return nil
  }

  /// Moves to the first child of the current node.
  /// - Returns: False if the node has no children.
  mutating func advanceToFirstChild() -> Bool {
    guard let child = node.firstChild else { return false }
    parents.append(node)
    node = child
    return true
  }

  /// Moves to the next sibling node or the parent's sibling node if this is
  /// the last child.
  /// - Returns: False if it runs out of nodes to walk to; `finished` is set
  ///   in that case.
  mutating func advanceToNextSibling() -> Bool {
    while let parent = parents.last {
      if let sibling = node.nextSibling(parent: parent) {
        node = sibling
        return true
      }
      // No sibling on this level: climb to the parent and retry from there.
      node = parents.removeLast()
    }

    finished = true
    return false
  }

  /// Moves to the next node in the tree with the provided `position`.
  /// The caller should be calling this with `position`s in ascending order, not
  /// random ones.
  /// - Returns: True if it moved to a new node at the provided position,
  ///   false if it moved to a node past the position or there are no more nodes.
  mutating func advanceToNextNode(at position: AbsolutePosition) -> Bool {
    repeat {
      // If the node is fully before the requested position we can skip its
      // children.
      if node.endPosition > position {
        // `continue` re-evaluates the loop condition against the child that
        // was just entered.
        if advanceToFirstChild() { continue }
      }
      if !advanceToNextSibling() { return false }
    } while node.position < position

    return node.position == position
  }
}
9 changes: 8 additions & 1 deletion Sources/SwiftSyntax/RawSyntax.swift
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ extension RawSyntax {
}
}

extension RawSyntax: TextOutputStreamable {
extension RawSyntax: TextOutputStreamable, CustomStringConvertible {
/// Prints the RawSyntax node, and all of its children, to the provided
/// stream. This implementation must be source-accurate.
/// - Parameter stream: The stream on which to output this node.
Expand All @@ -1014,6 +1014,13 @@ extension RawSyntax: TextOutputStreamable {
}
}
}

/// A source-accurate description of this node, built by streaming the node
/// into a string with `write(to:)`.
var description: String {
  var text = String()
  write(to: &text)
  return text
}
}

extension RawSyntax {
Expand Down
105 changes: 102 additions & 3 deletions Sources/SwiftSyntax/Syntax.swift
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,7 @@ extension _SyntaxBase {

/// A source-accurate description of this node.
public var description: String {
var s = ""
self.write(to: &s)
return s
return data.raw.description
}

/// Prints the raw value of this node to the provided stream.
Expand Down Expand Up @@ -704,3 +702,104 @@ public struct ReversedTokenSequence: Sequence {
return TokenSequence(node)
}
}

/// A lightweight handle to a node of the syntax tree.
///
/// Compared to `Syntax` this representation is cheaper, because the parent
/// hierarchy is kept as raw nodes and no casts to `Syntax` are needed to
/// represent it. Generic information such as the node's position, range and
/// `uniqueIdentifier` is available directly, and the associated `Syntax`
/// object can still be obtained when it is really needed.
///
/// `SyntaxParser` uses `SyntaxNode` to efficiently report which syntax nodes
/// got re-used during incremental re-parsing.
public struct SyntaxNode {
  let absoluteRaw: AbsoluteRawSyntax
  let parents: ArraySlice<AbsoluteRawSyntax>

  internal init(node: AbsoluteRawSyntax, parents: ArraySlice<AbsoluteRawSyntax>) {
    self.absoluteRaw = node
    self.parents = parents
  }

  /// The raw syntax underlying this node.
  internal var raw: RawSyntax {
    return absoluteRaw.raw
  }

  /// Converts this node to a `SyntaxData` object.
  ///
  /// All of the node's parents get wrapped into `_SyntaxBase` objects along
  /// the way. Keep that cost in mind before using this inside
  /// performance-critical code.
  internal var asSyntaxData: SyntaxData {
    let wrappedParent = parent?.asSyntaxBase
    return SyntaxData(absoluteRaw, parent: wrappedParent)
  }

  /// Converts this node to a `_SyntaxBase` object.
  ///
  /// This node and all of its parents get wrapped into `_SyntaxBase` objects.
  /// Keep that cost in mind before using this inside performance-critical
  /// code.
  internal var asSyntaxBase: _SyntaxBase {
    let data = asSyntaxData
    return makeSyntax(data)
  }

  /// Converts this node to a `Syntax` object.
  ///
  /// This node and all of its parents get wrapped into `Syntax` objects.
  /// Keep that cost in mind before using this inside performance-critical
  /// code.
  public var asSyntax: Syntax {
    return asSyntaxBase
  }

  /// The parent of this syntax node, or `nil` if this node is the root.
  public var parent: SyntaxNode? {
    guard let directParent = parents.last else { return nil }
    let remainingAncestors = parents.dropLast()
    return SyntaxNode(node: directParent, parents: remainingAncestors)
  }

  /// The absolute position of the starting point of this node.
  public var position: AbsolutePosition {
    return absoluteRaw.position
  }

  /// The end position of this node, including its trivia.
  public var endPosition: AbsolutePosition {
    return absoluteRaw.endPosition
  }

  /// The textual byte length of this node including leading and trailing
  /// trivia.
  public var byteSize: Int {
    return totalLength.utf8Length
  }

  /// The byte source range of this node including leading and trailing
  /// trivia.
  public var byteRange: ByteSourceRange {
    let startOffset = position.utf8Offset
    return ByteSourceRange(offset: startOffset, length: byteSize)
  }

  /// The length of this node including all of its trivia.
  public var totalLength: SourceLength {
    return raw.totalLength
  }

  /// A value representing the unique identity of the node.
  public var uniqueIdentifier: SyntaxIdentifier {
    return absoluteRaw.info.nodeId
  }
}

extension SyntaxNode: CustomStringConvertible, TextOutputStreamable {
/// A source-accurate description of this node, taken from its raw syntax.
public var description: String {
return raw.description
}

/// Prints the raw value of this node to the provided stream.
/// - Parameter target: The stream to which to print the raw tree.
public func write<Target>(to target: inout Target)
where Target: TextOutputStream {
raw.write(to: &target)
}
}
Loading