Skip to content

Eliminate parser recovery based on errorTokens in CodeBlock #672

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Sources/SwiftParser/Declarations.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1530,8 +1530,8 @@ extension Parser {
}

var body = [RawCodeBlockItemSyntax]()
while !self.at(.eof) && !self.at(.rightBrace) {
body.append(self.parseCodeBlockItem())
while !self.at(.rightBrace), let newItem = self.parseCodeBlockItem() {
body.append(newItem)
}
let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace)
return RawSyntax(RawCodeBlockSyntax(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//

import SwiftDiagnostics
import SwiftSyntax
@_spi(RawSyntax) import SwiftSyntax

extension UnexpectedNodesSyntax {
func tokens(satisfying isIncluded: (TokenSyntax) -> Bool) -> [TokenSyntax] {
Expand Down Expand Up @@ -58,6 +58,9 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
) -> [Diagnostic] {
let diagProducer = ParseDiagnosticsGenerator()
diagProducer.walk(tree)
diagProducer.diagnostics.sort {
return $0.node.id.indexInTree < $1.node.id.indexInTree
}
return diagProducer.diagnostics
}

Expand Down Expand Up @@ -172,6 +175,17 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
return .visitChildren
}

public override func visit(_ node: SourceFileSyntax) -> SyntaxVisitorContinueKind {
if shouldSkip(node) {
return .skipChildren
}
if let extraneous = node.unexpectedBetweenStatementsAndEOFToken, !extraneous.isEmpty {
addDiagnostic(extraneous, ExtaneousCodeAtTopLevel(extraneousCode: extraneous))
markNodesAsHandled(extraneous.id)
}
return .visitChildren
}

public override func visit(_ node: UnresolvedTernaryExprSyntax) -> SyntaxVisitorContinueKind {
if shouldSkip(node) {
return .skipChildren
Expand Down
15 changes: 15 additions & 0 deletions Sources/SwiftParser/Diagnostics/ParserDiagnosticMessages.swift
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,21 @@ public enum StaticParserFixIt: String, FixItMessage {

// MARK: - Diagnostics (please sort alphabetically)

public struct ExtaneousCodeAtTopLevel: ParserError {
public let extraneousCode: UnexpectedNodesSyntax

public var message: String {
let extraneousCodeStr = extraneousCode.withoutLeadingTrivia().withoutTrailingTrivia().description
// If the extraneous code is multi-line or long (100 is in arbitrarily chosen value),
// it just spams the diagnostic. Just show a generic diagnostic in this case.
if extraneousCodeStr.contains("\n") || extraneousCodeStr.count > 100 {
return "Extraneous code at top level"
} else {
return "Extraneous '\(extraneousCodeStr)' at top level"
}
}
}

public struct MissingTokenError: ParserError {
public let missingToken: TokenSyntax

Expand Down
4 changes: 2 additions & 2 deletions Sources/SwiftParser/Expressions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1547,8 +1547,8 @@ extension Parser {
// Parse the body.
var elements = [RawCodeBlockItemSyntax]()
do {
while !self.at(.eof) && !self.at(.rightBrace) {
elements.append(self.parseCodeBlockItem())
while !self.at(.rightBrace), let newItem = self.parseCodeBlockItem() {
elements.append(newItem)
}
}

Expand Down
51 changes: 0 additions & 51 deletions Sources/SwiftParser/Recovery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -107,57 +107,6 @@ extension Parser {
}
}

extension Parser {
/// A recovery function that recovers from a number of special cases for syntax
/// elements that cannot possibly be the start of items.
///
/// This function is intended to be called at the start of item parsing so
/// that future calls to item parsing will have a better shot at succeeding
/// without necessarily invoking the general purpose recovery
/// mechanism.
///
/// - Returns: A syntax node capturing the result of recovering from a bad
/// item parse, or `nil` if recovery did not occur.
mutating func recoverFromBadItem() -> RawCodeBlockItemSyntax? {
if let extraRightBrace = self.consume(if: .rightBrace) {
// If we see an extraneous right brace, we need to make progress by
// eating it. The legacy parser forms an unknown stmt kind here, so
// we match it.
let missingStmt = RawMissingStmtSyntax(arena: self.arena)
return RawCodeBlockItemSyntax(
item: RawSyntax(missingStmt),
semicolon: nil,
errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: [ extraRightBrace ], arena: self.arena)),
arena: self.arena)
} else if self.at(.caseKeyword) || self.at(.defaultKeyword) {
// If there's a case or default label at the top level then the user
// has tried to write one outside of the scope of a switch statement.
// Recover up to the next braced block.
let missingStmt = RawMissingStmtSyntax(arena: self.arena)
let extraTokens = self.recover()
return RawCodeBlockItemSyntax(
item: RawSyntax(missingStmt),
semicolon: nil,
errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: extraTokens, arena: self.arena)),
arena: self.arena)
} else if self.at(.poundElseKeyword) || self.at(.poundElseifKeyword)
|| self.at(.poundEndifKeyword) {
// In the case of a catastrophic parse error, consume any trailing
// #else, #elseif, or #endif and move on to the next statement or
// declaration block.
let token = self.consumeAnyToken()
// Create 'MissingDecl' for orphan directives.
return RawCodeBlockItemSyntax(
item: RawSyntax(RawMissingDeclSyntax(attributes: nil, modifiers: nil, arena: self.arena)),
semicolon: nil,
errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: [ token ], arena: self.arena)),
arena: self.arena)
} else {
return nil
}
}
}

// MARK: Lookahead

extension Parser.Lookahead {
Expand Down
8 changes: 4 additions & 4 deletions Sources/SwiftParser/Statements.swift
Original file line number Diff line number Diff line change
Expand Up @@ -681,13 +681,13 @@ extension Parser {
let statements: RawCodeBlockItemListSyntax
do {
var items = [RawCodeBlockItemSyntax]()
while !self.at(.eof) &&
!self.at(.rightBrace) &&
while !self.at(.rightBrace) &&
!self.at(.poundEndifKeyword) &&
!self.at(.poundElseifKeyword) &&
!self.at(.poundElseKeyword) &&
!self.lookahead().isStartOfConditionalSwitchCases() {
items.append(self.parseCodeBlockItem())
!self.lookahead().isStartOfConditionalSwitchCases(),
let newItem = self.parseCodeBlockItem() {
items.append(newItem)
}
statements = RawCodeBlockItemListSyntax(elements: items, arena: self.arena)
}
Expand Down
58 changes: 16 additions & 42 deletions Sources/SwiftParser/TopLevel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,13 @@ extension Parser {
@_spi(RawSyntax)
public mutating func parseSourceFile() -> RawSourceFileSyntax {
let items = self.parseTopLevelCodeBlockItems()
var extraneousTokens = [RawSyntax]()
while currentToken.tokenKind != .eof {
extraneousTokens.append(RawSyntax(consumeAnyToken()))
}
let unexpectedBeforeEof = extraneousTokens.isEmpty ? nil : RawUnexpectedNodesSyntax(elements: extraneousTokens, arena: self.arena)
let eof = self.eat(.eof)
return .init(statements: items, eofToken: eof, arena: self.arena)
return .init(statements: items, unexpectedBeforeEof, eofToken: eof, arena: self.arena)
}
}

Expand All @@ -40,8 +45,8 @@ extension Parser {
/// top-level-declaration → statements?
mutating func parseTopLevelCodeBlockItems() -> RawCodeBlockItemListSyntax {
var elements = [RawCodeBlockItemSyntax]()
while !self.at(.eof) {
elements.append(self.parseCodeBlockItem())
while let newElement = self.parseCodeBlockItem() {
elements.append(newElement)
}
return .init(elements: elements, arena: self.arena)
}
Expand All @@ -67,8 +72,8 @@ extension Parser {
mutating func parseCodeBlock() -> RawCodeBlockSyntax {
let (unexpectedBeforeLBrace, lbrace) = self.expect(.leftBrace)
var items = [RawCodeBlockItemSyntax]()
while !self.at(.eof) && !self.at(.rightBrace) {
items.append(self.parseCodeBlockItem())
while !self.at(.rightBrace), let newItem = self.parseCodeBlockItem() {
items.append(newItem)
}
let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace)

Expand All @@ -89,9 +94,8 @@ extension Parser {

/// Parse an individual item - either in a code block or at the top level.
///
/// This function performs the majority of recovery because it
/// is both the first and last opportunity the parser has to examine the
/// input stream before encountering a closing delimiter or the end of input.
/// Returns `nil` if the parser did not consume any tokens while trying to
/// parse the code block item.
///
/// Grammar
/// =======
Expand All @@ -107,46 +111,16 @@ extension Parser {
/// statement → compiler-control-statement
/// statements → statement statements?
@_spi(RawSyntax)
public mutating func parseCodeBlockItem() -> RawCodeBlockItemSyntax {
public mutating func parseCodeBlockItem() -> RawCodeBlockItemSyntax? {
// FIXME: It is unfortunate that the Swift book refers to these as
// "statements" and not "items".
if let recovery = self.recoverFromBadItem() {
return recovery
}

let item = self.parseItem()
let semi = self.consume(if: .semicolon)

let errorTokens: RawSyntax?
if item.is(RawMissingExprSyntax.self) || item.is(RawMissingStmtSyntax.self) {
var elements = [RawTokenSyntax]()
if self.at(.atSign) {
// Recover from erroneously placed attribute.
elements.append(self.eat(.atSign))
if self.currentToken.isIdentifier {
elements.append(self.consumeAnyToken())
}
}

while
!self.at(.eof),
!self.at(.rightBrace),
!self.at(.poundIfKeyword), !self.at(.poundElseKeyword),
!self.at(.poundElseifKeyword),
!self.lookahead().isStartOfStatement(),
!self.lookahead().isStartOfDeclaration()
{
let tokens = self.recover()
guard !tokens.isEmpty else {
break
}
elements.append(contentsOf: tokens)
}
errorTokens = RawSyntax(RawNonEmptyTokenListSyntax(elements: elements, arena: self.arena))
} else {
errorTokens = nil
if item.raw.byteLength == 0 && semi == nil {
return nil
}
return .init(item: item, semicolon: semi, errorTokens: errorTokens, arena: self.arena)
return .init(item: item, semicolon: semi, errorTokens: nil, arena: self.arena)
}

private mutating func parseItem() -> RawSyntax {
Expand Down
6 changes: 4 additions & 2 deletions Sources/SwiftSyntax/Raw/RawSyntax.swift
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ public struct RawSyntax {

extension RawSyntax {
/// The syntax kind of this raw syntax.
var kind: SyntaxKind {
@_spi(RawSyntax)
public var kind: SyntaxKind {
switch rawData.payload {
case .parsedToken(_): return .token
case .materializedToken(_): return .token
Expand Down Expand Up @@ -187,7 +188,8 @@ extension RawSyntax {
/// The "width" of the node.
///
/// Sum of text byte lengths of all present descendant token nodes.
var byteLength: Int {
@_spi(RawSyntax)
public var byteLength: Int {
switch rawData.payload {
case .parsedToken(let dat):
if dat.presence == .present {
Expand Down
10 changes: 8 additions & 2 deletions Sources/SwiftSyntax/SyntaxData.swift
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ struct AbsoluteSyntaxInfo {
}

/// Represents a unique value for a node within its own tree.
struct SyntaxIndexInTree: Hashable {
@_spi(RawSyntax)
public struct SyntaxIndexInTree: Comparable, Hashable {
let indexInTree: UInt32

static var zero: SyntaxIndexInTree = SyntaxIndexInTree(indexInTree: 0)
Expand Down Expand Up @@ -99,6 +100,10 @@ struct SyntaxIndexInTree: Hashable {
init(indexInTree: UInt32) {
self.indexInTree = indexInTree
}

public static func < (lhs: SyntaxIndexInTree, rhs: SyntaxIndexInTree) -> Bool {
return lhs.indexInTree < rhs.indexInTree
}
}

/// Provides a stable and unique identity for `Syntax` nodes.
Expand All @@ -111,7 +116,8 @@ public struct SyntaxIdentifier: Hashable {
/// might still have different 'rootId's.
let rootId: UInt
/// Unique value for a node within its own tree.
let indexInTree: SyntaxIndexInTree
@_spi(RawSyntax)
public let indexInTree: SyntaxIndexInTree

func advancedBySibling(_ raw: RawSyntax?) -> SyntaxIdentifier {
let newIndexInTree = indexInTree.advancedBy(raw)
Expand Down
25 changes: 11 additions & 14 deletions Tests/SwiftParserTest/Assertions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,11 @@ func AssertDiagnostic<T: SyntaxProtocol>(
let actualColumn = actualLocation.column,
let expectedLine = expectedLocation.line,
let expectedColumn = expectedLocation.column {
XCTAssertEqual(
actualLine, expectedLine,
"Expected diagnostic on line \(expectedLine) but got \(actualLine)",
file: file, line: line
)
XCTAssertEqual(
actualColumn, expectedColumn,
"Expected diagnostic on column \(expectedColumn) but got \(actualColumn)",
file: file, line: line
)
if actualLine != expectedLine || actualColumn != expectedColumn {
XCTFail("Expected diagnostic on \(expectedLine):\(expectedColumn) but got \(actualLine):\(actualColumn)",
file: file, line: line
)
}
} else {
XCTFail("Failed to resolve diagnostic location to line/column", file: file, line: line)
}
Expand Down Expand Up @@ -198,10 +193,12 @@ func AssertParse<Node: RawSyntaxNodeProtocol>(

// Diagnostics
let diags = ParseDiagnosticsGenerator.diagnostics(for: tree)
XCTAssertEqual(diags.count, expectedDiagnostics.count, """
Expected \(expectedDiagnostics.count) diagnostics but received \(diags.count):
\(diags.map(\.debugDescription).joined(separator: "\n"))
""", file: file, line: line)
if diags.count != expectedDiagnostics.count {
XCTFail("""
Expected \(expectedDiagnostics.count) diagnostics but received \(diags.count):
\(diags.map(\.debugDescription).joined(separator: "\n"))
""", file: file, line: line)
}
for (diag, expectedDiag) in zip(diags, expectedDiagnostics) {
AssertDiagnostic(diag, in: tree, markerLocations: markerLocations, expected: expectedDiag, file: file, line: line)
}
Expand Down
Loading