Skip to content

Commit 5364927

Browse files
committed
Eliminate parser recovery based on errorTokens in CodeBlock
1 parent aadd366 commit 5364927

13 files changed

+132
-131
lines changed

Sources/SwiftParser/Declarations.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1531,7 +1531,11 @@ extension Parser {
15311531

15321532
var body = [RawCodeBlockItemSyntax]()
15331533
while !self.at(.eof) && !self.at(.rightBrace) {
1534-
body.append(self.parseCodeBlockItem())
1534+
if let newItem = self.parseCodeBlockItem() {
1535+
body.append(newItem)
1536+
} else {
1537+
break
1538+
}
15351539
}
15361540
let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace)
15371541
return RawSyntax(RawCodeBlockSyntax(

Sources/SwiftParser/Diagnostics/ParseDiagnosticsGenerator.swift

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
import SwiftDiagnostics
14-
import SwiftSyntax
14+
@_spi(RawSyntax) import SwiftSyntax
1515

1616
extension UnexpectedNodesSyntax {
1717
func tokens(satisfying isIncluded: (TokenSyntax) -> Bool) -> [TokenSyntax] {
@@ -58,6 +58,9 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
5858
) -> [Diagnostic] {
5959
let diagProducer = ParseDiagnosticsGenerator()
6060
diagProducer.walk(tree)
61+
diagProducer.diagnostics.sort {
62+
return $0.node.id.indexInTree < $1.node.id.indexInTree
63+
}
6164
return diagProducer.diagnostics
6265
}
6366

@@ -172,6 +175,17 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
172175
return .visitChildren
173176
}
174177

178+
public override func visit(_ node: SourceFileSyntax) -> SyntaxVisitorContinueKind {
179+
if shouldSkip(node) {
180+
return .skipChildren
181+
}
182+
if let extraneous = node.unexpectedBetweenStatementsAndEOFToken, !extraneous.isEmpty {
183+
addDiagnostic(extraneous, ExtaneousCodeAtTopLevel(extraneousCode: extraneous))
184+
markNodesAsHandled(extraneous.id)
185+
}
186+
return .visitChildren
187+
}
188+
175189
public override func visit(_ node: UnresolvedTernaryExprSyntax) -> SyntaxVisitorContinueKind {
176190
if shouldSkip(node) {
177191
return .skipChildren

Sources/SwiftParser/Diagnostics/ParserDiagnosticMessages.swift

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,21 @@ public enum StaticParserFixIt: String, FixItMessage {
9090

9191
// MARK: - Diagnostics (please sort alphabetically)
9292

93+
public struct ExtaneousCodeAtTopLevel: ParserError {
94+
public let extraneousCode: UnexpectedNodesSyntax
95+
96+
public var message: String {
97+
let extraneousCodeStr = extraneousCode.withoutLeadingTrivia().withoutTrailingTrivia().description
98+
// If the extraneous code is multi-line or long (100 is an arbitrarily chosen value),
99+
// it just spams the diagnostic. Just show a generic diagnostic in this case.
100+
if extraneousCodeStr.contains("\n") || extraneousCodeStr.count > 100 {
101+
return "Extraneous code at top level"
102+
} else {
103+
return "Extraneous '\(extraneousCodeStr)' at top level"
104+
}
105+
}
106+
}
107+
93108
public struct MissingTokenError: ParserError {
94109
public let missingToken: TokenSyntax
95110

Sources/SwiftParser/Expressions.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1548,7 +1548,11 @@ extension Parser {
15481548
var elements = [RawCodeBlockItemSyntax]()
15491549
do {
15501550
while !self.at(.eof) && !self.at(.rightBrace) {
1551-
elements.append(self.parseCodeBlockItem())
1551+
if let newItem = self.parseCodeBlockItem() {
1552+
elements.append(newItem)
1553+
} else {
1554+
break
1555+
}
15521556
}
15531557
}
15541558

Sources/SwiftParser/Recovery.swift

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -107,57 +107,6 @@ extension Parser {
107107
}
108108
}
109109

110-
extension Parser {
111-
/// A recovery function that recovers from a number of special cases for syntax
112-
/// elements that cannot possibly be the start of items.
113-
///
114-
/// This function is intended to be called at the start of item parsing so
115-
/// that future calls to item parsing will have a better shot at succeeding
116-
/// without necessarily invoking the general purpose recovery
117-
/// mechanism.
118-
///
119-
/// - Returns: A syntax node capturing the result of recovering from a bad
120-
/// item parse, or `nil` if recovery did not occur.
121-
mutating func recoverFromBadItem() -> RawCodeBlockItemSyntax? {
122-
if let extraRightBrace = self.consume(if: .rightBrace) {
123-
// If we see an extraneous right brace, we need to make progress by
124-
// eating it. The legacy parser forms an unknown stmt kind here, so
125-
// we match it.
126-
let missingStmt = RawMissingStmtSyntax(arena: self.arena)
127-
return RawCodeBlockItemSyntax(
128-
item: RawSyntax(missingStmt),
129-
semicolon: nil,
130-
errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: [ extraRightBrace ], arena: self.arena)),
131-
arena: self.arena)
132-
} else if self.at(.caseKeyword) || self.at(.defaultKeyword) {
133-
// If there's a case or default label at the top level then the user
134-
// has tried to write one outside of the scope of a switch statement.
135-
// Recover up to the next braced block.
136-
let missingStmt = RawMissingStmtSyntax(arena: self.arena)
137-
let extraTokens = self.recover()
138-
return RawCodeBlockItemSyntax(
139-
item: RawSyntax(missingStmt),
140-
semicolon: nil,
141-
errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: extraTokens, arena: self.arena)),
142-
arena: self.arena)
143-
} else if self.at(.poundElseKeyword) || self.at(.poundElseifKeyword)
144-
|| self.at(.poundEndifKeyword) {
145-
// In the case of a catastrophic parse error, consume any trailing
146-
// #else, #elseif, or #endif and move on to the next statement or
147-
// declaration block.
148-
let token = self.consumeAnyToken()
149-
// Create 'MissingDecl' for orphan directives.
150-
return RawCodeBlockItemSyntax(
151-
item: RawSyntax(RawMissingDeclSyntax(attributes: nil, modifiers: nil, arena: self.arena)),
152-
semicolon: nil,
153-
errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: [ token ], arena: self.arena)),
154-
arena: self.arena)
155-
} else {
156-
return nil
157-
}
158-
}
159-
}
160-
161110
// MARK: Lookahead
162111

163112
extension Parser.Lookahead {

Sources/SwiftParser/Statements.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,11 @@ extension Parser {
687687
!self.at(.poundElseifKeyword) &&
688688
!self.at(.poundElseKeyword) &&
689689
!self.lookahead().isStartOfConditionalSwitchCases() {
690-
items.append(self.parseCodeBlockItem())
690+
if let newItem = self.parseCodeBlockItem() {
691+
items.append(newItem)
692+
} else {
693+
break
694+
}
691695
}
692696
statements = RawCodeBlockItemListSyntax(elements: items, arena: self.arena)
693697
}

Sources/SwiftParser/TopLevel.swift

Lines changed: 22 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,13 @@ extension Parser {
2626
@_spi(RawSyntax)
2727
public mutating func parseSourceFile() -> RawSourceFileSyntax {
2828
let items = self.parseTopLevelCodeBlockItems()
29+
var extraneousTokens = [RawSyntax]()
30+
while currentToken.tokenKind != .eof {
31+
extraneousTokens.append(RawSyntax(consumeAnyToken()))
32+
}
33+
let unexpectedBeforeEof = extraneousTokens.isEmpty ? nil : RawUnexpectedNodesSyntax(elements: extraneousTokens, arena: self.arena)
2934
let eof = self.eat(.eof)
30-
return .init(statements: items, eofToken: eof, arena: self.arena)
35+
return .init(statements: items, unexpectedBeforeEof, eofToken: eof, arena: self.arena)
3136
}
3237
}
3338

@@ -41,7 +46,11 @@ extension Parser {
4146
mutating func parseTopLevelCodeBlockItems() -> RawCodeBlockItemListSyntax {
4247
var elements = [RawCodeBlockItemSyntax]()
4348
while !self.at(.eof) {
44-
elements.append(self.parseCodeBlockItem())
49+
if let newElement = self.parseCodeBlockItem() {
50+
elements.append(newElement)
51+
} else {
52+
break
53+
}
4554
}
4655
return .init(elements: elements, arena: self.arena)
4756
}
@@ -68,7 +77,11 @@ extension Parser {
6877
let (unexpectedBeforeLBrace, lbrace) = self.expect(.leftBrace)
6978
var items = [RawCodeBlockItemSyntax]()
7079
while !self.at(.eof) && !self.at(.rightBrace) {
71-
items.append(self.parseCodeBlockItem())
80+
if let newItem = self.parseCodeBlockItem() {
81+
items.append(newItem)
82+
} else {
83+
break
84+
}
7285
}
7386
let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace)
7487

@@ -89,9 +102,8 @@ extension Parser {
89102

90103
/// Parse an individual item - either in a code block or at the top level.
91104
///
92-
/// This function performs the majority of recovery because it
93-
/// is both the first and last opportunity the parser has to examine the
94-
/// input stream before encountering a closing delimiter or the end of input.
105+
/// Returns `nil` if the parser did not consume any tokens while trying to
106+
/// parse the code block item.
95107
///
96108
/// Grammar
97109
/// =======
@@ -107,46 +119,16 @@ extension Parser {
107119
/// statement → compiler-control-statement
108120
/// statements → statement statements?
109121
@_spi(RawSyntax)
110-
public mutating func parseCodeBlockItem() -> RawCodeBlockItemSyntax {
122+
public mutating func parseCodeBlockItem() -> RawCodeBlockItemSyntax? {
111123
// FIXME: It is unfortunate that the Swift book refers to these as
112124
// "statements" and not "items".
113-
if let recovery = self.recoverFromBadItem() {
114-
return recovery
115-
}
116-
117125
let item = self.parseItem()
118126
let semi = self.consume(if: .semicolon)
119127

120-
let errorTokens: RawSyntax?
121-
if item.is(RawMissingExprSyntax.self) || item.is(RawMissingStmtSyntax.self) {
122-
var elements = [RawTokenSyntax]()
123-
if self.at(.atSign) {
124-
// Recover from erroneously placed attribute.
125-
elements.append(self.eat(.atSign))
126-
if self.currentToken.isIdentifier {
127-
elements.append(self.consumeAnyToken())
128-
}
129-
}
130-
131-
while
132-
!self.at(.eof),
133-
!self.at(.rightBrace),
134-
!self.at(.poundIfKeyword), !self.at(.poundElseKeyword),
135-
!self.at(.poundElseifKeyword),
136-
!self.lookahead().isStartOfStatement(),
137-
!self.lookahead().isStartOfDeclaration()
138-
{
139-
let tokens = self.recover()
140-
guard !tokens.isEmpty else {
141-
break
142-
}
143-
elements.append(contentsOf: tokens)
144-
}
145-
errorTokens = RawSyntax(RawNonEmptyTokenListSyntax(elements: elements, arena: self.arena))
146-
} else {
147-
errorTokens = nil
128+
if item.raw.byteLength == 0 && semi == nil {
129+
return nil
148130
}
149-
return .init(item: item, semicolon: semi, errorTokens: errorTokens, arena: self.arena)
131+
return .init(item: item, semicolon: semi, errorTokens: nil, arena: self.arena)
150132
}
151133

152134
private mutating func parseItem() -> RawSyntax {

Sources/SwiftSyntax/Raw/RawSyntax.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ public struct RawSyntax {
137137

138138
extension RawSyntax {
139139
/// The syntax kind of this raw syntax.
140-
var kind: SyntaxKind {
140+
@_spi(RawSyntax)
141+
public var kind: SyntaxKind {
141142
switch rawData.payload {
142143
case .parsedToken(_): return .token
143144
case .materializedToken(_): return .token
@@ -187,7 +188,8 @@ extension RawSyntax {
187188
/// The "width" of the node.
188189
///
189190
/// Sum of text byte lengths of all present descendant token nodes.
190-
var byteLength: Int {
191+
@_spi(RawSyntax)
192+
public var byteLength: Int {
191193
switch rawData.payload {
192194
case .parsedToken(let dat):
193195
if dat.presence == .present {

Sources/SwiftSyntax/SyntaxData.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ struct AbsoluteSyntaxInfo {
7070
}
7171

7272
/// Represents a unique value for a node within its own tree.
73-
struct SyntaxIndexInTree: Hashable {
73+
@_spi(RawSyntax)
74+
public struct SyntaxIndexInTree: Comparable, Hashable {
7475
let indexInTree: UInt32
7576

7677
static var zero: SyntaxIndexInTree = SyntaxIndexInTree(indexInTree: 0)
@@ -99,6 +100,10 @@ struct SyntaxIndexInTree: Hashable {
99100
init(indexInTree: UInt32) {
100101
self.indexInTree = indexInTree
101102
}
103+
104+
public static func < (lhs: SyntaxIndexInTree, rhs: SyntaxIndexInTree) -> Bool {
105+
return lhs.indexInTree < rhs.indexInTree
106+
}
102107
}
103108

104109
/// Provides a stable and unique identity for `Syntax` nodes.
@@ -111,7 +116,8 @@ public struct SyntaxIdentifier: Hashable {
111116
/// might still have different 'rootId's.
112117
let rootId: UInt
113118
/// Unique value for a node within its own tree.
114-
let indexInTree: SyntaxIndexInTree
119+
@_spi(RawSyntax)
120+
public let indexInTree: SyntaxIndexInTree
115121

116122
func advancedBySibling(_ raw: RawSyntax?) -> SyntaxIdentifier {
117123
let newIndexInTree = indexInTree.advancedBy(raw)

Tests/SwiftParserTest/Declarations.swift

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -438,8 +438,12 @@ final class DeclarationTests: XCTestCase {
438438
}
439439

440440
func testExtraneousRightBraceRecovery() {
441-
// FIXME: This test case should produce a diagnostics
442-
AssertParse("class ABC { let def = ghi(jkl: mno) } }")
441+
AssertParse(
442+
"class ABC { let def = ghi(jkl: mno) } #^DIAG^#}",
443+
diagnostics: [
444+
DiagnosticSpec(message: "Extraneous '}' at top level")
445+
]
446+
)
443447
}
444448

445449
func testMissingSubscriptReturnClause() {
@@ -472,6 +476,7 @@ final class DeclarationTests: XCTestCase {
472476
DiagnosticSpec(message: "Expected '' in class"),
473477
DiagnosticSpec(message: "Expected '{' to start class"),
474478
DiagnosticSpec(message: "Expected '}' to end class"),
479+
DiagnosticSpec(message: "Extraneous code at top level"),
475480
]
476481
)
477482
}
@@ -527,11 +532,10 @@ final class DeclarationTests: XCTestCase {
527532
func testTextRecovery() {
528533
AssertParse(
529534
"""
530-
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do #^DIAG_1^#eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.#^DIAG_2^#
535+
Lorem ipsum dolor sit amet#^DIAG^#, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
531536
""",
532537
diagnostics: [
533-
DiagnosticSpec(locationMarker: "DIAG_1", message: "Expected '{' to start 'do' statement"),
534-
DiagnosticSpec(locationMarker: "DIAG_2", message: "Expected '}' to end 'do' statement"),
538+
DiagnosticSpec(message: "Extraneous code at top level"),
535539
]
536540
)
537541
}
@@ -598,6 +602,7 @@ final class DeclarationTests: XCTestCase {
598602
DiagnosticSpec(locationMarker: "MISSING_IDENTIFIER", message: "Expected '' in struct"),
599603
DiagnosticSpec(locationMarker: "BRACES", message: "Expected '{' to start struct"),
600604
DiagnosticSpec(locationMarker: "BRACES", message: "Expected '}' to end struct"),
605+
DiagnosticSpec(locationMarker: "BRACES", message: "Extraneous ': Int) {}' at top level"),
601606
]
602607
)
603608
}
@@ -630,7 +635,7 @@ final class DeclarationTests: XCTestCase {
630635

631636
func testDontRecoverFromUnbalancedParens() {
632637
AssertParse(
633-
"func foo(first second #^COLON^#[third #^RSQUARE_COLON^#fourth: Int) {}",
638+
"func foo(first second #^COLON^#[third #^RSQUARE_COLON^#fourth#^EXTRANEOUS^#: Int) {}",
634639
substructure: Syntax(FunctionParameterSyntax(
635640
attributes: nil,
636641
firstName: TokenSyntax.identifier("first"),
@@ -649,6 +654,7 @@ final class DeclarationTests: XCTestCase {
649654
DiagnosticSpec(locationMarker: "COLON", message: "Expected ':' in function parameter"),
650655
DiagnosticSpec(locationMarker: "RSQUARE_COLON" , message: "Expected ']' to end array type"),
651656
DiagnosticSpec(locationMarker: "RSQUARE_COLON", message: "Expected ')' to end parameter clause"),
657+
DiagnosticSpec(locationMarker: "EXTRANEOUS", message: "Extraneous ': Int) {}' at top level")
652658
]
653659
)
654660
}
@@ -657,7 +663,7 @@ final class DeclarationTests: XCTestCase {
657663
AssertParse(
658664
"""
659665
func foo(first second #^COLON^#third#^RPAREN^#
660-
: Int) {}
666+
#^EXTRANEOUS^#: Int) {}
661667
""",
662668
substructure: Syntax(FunctionParameterSyntax(
663669
attributes: nil,
@@ -672,6 +678,7 @@ final class DeclarationTests: XCTestCase {
672678
diagnostics: [
673679
DiagnosticSpec(locationMarker: "COLON", message: "Expected ':' in function parameter"),
674680
DiagnosticSpec(locationMarker: "RPAREN", message: "Expected ')' to end parameter clause"),
681+
DiagnosticSpec(locationMarker: "EXTRANEOUS", message: "Extraneous ': Int) {}' at top level")
675682
]
676683
)
677684
}

0 commit comments

Comments
 (0)