Skip to content

Commit 213ddc6

Browse files
committed
Restrict the maximum nesting level in the parser to avoid stack overflows
1 parent 01e94a1 commit 213ddc6

File tree

13 files changed

+210
-55
lines changed

13 files changed

+210
-55
lines changed

Sources/SwiftParser/Declarations.swift

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,18 @@ extension Parser {
363363

364364
@_spi(RawSyntax)
365365
public mutating func parseGenericParameters() -> RawGenericParameterClauseSyntax {
366-
assert(self.currentToken.starts(with: "<"))
366+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
367+
return RawGenericParameterClauseSyntax(
368+
remainingTokens,
369+
leftAngleBracket: missingToken(.leftAngle),
370+
genericParameterList: RawGenericParameterListSyntax(elements: [], arena: self.arena),
371+
genericWhereClause: nil,
372+
rightAngleBracket: missingToken(.rightAngle),
373+
arena: self.arena
374+
)
375+
}
367376

377+
assert(self.currentToken.starts(with: "<"))
368378
let langle = self.consumeAnyToken(remapping: .leftAngle)
369379
var elements = [RawGenericParameterSyntax]()
370380
do {
@@ -616,6 +626,16 @@ extension Parser {
616626
extension Parser {
617627
@_spi(RawSyntax)
618628
public mutating func parseMemberDeclListItem() -> RawMemberDeclListItemSyntax? {
629+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
630+
let item = RawMemberDeclListItemSyntax(
631+
remainingTokens,
632+
decl: RawDeclSyntax(RawMissingDeclSyntax(attributes: nil, modifiers: nil, arena: self.arena)),
633+
semicolon: nil,
634+
arena: self.arena
635+
)
636+
return item
637+
}
638+
619639
let decl: RawDeclSyntax
620640
if self.at(.poundSourceLocationKeyword) {
621641
decl = RawDeclSyntax(self.parsePoundSourceLocationDirective())

Sources/SwiftParser/Directives.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ extension Parser {
7070
addSemicolonIfNeeded: (_ lastElement: Element, _ newItemAtStartOfLine: Bool, _ parser: inout Parser) -> Element? = { _, _, _ in nil },
7171
syntax: (inout Parser, [Element]) -> RawSyntax?
7272
) -> RawIfConfigDeclSyntax {
73+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
74+
return RawIfConfigDeclSyntax(
75+
remainingTokens,
76+
clauses: RawIfConfigClauseListSyntax(elements: [], arena: self.arena),
77+
poundEndif: missingToken(.poundEndifKeyword),
78+
arena: self.arena
79+
)
80+
}
81+
7382
var clauses = [RawIfConfigClauseSyntax]()
7483
do {
7584
var firstIteration = true

Sources/SwiftParser/Expressions.swift

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1757,6 +1757,16 @@ extension Parser {
17571757
/// dictionary-literal-items → dictionary-literal-item ','? | dictionary-literal-item ',' dictionary-literal-items
17581758
@_spi(RawSyntax)
17591759
public mutating func parseCollectionLiteral() -> RawExprSyntax {
1760+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
1761+
return RawExprSyntax(RawArrayExprSyntax(
1762+
remainingTokens,
1763+
leftSquare: missingToken(.leftSquareBracket),
1764+
elements: RawArrayElementListSyntax(elements: [], arena: self.arena),
1765+
rightSquare: missingToken(.rightSquareBracket),
1766+
arena: self.arena
1767+
))
1768+
}
1769+
17601770
let (unexpectedBeforeLSquare, lsquare) = self.expect(.leftSquareBracket)
17611771

17621772
if let rsquare = self.consume(if: .rightSquareBracket) {
@@ -2177,6 +2187,17 @@ extension Parser {
21772187
/// tuple-element → expression | identifier ':' expression
21782188
@_spi(RawSyntax)
21792189
public mutating func parseArgumentListElements(pattern: PatternContext) -> [RawTupleExprElementSyntax] {
2190+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
2191+
return [RawTupleExprElementSyntax(
2192+
remainingTokens,
2193+
label: nil,
2194+
colon: nil,
2195+
expression: RawExprSyntax(RawMissingExprSyntax(arena: self.arena)),
2196+
trailingComma: nil,
2197+
arena: self.arena
2198+
)]
2199+
}
2200+
21802201
guard !self.at(.rightParen) else {
21812202
return []
21822203
}

Sources/SwiftParser/Parser.swift

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,16 @@ extension Parser {
2626
}
2727

2828
/// Parse the source code in the given string as Swift source file.
29+
/// If `maximumNestingLevel` is set, the parser will stop if a nesting level
30+
/// that is greater than this value is reached to avoid overflowing the stack.
31+
/// The nesting level is increased whenever a bracketed expression like `(`
32+
/// or `{` is started.
2933
public static func parse(
3034
source: UnsafeBufferPointer<UInt8>,
35+
maximumNestingLevel: Int? = nil,
3136
parseTransition: IncrementalParseTransition? = nil
3237
) -> SourceFileSyntax {
33-
var parser = Parser(source)
38+
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
3439
// Extended lifetime is required because `SyntaxArena` in the parser must
3540
// be alive until `Syntax(raw:)` retains the arena.
3641
return withExtendedLifetime(parser) {
@@ -122,6 +127,23 @@ public struct Parser: TokenConsumer {
122127
@_spi(RawSyntax)
123128
public var currentToken: Lexer.Lexeme
124129

130+
/// The current nesting level, i.e. the number of tokens that start a
131+
/// nesting level (e.g. `(` or `{`) minus the number of tokens that end a
132+
/// nesting level (e.g. `)` or `}`) which have been consumed so far.
133+
public var nestingLevel: Int = 0
134+
135+
/// When this nesting level is exceeded, the parser should stop parsing.
136+
public let maximumNestingLevel: Int
137+
138+
/// A default maximum nesting level that is used if the client didn't
139+
/// explicitly specify one. Debug builds of the parser consume a lot more stack
140+
/// space and thus have a lower default maximum nesting level.
141+
#if DEBUG
142+
public static let defaultMaximumNestingLevel = 25
143+
#else
144+
public static let defaultMaximumNestingLevel = 256
145+
#endif
146+
125147
/// Initializes a Parser from the given input buffer.
126148
///
127149
/// The lexer will copy any string data it needs from the resulting buffer
@@ -133,7 +155,9 @@ public struct Parser: TokenConsumer {
133155
/// arena is created automatically, and `input` copied into the
134156
/// arena. If non-`nil`, `input` must be the registered source
135157
/// buffer of `arena` or a slice of the source buffer.
136-
public init(_ input: UnsafeBufferPointer<UInt8>, arena: SyntaxArena? = nil) {
158+
public init(_ input: UnsafeBufferPointer<UInt8>, maximumNestingLevel: Int? = nil, arena: SyntaxArena? = nil) {
159+
self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
160+
137161
var sourceBuffer: UnsafeBufferPointer<UInt8>
138162
if let arena = arena {
139163
self.arena = arena
@@ -150,6 +174,7 @@ public struct Parser: TokenConsumer {
150174

151175
@_spi(RawSyntax)
152176
public mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText? = nil) -> RawTokenSyntax {
177+
adjustNestingLevel(for: kind)
153178
return RawTokenSyntax(missing: kind, text: text, arena: self.arena)
154179
}
155180

@@ -158,6 +183,12 @@ public struct Parser: TokenConsumer {
158183
/// - Returns: The token that was consumed.
159184
@_spi(RawSyntax)
160185
public mutating func consumeAnyToken() -> RawTokenSyntax {
186+
adjustNestingLevel(for: self.currentToken.tokenKind)
187+
return self.consumeAnyTokenWithoutAdjustingNestingLevel()
188+
}
189+
190+
@_spi(RawSyntax)
191+
public mutating func consumeAnyTokenWithoutAdjustingNestingLevel() -> RawTokenSyntax {
161192
let tok = self.currentToken
162193
self.currentToken = self.lexemes.advance()
163194
return RawTokenSyntax(
@@ -168,6 +199,17 @@ public struct Parser: TokenConsumer {
168199
arena: arena
169200
)
170201
}
202+
203+
private mutating func adjustNestingLevel(for tokenKind: RawTokenKind) {
204+
switch tokenKind {
205+
case .leftAngle, .leftBrace, .leftParen, .leftSquareBracket, .poundIfKeyword:
206+
nestingLevel += 1
207+
case .rightAngle, .rightBrace, .rightParen, .rightSquareBracket, .poundEndifKeyword:
208+
nestingLevel -= 1
209+
default:
210+
break
211+
}
212+
}
171213
}
172214

173215
// MARK: Inspecting Tokens
@@ -279,7 +321,7 @@ extension Parser {
279321
if handle.unexpectedTokens > 0 {
280322
var unexpectedTokens = [RawSyntax]()
281323
for _ in 0..<handle.unexpectedTokens {
282-
unexpectedTokens.append(RawSyntax(self.consumeAnyToken()))
324+
unexpectedTokens.append(RawSyntax(self.consumeAnyTokenWithoutAdjustingNestingLevel()))
283325
}
284326
unexpectedNodes = RawUnexpectedNodesSyntax(elements: unexpectedTokens, arena: self.arena)
285327
} else {
@@ -512,6 +554,8 @@ extension Parser {
512554
arena: self.arena
513555
)
514556

557+
self.adjustNestingLevel(for: tokenKind)
558+
515559
// ... or a multi-character token with the first N characters being the one
516560
// that we want to consume as a separate token.
517561
// Careful: We need to reset the lexer to a point just before it saw the

Sources/SwiftParser/Patterns.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,18 @@ extension Parser {
179179
/// tuple-pattern-element-list → tuple-pattern-element | tuple-pattern-element ',' tuple-pattern-element-list
180180
/// tuple-pattern-element → pattern | identifier ':' pattern
181181
mutating func parsePatternTupleElements() -> RawTuplePatternElementListSyntax {
182+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
183+
return RawTuplePatternElementListSyntax(elements: [
184+
RawTuplePatternElementSyntax(
185+
remainingTokens,
186+
labelName: nil,
187+
labelColon: nil,
188+
pattern: RawPatternSyntax(RawMissingPatternSyntax(arena: self.arena)),
189+
trailingComma: nil,
190+
arena: self.arena
191+
)
192+
], arena: self.arena)
193+
}
182194
var elements = [RawTuplePatternElementSyntax]()
183195
do {
184196
var keepGoing = true

Sources/SwiftParser/TopLevel.swift

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,27 @@
1313
@_spi(RawSyntax) import SwiftSyntax
1414

1515
extension Parser {
16+
/// Consumes and returns all remaining tokens in the source file.
17+
mutating func consumeRemainingTokens() -> [RawSyntax] {
18+
var extraneousTokens = [RawSyntax]()
19+
while !self.at(.eof) {
20+
extraneousTokens.append(RawSyntax(consumeAnyToken()))
21+
}
22+
return extraneousTokens
23+
}
24+
25+
/// If the maximum nesting level has been reached, return the remaining tokens in the source file
26+
/// as unexpected nodes that have the `isMaximumNestingLevelOverflow` bit set.
27+
/// Check this in places that are likely to cause deep recursion and if this returns non-nil, abort parsing.
28+
mutating func remainingTokensIfMaximumNestingLevelReached() -> RawUnexpectedNodesSyntax? {
29+
if nestingLevel > self.maximumNestingLevel && self.currentToken.tokenKind != .eof {
30+
let remainingTokens = self.consumeRemainingTokens()
31+
return RawUnexpectedNodesSyntax(elements: remainingTokens, isMaximumNestingLevelOverflow: true, arena: self.arena)
32+
} else {
33+
return nil
34+
}
35+
}
36+
1637
/// Parse the top level items in a file into a source file.
1738
///
1839
/// This function is the true parsing entrypoint that the high-level
@@ -26,13 +47,14 @@ extension Parser {
2647
@_spi(RawSyntax)
2748
public mutating func parseSourceFile() -> RawSourceFileSyntax {
2849
let items = self.parseTopLevelCodeBlockItems()
29-
var extraneousTokens = [RawSyntax]()
30-
while !self.at(.eof) {
31-
extraneousTokens.append(RawSyntax(consumeAnyToken()))
32-
}
33-
let unexpectedBeforeEof = extraneousTokens.isEmpty ? nil : RawUnexpectedNodesSyntax(elements: extraneousTokens, arena: self.arena)
50+
let unexpectedBeforeEof = consumeRemainingTokens()
3451
let eof = self.consume(if: .eof)!
35-
return .init(statements: items, unexpectedBeforeEof, eofToken: eof, arena: self.arena)
52+
return .init(
53+
statements: items,
54+
RawUnexpectedNodesSyntax(unexpectedBeforeEof, arena: self.arena),
55+
eofToken: eof,
56+
arena: self.arena
57+
)
3658
}
3759
}
3860

@@ -119,6 +141,15 @@ extension Parser {
119141
/// statements → statement statements?
120142
@_spi(RawSyntax)
121143
public mutating func parseCodeBlockItem(isAtTopLevel: Bool = false, allowInitDecl: Bool = true) -> RawCodeBlockItemSyntax? {
144+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
145+
return RawCodeBlockItemSyntax(
146+
remainingTokens,
147+
item: RawSyntax(RawMissingExprSyntax(arena: self.arena)),
148+
semicolon: nil,
149+
errorTokens: nil,
150+
arena: self.arena
151+
)
152+
}
122153
if self.at(any: [.caseKeyword, .defaultKeyword]) {
123154
// 'case' and 'default' are invalid in code block items.
124155
// Parse them and put them in their own CodeBlockItem but as an unexpected node.

Sources/SwiftParser/Types.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,16 @@ extension Parser {
392392
/// element-name → identifier
393393
@_spi(RawSyntax)
394394
public mutating func parseTupleTypeBody() -> RawTupleTypeSyntax {
395+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
396+
return RawTupleTypeSyntax(
397+
remainingTokens,
398+
leftParen: missingToken(.leftParen),
399+
elements: RawTupleTypeElementListSyntax(elements: [], arena: self.arena),
400+
rightParen: missingToken(.rightParen),
401+
arena: self.arena
402+
)
403+
}
404+
395405
let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen)
396406
var elements = [RawTupleTypeElementSyntax]()
397407
do {
@@ -501,6 +511,16 @@ extension Parser {
501511
/// dictionary-type → '[' type ':' type ']'
502512
@_spi(RawSyntax)
503513
public mutating func parseCollectionType() -> RawTypeSyntax {
514+
if let remaingingTokens = remainingTokensIfMaximumNestingLevelReached() {
515+
return RawTypeSyntax(RawArrayTypeSyntax(
516+
remaingingTokens,
517+
leftSquareBracket: missingToken(.leftSquareBracket),
518+
elementType: RawTypeSyntax(RawMissingTypeSyntax(arena: self.arena)),
519+
rightSquareBracket: missingToken(.rightSquareBracket),
520+
arena: self.arena
521+
))
522+
}
523+
504524
let (unexpectedBeforeLSquare, lsquare) = self.expect(.leftSquareBracket)
505525
let firstType = self.parseType()
506526
if let colon = self.consume(if: .colon) {

Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
2121
/// method and that should thus not be visited.
2222
private var handledNodes: [SyntaxIdentifier] = []
2323

24+
/// When set to `true`, no more diagnostics will be emitted.
25+
/// Useful to stop any diagnostics after a maximum nesting level overflow was detected.
26+
private var suppressRemainingDiagnostics: Bool = false
27+
2428
private init() {
2529
super.init(viewMode: .all)
2630
}
@@ -61,6 +65,9 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
6165
fixIts: [FixIt] = [],
6266
handledNodes: [SyntaxIdentifier] = []
6367
) {
68+
if suppressRemainingDiagnostics {
69+
return
70+
}
6471
diagnostics.removeAll(where: { handledNodes.contains($0.node.id) })
6572
diagnostics.append(Diagnostic(node: Syntax(node), position: position, message: message, highlights: highlights, notes: notes, fixIts: fixIts))
6673
self.handledNodes.append(contentsOf: handledNodes)
@@ -162,6 +169,11 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
162169
if node.allSatisfy({ handledNodes.contains($0.id) }) {
163170
return .skipChildren
164171
}
172+
if node.hasMaximumNestingLevelOverflow {
173+
addDiagnostic(node, .maximumNestingLevelOverflow)
174+
suppressRemainingDiagnostics = true
175+
return .skipChildren
176+
}
165177
if let tryKeyword = node.onlyToken(where: { $0.tokenKind == .tryKeyword }),
166178
let nextToken = tryKeyword.nextToken(viewMode: .sourceAccurate),
167179
nextToken.tokenKind.isKeyword {

Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ extension DiagnosticMessage where Self == StaticParserError {
143143
public static var standaloneSemicolonStatement: Self {
144144
.init("standalone ';' statements are not allowed")
145145
}
146+
public static var maximumNestingLevelOverflow: Self {
147+
.init("parsing has exceeded the maximum nesting level")
148+
}
146149
public static var subscriptsCannotHaveNames: Self {
147150
.init("subscripts cannot have a name")
148151
}

Sources/SwiftSyntax/Raw/RawSyntax.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct RecursiveRawSyntaxFlags: OptionSet {
2626
/// Whether the tree contained by this layout has any missing or unexpected nodes.
2727
static let hasError = RecursiveRawSyntaxFlags(rawValue: 1 << 0)
2828
static let hasSequenceExpr = RecursiveRawSyntaxFlags(rawValue: 1 << 1)
29+
static let hasMaximumNestingLevelOverflow = RecursiveRawSyntaxFlags(rawValue: 1 << 2)
2930
}
3031

3132
/// Node data for RawSyntax tree. Tagged union plus common data.
@@ -633,6 +634,7 @@ extension RawSyntax {
633634
public static func makeLayout(
634635
kind: SyntaxKind,
635636
uninitializedCount count: Int,
637+
isMaximumNestingLevelOverflow: Bool = false,
636638
arena: SyntaxArena,
637639
initializingWith initializer: (UnsafeMutableBufferPointer<RawSyntax?>) -> Void
638640
) -> RawSyntax {
@@ -656,6 +658,9 @@ extension RawSyntax {
656658
if kind == .sequenceExpr {
657659
recursiveFlags.insert(.hasSequenceExpr)
658660
}
661+
if isMaximumNestingLevelOverflow {
662+
recursiveFlags.insert(.hasMaximumNestingLevelOverflow)
663+
}
659664
return .layout(
660665
kind: kind,
661666
layout: RawSyntaxBuffer(layoutBuffer),

Sources/SwiftSyntax/Syntax.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,10 @@ public extension SyntaxProtocol {
278278
return raw.recursiveFlags.contains(.hasSequenceExpr)
279279
}
280280

281+
var hasMaximumNestingLevelOverflow: Bool {
282+
return raw.recursiveFlags.contains(.hasMaximumNestingLevelOverflow)
283+
}
284+
281285
/// The parent of this syntax node, or `nil` if this node is the root.
282286
var parent: Syntax? {
283287
return data.parent.map(Syntax.init(_:))

0 commit comments

Comments
 (0)