Skip to content

Commit a78012d

Browse files
authored
Merge pull request #1030 from ahoppen/ahoppen/prevent-stack-overflow
Restrict the maximum nesting level in the parser to avoid stack overflows
2 parents 5881fce + 213ddc6 commit a78012d

File tree

14 files changed

+297
-115
lines changed

14 files changed

+297
-115
lines changed

Sources/SwiftParser/Declarations.swift

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,18 @@ extension Parser {
363363

364364
@_spi(RawSyntax)
365365
public mutating func parseGenericParameters() -> RawGenericParameterClauseSyntax {
366-
assert(self.currentToken.starts(with: "<"))
366+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
367+
return RawGenericParameterClauseSyntax(
368+
remainingTokens,
369+
leftAngleBracket: missingToken(.leftAngle),
370+
genericParameterList: RawGenericParameterListSyntax(elements: [], arena: self.arena),
371+
genericWhereClause: nil,
372+
rightAngleBracket: missingToken(.rightAngle),
373+
arena: self.arena
374+
)
375+
}
367376

377+
assert(self.currentToken.starts(with: "<"))
368378
let langle = self.consumeAnyToken(remapping: .leftAngle)
369379
var elements = [RawGenericParameterSyntax]()
370380
do {
@@ -621,6 +631,16 @@ extension Parser {
621631
extension Parser {
622632
@_spi(RawSyntax)
623633
public mutating func parseMemberDeclListItem() -> RawMemberDeclListItemSyntax? {
634+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
635+
let item = RawMemberDeclListItemSyntax(
636+
remainingTokens,
637+
decl: RawDeclSyntax(RawMissingDeclSyntax(attributes: nil, modifiers: nil, arena: self.arena)),
638+
semicolon: nil,
639+
arena: self.arena
640+
)
641+
return item
642+
}
643+
624644
let decl: RawDeclSyntax
625645
if self.at(.poundSourceLocationKeyword) {
626646
decl = RawDeclSyntax(self.parsePoundSourceLocationDirective())

Sources/SwiftParser/Directives.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ extension Parser {
7070
addSemicolonIfNeeded: (_ lastElement: Element, _ newItemAtStartOfLine: Bool, _ parser: inout Parser) -> Element? = { _, _, _ in nil },
7171
syntax: (inout Parser, [Element]) -> RawIfConfigClauseSyntax.Elements?
7272
) -> RawIfConfigDeclSyntax {
73+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
74+
return RawIfConfigDeclSyntax(
75+
remainingTokens,
76+
clauses: RawIfConfigClauseListSyntax(elements: [], arena: self.arena),
77+
poundEndif: missingToken(.poundEndifKeyword),
78+
arena: self.arena
79+
)
80+
}
81+
7382
var clauses = [RawIfConfigClauseSyntax]()
7483
do {
7584
var firstIteration = true

Sources/SwiftParser/Expressions.swift

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1757,6 +1757,16 @@ extension Parser {
17571757
/// dictionary-literal-items → dictionary-literal-item ','? | dictionary-literal-item ',' dictionary-literal-items
17581758
@_spi(RawSyntax)
17591759
public mutating func parseCollectionLiteral() -> RawExprSyntax {
1760+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
1761+
return RawExprSyntax(RawArrayExprSyntax(
1762+
remainingTokens,
1763+
leftSquare: missingToken(.leftSquareBracket),
1764+
elements: RawArrayElementListSyntax(elements: [], arena: self.arena),
1765+
rightSquare: missingToken(.rightSquareBracket),
1766+
arena: self.arena
1767+
))
1768+
}
1769+
17601770
let (unexpectedBeforeLSquare, lsquare) = self.expect(.leftSquareBracket)
17611771

17621772
if let rsquare = self.consume(if: .rightSquareBracket) {
@@ -2177,6 +2187,17 @@ extension Parser {
21772187
/// tuple-element → expression | identifier ':' expression
21782188
@_spi(RawSyntax)
21792189
public mutating func parseArgumentListElements(pattern: PatternContext) -> [RawTupleExprElementSyntax] {
2190+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
2191+
return [RawTupleExprElementSyntax(
2192+
remainingTokens,
2193+
label: nil,
2194+
colon: nil,
2195+
expression: RawExprSyntax(RawMissingExprSyntax(arena: self.arena)),
2196+
trailingComma: nil,
2197+
arena: self.arena
2198+
)]
2199+
}
2200+
21802201
guard !self.at(.rightParen) else {
21812202
return []
21822203
}

Sources/SwiftParser/Lookahead.swift

Lines changed: 87 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,7 @@ extension Parser.Lookahead {
335335

336336
extension Parser.Lookahead {
337337
mutating func skipUntil(_ t1: RawTokenKind, _ t2: RawTokenKind) {
338-
while !self.at(any: [.eof, t1, t2, .poundEndifKeyword, .poundElseKeyword, .poundElseifKeyword]) {
339-
self.skipSingle()
340-
}
338+
return skip(initialState: .skipUntil(t1, t2))
341339
}
342340

343341
mutating func skipUntilEndOfLine() {
@@ -347,70 +345,99 @@ extension Parser.Lookahead {
347345
}
348346

349347
mutating func skipSingle() {
350-
enum BracketedTokens: RawTokenKindSubset {
351-
case leftParen
352-
case leftBrace
353-
case leftSquareBracket
354-
case poundIfKeyword
355-
case poundElseKeyword
356-
case poundElseifKeyword
357-
358-
init?(lexeme: Lexer.Lexeme) {
359-
switch lexeme.tokenKind {
360-
case .leftParen: self = .leftParen
361-
case .leftBrace: self = .leftBrace
362-
case .leftSquareBracket: self = .leftSquareBracket
363-
case .poundIfKeyword: self = .poundIfKeyword
364-
case .poundElseKeyword: self = .poundElseKeyword
365-
case .poundElseifKeyword: self = .poundElseifKeyword
366-
default: return nil
367-
}
348+
return skip(initialState: .skipSingle)
349+
}
350+
351+
private enum BracketedTokens: RawTokenKindSubset {
352+
case leftParen
353+
case leftBrace
354+
case leftSquareBracket
355+
case poundIfKeyword
356+
case poundElseKeyword
357+
case poundElseifKeyword
358+
359+
init?(lexeme: Lexer.Lexeme) {
360+
switch lexeme.tokenKind {
361+
case .leftParen: self = .leftParen
362+
case .leftBrace: self = .leftBrace
363+
case .leftSquareBracket: self = .leftSquareBracket
364+
case .poundIfKeyword: self = .poundIfKeyword
365+
case .poundElseKeyword: self = .poundElseKeyword
366+
case .poundElseifKeyword: self = .poundElseifKeyword
367+
default: return nil
368368
}
369+
}
369370

370-
var rawTokenKind: RawTokenKind {
371-
switch self {
372-
case .leftParen: return .leftParen
373-
case .leftBrace: return .leftBrace
374-
case .leftSquareBracket: return .leftSquareBracket
375-
case .poundIfKeyword: return .poundIfKeyword
376-
case .poundElseKeyword: return .poundElseKeyword
377-
case .poundElseifKeyword: return .poundElseifKeyword
378-
}
371+
var rawTokenKind: RawTokenKind {
372+
switch self {
373+
case .leftParen: return .leftParen
374+
case .leftBrace: return .leftBrace
375+
case .leftSquareBracket: return .leftSquareBracket
376+
case .poundIfKeyword: return .poundIfKeyword
377+
case .poundElseKeyword: return .poundElseKeyword
378+
case .poundElseifKeyword: return .poundElseifKeyword
379379
}
380380
}
381+
}
381382

382-
switch self.at(anyIn: BracketedTokens.self) {
383-
case (.leftParen, let handle)?:
384-
self.eat(handle)
385-
self.skipUntil(.rightParen, .rightBrace)
386-
self.consume(if: .rightParen)
387-
return
388-
case (.leftBrace, let handle)?:
389-
self.eat(handle)
390-
self.skipUntil(.rightBrace, .rightBrace)
391-
self.consume(if: .rightBrace)
392-
return
393-
case (.leftSquareBracket, let handle)?:
394-
self.eat(handle)
395-
self.skipUntil(.rightSquareBracket, .rightSquareBracket)
396-
self.consume(if: .rightSquareBracket)
397-
return
398-
case (.poundIfKeyword, let handle)?,
399-
(.poundElseKeyword, let handle)?,
400-
(.poundElseifKeyword, let handle)?:
401-
self.eat(handle)
402-
// skipUntil also implicitly stops at tok::pound_endif.
403-
self.skipUntil(.poundElseKeyword, .poundElseifKeyword)
383+
private enum SkippingState {
384+
/// Equivalent to a call to `skipSingle`. Skip a single token.
385+
/// If that token is bracketed, skip until the closing bracket
386+
case skipSingle
387+
/// Execute code after skipping bracketed tokens detected from `skipSingle`.
388+
case skipSinglePost(start: BracketedTokens)
389+
/// Skip until either `t1` or `t2`.
390+
case skipUntil(_ t1: RawTokenKind, _ t2: RawTokenKind)
391+
}
404392

405-
if self.at(any: [.poundElseKeyword, .poundElseifKeyword]) {
406-
self.skipSingle()
407-
} else {
408-
self.consume(if: .poundElseifKeyword)
393+
/// A non-recursive function to skip tokens.
///
/// Instead of recursing for nested brackets, pending work is kept on an
/// explicit stack of `SkippingState`s that is processed until it is empty.
///
/// - Parameter initialState: The skipping operation to start with, i.e.
///   `.skipSingle` or `.skipUntil(t1, t2)`.
private mutating func skip(initialState: SkippingState) {
  // States are popped from the end, so the state that should run first must
  // be pushed last.
  var stack: [SkippingState] = [initialState]

  while let state = stack.popLast() {
    switch state {
    case .skipSingle:
      if let (start, handle) = self.at(anyIn: BracketedTokens.self) {
        // Consume the opening token, then skip its contents and finally run
        // the matching `skipSinglePost` step.
        self.eat(handle)
        let terminators: (RawTokenKind, RawTokenKind)
        switch start {
        case .leftParen:
          terminators = (.rightParen, .rightBrace)
        case .leftBrace:
          terminators = (.rightBrace, .rightBrace)
        case .leftSquareBracket:
          terminators = (.rightSquareBracket, .rightSquareBracket)
        case .poundIfKeyword, .poundElseKeyword, .poundElseifKeyword:
          // skipUntil also implicitly stops at `#endif`.
          terminators = (.poundElseKeyword, .poundElseifKeyword)
        }
        stack += [.skipSinglePost(start: start), .skipUntil(terminators.0, terminators.1)]
      } else {
        // Not a bracketed token: skipping it means consuming just this token.
        self.consumeAnyToken()
      }
    case .skipSinglePost(start: let start):
      switch start {
      case .leftParen:
        self.consume(if: .rightParen)
      case .leftBrace:
        self.consume(if: .rightBrace)
      case .leftSquareBracket:
        self.consume(if: .rightSquareBracket)
      case .poundIfKeyword, .poundElseKeyword, .poundElseifKeyword:
        if self.at(any: [.poundElseKeyword, .poundElseifKeyword]) {
          // Another clause of the same conditional block follows; skip it too.
          stack.append(.skipSingle)
        } else {
          // `skipUntil` stopped at `#endif` (or at EOF); consume the closing
          // `#endif` if present.
          self.consume(if: .poundEndifKeyword)
        }
        // No `return` here: states that are still on the stack (e.g. an
        // enclosing `skipUntil`, or the `skipSingle` pushed above) must
        // continue to be processed.
      }
    case .skipUntil(let t1, let t2):
      // One loop iteration of "skip until": skip a single (possibly
      // bracketed) token and then re-check the stop condition.
      if !self.at(any: [.eof, t1, t2, .poundEndifKeyword, .poundElseKeyword, .poundElseifKeyword]) {
        stack += [.skipUntil(t1, t2), .skipSingle]
      }
    }
  }
}
416443
}

Sources/SwiftParser/Parser.swift

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,16 @@ extension Parser {
2626
}
2727

2828
/// Parse the source code in the given string as Swift source file.
29+
/// If `maximumNestingLevel` is set, the parser will stop if a nesting level
30+
/// that is greater than this value is reached to avoid overflowing the stack.
31+
/// The nesting level is increased whenever a bracketed expression like `(`
32+
/// or `{` is started.
2933
public static func parse(
3034
source: UnsafeBufferPointer<UInt8>,
35+
maximumNestingLevel: Int? = nil,
3136
parseTransition: IncrementalParseTransition? = nil
3237
) -> SourceFileSyntax {
33-
var parser = Parser(source)
38+
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
3439
// Extended lifetime is required because `SyntaxArena` in the parser must
3540
// be alive until `Syntax(raw:)` retains the arena.
3641
return withExtendedLifetime(parser) {
@@ -122,6 +127,23 @@ public struct Parser: TokenConsumer {
122127
@_spi(RawSyntax)
123128
public var currentToken: Lexer.Lexeme
124129

130+
/// The current nesting level, i.e. the number of consumed tokens that start
131+
/// a nesting level (e.g. `(` or `{`) minus the number of consumed tokens
132+
/// that end a nesting level (e.g. `)` or `}`).
133+
public var nestingLevel: Int = 0
134+
135+
/// When this nesting level is exceeded, the parser should stop parsing.
136+
public let maximumNestingLevel: Int
137+
138+
/// A default maximum nesting level that is used if the client didn't
139+
/// explicitly specify one. Debug builds of the parser consume a lot more stack
140+
/// space and thus have a lower default maximum nesting level.
141+
#if DEBUG
142+
public static let defaultMaximumNestingLevel = 25
143+
#else
144+
public static let defaultMaximumNestingLevel = 256
145+
#endif
146+
125147
/// Initializes a Parser from the given input buffer.
126148
///
127149
/// The lexer will copy any string data it needs from the resulting buffer
@@ -133,7 +155,9 @@ public struct Parser: TokenConsumer {
133155
/// arena is created automatically, and `input` copied into the
134156
/// arena. If non-`nil`, `input` must be the registered source
135157
/// buffer of `arena` or a slice of the source buffer.
136-
public init(_ input: UnsafeBufferPointer<UInt8>, arena: SyntaxArena? = nil) {
158+
public init(_ input: UnsafeBufferPointer<UInt8>, maximumNestingLevel: Int? = nil, arena: SyntaxArena? = nil) {
159+
self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel
160+
137161
var sourceBuffer: UnsafeBufferPointer<UInt8>
138162
if let arena = arena {
139163
self.arena = arena
@@ -150,6 +174,7 @@ public struct Parser: TokenConsumer {
150174

151175
@_spi(RawSyntax)
152176
public mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText? = nil) -> RawTokenSyntax {
177+
adjustNestingLevel(for: kind)
153178
return RawTokenSyntax(missing: kind, text: text, arena: self.arena)
154179
}
155180

@@ -158,6 +183,12 @@ public struct Parser: TokenConsumer {
158183
/// - Returns: The token that was consumed.
159184
@_spi(RawSyntax)
160185
public mutating func consumeAnyToken() -> RawTokenSyntax {
186+
adjustNestingLevel(for: self.currentToken.tokenKind)
187+
return self.consumeAnyTokenWithoutAdjustingNestingLevel()
188+
}
189+
190+
@_spi(RawSyntax)
191+
public mutating func consumeAnyTokenWithoutAdjustingNestingLevel() -> RawTokenSyntax {
161192
let tok = self.currentToken
162193
self.currentToken = self.lexemes.advance()
163194
return RawTokenSyntax(
@@ -168,6 +199,17 @@ public struct Parser: TokenConsumer {
168199
arena: arena
169200
)
170201
}
202+
203+
private mutating func adjustNestingLevel(for tokenKind: RawTokenKind) {
204+
switch tokenKind {
205+
case .leftAngle, .leftBrace, .leftParen, .leftSquareBracket, .poundIfKeyword:
206+
nestingLevel += 1
207+
case .rightAngle, .rightBrace, .rightParen, .rightSquareBracket, .poundEndifKeyword:
208+
nestingLevel -= 1
209+
default:
210+
break
211+
}
212+
}
171213
}
172214

173215
// MARK: Inspecting Tokens
@@ -279,7 +321,7 @@ extension Parser {
279321
if handle.unexpectedTokens > 0 {
280322
var unexpectedTokens = [RawSyntax]()
281323
for _ in 0..<handle.unexpectedTokens {
282-
unexpectedTokens.append(RawSyntax(self.consumeAnyToken()))
324+
unexpectedTokens.append(RawSyntax(self.consumeAnyTokenWithoutAdjustingNestingLevel()))
283325
}
284326
unexpectedNodes = RawUnexpectedNodesSyntax(elements: unexpectedTokens, arena: self.arena)
285327
} else {
@@ -512,6 +554,8 @@ extension Parser {
512554
arena: self.arena
513555
)
514556

557+
self.adjustNestingLevel(for: tokenKind)
558+
515559
// ... or a multi-character token with the first N characters being the one
516560
// that we want to consume as a separate token.
517561
// Careful: We need to reset the lexer to a point just before it saw the

Sources/SwiftParser/Patterns.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,18 @@ extension Parser {
179179
/// tuple-pattern-element-list → tuple-pattern-element | tuple-pattern-element ',' tuple-pattern-element-list
180180
/// tuple-pattern-element → pattern | identifier ':' pattern
181181
mutating func parsePatternTupleElements() -> RawTuplePatternElementListSyntax {
182+
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
183+
return RawTuplePatternElementListSyntax(elements: [
184+
RawTuplePatternElementSyntax(
185+
remainingTokens,
186+
labelName: nil,
187+
labelColon: nil,
188+
pattern: RawPatternSyntax(RawMissingPatternSyntax(arena: self.arena)),
189+
trailingComma: nil,
190+
arena: self.arena
191+
)
192+
], arena: self.arena)
193+
}
182194
var elements = [RawTuplePatternElementSyntax]()
183195
do {
184196
var keepGoing = true

0 commit comments

Comments
 (0)