Skip to content

Simplify consumption of prefixes in a token #1910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 11 additions & 38 deletions Sources/SwiftParser/Declarations.swift
Original file line number Diff line number Diff line change
Expand Up @@ -407,22 +407,6 @@ extension Parser {
}

extension Parser {
/// Attempt to consume an ellipsis prefix, splitting the current token if
/// necessary.
mutating func tryConsumeEllipsisPrefix() -> RawTokenSyntax? {
// It is not sufficient to check currentToken.isEllipsis here, as we may
// have something like '...>'.
// TODO: Recovery for different numbers of dots (which also needs to be
// done for regular variadics).
guard self.at(anyIn: Operator.self) != nil else { return nil }
let text = self.currentToken.tokenText
guard text.hasPrefix("...") else { return nil }
return self.consumePrefix(
SyntaxText(rebasing: text.prefix(3)),
as: .ellipsis
)
}

mutating func parseGenericParameters() -> RawGenericParameterClauseSyntax {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawGenericParameterClauseSyntax(
Expand All @@ -435,12 +419,7 @@ extension Parser {
)
}

let langle: RawTokenSyntax
if self.currentToken.starts(with: "<") {
langle = self.consumePrefix("<", as: .leftAngle)
} else {
langle = missingToken(.leftAngle)
}
let langle = self.expectWithoutRecovery(prefix: "<", as: .leftAngle)
var elements = [RawGenericParameterSyntax]()
do {
var keepGoing: RawTokenSyntax? = nil
Expand All @@ -452,14 +431,13 @@ extension Parser {
var each = self.consume(if: .keyword(.each))

let (unexpectedBetweenEachAndName, name) = self.expectIdentifier(allowSelfOrCapitalSelfAsIdentifier: true)
if attributes == nil && each == nil && unexpectedBetweenEachAndName == nil && name.isMissing && elements.isEmpty && !self.currentToken.starts(with: ">")
{
if attributes == nil && each == nil && unexpectedBetweenEachAndName == nil && name.isMissing && elements.isEmpty && !self.at(prefix: ">") {
break
}

// Parse the unsupported ellipsis for a type parameter pack 'T...'.
let unexpectedBetweenNameAndColon: RawUnexpectedNodesSyntax?
if let ellipsis = tryConsumeEllipsisPrefix() {
if let ellipsis = self.consume(ifPrefix: "...", as: .ellipsis) {
unexpectedBetweenNameAndColon = RawUnexpectedNodesSyntax([ellipsis], arena: self.arena)
if each == nil {
each = missingToken(.each)
Expand Down Expand Up @@ -518,12 +496,7 @@ extension Parser {
whereClause = nil
}

let rangle: RawTokenSyntax
if self.currentToken.starts(with: ">") {
rangle = self.consumePrefix(">", as: .rightAngle)
} else {
rangle = RawTokenSyntax(missing: .rightAngle, arena: self.arena)
}
let rangle = expectWithoutRecovery(prefix: ">", as: .rightAngle)

let parameters: RawGenericParameterListSyntax
if elements.isEmpty && rangle.isMissing {
Expand Down Expand Up @@ -934,7 +907,7 @@ extension Parser {
}

// Detect an attempt to use (early syntax) type parameter pack.
let ellipsis = tryConsumeEllipsisPrefix()
let ellipsis = self.consume(ifPrefix: "...", as: .ellipsis)

// Parse optional inheritance clause.
let inheritance: RawTypeInheritanceClauseSyntax?
Expand Down Expand Up @@ -1011,7 +984,7 @@ extension Parser {
}

let generics: RawGenericParameterClauseSyntax?
if self.currentToken.starts(with: "<") {
if self.at(prefix: "<") {
generics = self.parseGenericParameters()
} else {
generics = nil
Expand Down Expand Up @@ -1145,7 +1118,7 @@ extension Parser {
}

let genericParams: RawGenericParameterClauseSyntax?
if self.currentToken.starts(with: "<") {
if self.at(prefix: "<") {
genericParams = self.parseGenericParameters()
} else {
genericParams = nil
Expand Down Expand Up @@ -1229,14 +1202,14 @@ extension Parser {
let (unexpectedBeforeSubscriptKeyword, subscriptKeyword) = self.eat(handle)

let unexpectedName: RawTokenSyntax?
if self.at(.identifier) && self.peek().starts(with: "<") || self.peek().rawTokenKind == .leftParen {
if self.at(.identifier) && self.peek().tokenText.hasPrefix("<") || self.peek().rawTokenKind == .leftParen {
unexpectedName = self.consumeAnyToken()
} else {
unexpectedName = nil
}

let genericParameterClause: RawGenericParameterClauseSyntax?
if self.currentToken.starts(with: "<") {
if self.at(prefix: "<") {
genericParameterClause = self.parseGenericParameters()
} else {
genericParameterClause = nil
Expand Down Expand Up @@ -1617,7 +1590,7 @@ extension Parser {

// Parse a generic parameter list if it is present.
let generics: RawGenericParameterClauseSyntax?
if self.currentToken.starts(with: "<") {
if self.at(prefix: "<") {
generics = self.parseGenericParameters()
} else {
generics = nil
Expand Down Expand Up @@ -2012,7 +1985,7 @@ extension Parser {

// Optional generic parameters.
let genericParams: RawGenericParameterClauseSyntax?
if self.currentToken.starts(with: "<") {
if self.at(prefix: "<") {
genericParams = self.parseGenericParameters()
} else {
genericParams = nil
Expand Down
19 changes: 5 additions & 14 deletions Sources/SwiftParser/Expressions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1023,21 +1023,12 @@ extension Parser {

for _ in 0..<numComponents {
// Consume a period, if there is one.
let period: RawTokenSyntax?
if self.currentToken.starts(with: ".") {
period = self.consumePrefix(".", as: .period)
} else {
period = nil
}
let period = self.consume(ifPrefix: ".", as: .period)

// Consume the '!' or '?'.
let questionOrExclaim: RawTokenSyntax
if self.currentToken.starts(with: "!") {
questionOrExclaim = self.consumePrefix("!", as: .exclamationMark)
} else {
precondition(self.currentToken.starts(with: "?"))
questionOrExclaim = self.consumePrefix("?", as: .postfixQuestionMark)
}
let questionOrExclaim =
self.consume(ifPrefix: "!", as: .exclamationMark)
?? self.expectWithoutRecovery(prefix: "?", as: .postfixQuestionMark)

components.append(
RawKeyPathComponentSyntax(
Expand Down Expand Up @@ -1078,7 +1069,7 @@ extension Parser {
// operator token. Since keypath allows '.!' '.?' and '.[', consume '.'
// the token is an operator starts with '.', or the following token is '['.
let rootType: RawTypeSyntax?
if !self.currentToken.starts(with: ".") {
if !self.at(prefix: ".") {
rootType = self.parseSimpleType(stopAtFirstPeriod: true)
} else {
rootType = nil
Expand Down
4 changes: 4 additions & 0 deletions Sources/SwiftParser/Lookahead.swift
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ extension Parser.Lookahead {
///
/// <TOKEN> ... -> consumePrefix(<TOK>) -> [ <TOK> ] <EN> ...
mutating func consumePrefix(_ prefix: SyntaxText, as tokenKind: RawTokenKind) {
precondition(
tokenKind.defaultText == nil || prefix == tokenKind.defaultText!,
"If tokenKind has a defaultText, the prefix needs to match it"
)
let tokenText = self.currentToken.tokenText

if tokenText == prefix {
Expand Down
10 changes: 1 addition & 9 deletions Sources/SwiftParser/Names.swift
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ extension Parser.Lookahead {
guard lookahead.canParseTypeIdentifier() else {
return false
}
return lookahead.currentToken.starts(with: ".")
return lookahead.at(prefix: ".")
}
}

Expand Down Expand Up @@ -289,12 +289,4 @@ extension Lexer.Lexeme {
// constructed from them.
return self.rawTokenKind == .keyword
}

func starts(with symbol: SyntaxText) -> Bool {
guard Operator(lexeme: self) != nil || self.rawTokenKind.isPunctuation else {
return false
}

return self.tokenText.hasPrefix(symbol)
}
}
12 changes: 2 additions & 10 deletions Sources/SwiftParser/Nominals.swift
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ extension Parser {
}

let primaryOrGenerics: T.PrimaryOrGenerics?
if self.currentToken.starts(with: "<") {
if self.at(prefix: "<") {
primaryOrGenerics = T.parsePrimaryOrGenerics(&self)
} else {
primaryOrGenerics = nil
Expand Down Expand Up @@ -352,18 +352,10 @@ extension Parser {
)
} while keepGoing != nil && loopProgress.evaluate(currentToken)
}
let unexpectedBeforeRangle: RawUnexpectedNodesSyntax?
let rangle: RawTokenSyntax
if self.currentToken.starts(with: ">") {
unexpectedBeforeRangle = nil
rangle = self.consumePrefix(">", as: .rightAngle)
} else {
(unexpectedBeforeRangle, rangle) = self.expect(.rightAngle)
}
let rangle = self.expectWithoutRecovery(prefix: ">", as: .rightAngle)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you remove the recovery because you found it didn't actually work as expected?

I was expecting us to parse protocol Test<Foo Bar> {} mostly properly, ie. with just Bar as unexpected. But that doesn't happen as the > in recovery is parsed as a postfix operator (not a right angle) and thus what actually happens is that we skip up to the {. Did you decide it wasn't worth handling that case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I couldn’t find any examples where the recovery kicked in.

We certainly could have expect(prefix:as:) but it’s implementation is non-trivial because non of the current recovery infrastructure supports recovery a to somewhere inside a token. I filed #1923 and maybe we can implement it in a follow-up PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I wonder if there's other cases where this is happening (ie. no recovery because we're looking for a prefix).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That’s happening everywhere we currently expect a prefix. It would be nice to fix at some point.

return RawPrimaryAssociatedTypeClauseSyntax(
leftAngle: langle,
primaryAssociatedTypeList: RawPrimaryAssociatedTypeListSyntax(elements: associatedTypes, arena: self.arena),
unexpectedBeforeRangle,
rightAngle: rangle,
arena: self.arena
)
Expand Down
15 changes: 15 additions & 0 deletions Sources/SwiftParser/Parser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,17 @@ extension Parser {
)
}

/// If the current token starts with the given prefix, consume the prefis as the given token kind.
///
/// Otherwise, synthesize a missing token of the given kind.
mutating func expectWithoutRecovery(prefix: SyntaxText, as tokenKind: RawTokenKind) -> Token {
if self.at(prefix: prefix) {
return consumePrefix(prefix, as: tokenKind)
} else {
return missingToken(tokenKind)
}
}

/// - Parameters:
/// - keywordRecovery: If set to `true` and the parser is currently
/// positioned at a keyword instead of an identifier, this method recovers
Expand Down Expand Up @@ -594,6 +605,10 @@ extension Parser {
_ prefix: SyntaxText,
as tokenKind: RawTokenKind
) -> RawTokenSyntax {
precondition(
tokenKind.defaultText == nil || prefix == tokenKind.defaultText!,
"If tokenKind has a defaultText, the prefix needs to match it"
)
let current = self.currentToken
// Current token can be either one-character token we want to consume...
let tokenText = current.tokenText
Expand Down
24 changes: 24 additions & 0 deletions Sources/SwiftParser/TokenConsumer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ protocol TokenConsumer {
/// Consume the current token and change its token kind to `remappedTokenKind`.
mutating func consumeAnyToken(remapping remappedTokenKind: RawTokenKind) -> Token

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we ever take a prefix where it's not just the default text of the token? Could we just pass the token in rather than the prefix as well?

Copy link
Member Author

@ahoppen ahoppen Jul 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If they're the minority, could still be worth having a consumePrefix that only takes the token kind instead. Maybe this is more clear as to what's happening though 🤷

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that this method doesn’t have a clearly defined meaning if defaultText == nil. I prefer being explicit here and passing the prefix.

/// Consumes a given token, or splits the current token into a leading token
/// matching the given token and a trailing token and consumes the leading
/// token.
///
/// <TOKEN> ... -> consumePrefix(<TOK>) -> [ <TOK> ] <EN> ...
mutating func consumePrefix(_ prefix: SyntaxText, as tokenKind: RawTokenKind) -> Token

/// Synthesize a missing token with `kind`.
/// If `text` is not `nil`, use it for the token's text, otherwise use the token's default text.
mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText?) -> Token
Expand Down Expand Up @@ -137,6 +144,12 @@ extension TokenConsumer {
return nil
}

/// Whether the current token’s text starts with the given prefix.
@inline(__always)
mutating func at(prefix: SyntaxText) -> Bool {
return self.currentToken.tokenText.hasPrefix(prefix)
}

/// Eat a token that we know we are currently positioned at, based on `at(anyIn:)`.
@inline(__always)
mutating func eat(_ handle: TokenConsumptionHandle) -> Token {
Expand Down Expand Up @@ -248,6 +261,17 @@ extension TokenConsumer {
return nil
}
}

/// If the current token starts with the given prefix, consume the prefis as the given token kind.
///
/// Otherwise, return `nil`.
mutating func consume(ifPrefix prefix: SyntaxText, as tokenKind: RawTokenKind) -> Token? {
if self.at(prefix: prefix) {
return consumePrefix(prefix, as: tokenKind)
} else {
return nil
}
}
}

// MARK: Convenience functions
Expand Down
29 changes: 8 additions & 21 deletions Sources/SwiftParser/Types.swift
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,7 @@ extension Parser {
/// generic-argument-list → generic-argument | generic-argument ',' generic-argument-list
/// generic-argument → type
mutating func parseGenericArguments() -> RawGenericArgumentClauseSyntax {
precondition(self.currentToken.starts(with: "<"))
let langle = self.consumePrefix("<", as: .leftAngle)
let langle = self.expectWithoutRecovery(prefix: "<", as: .leftAngle)
var arguments = [RawGenericArgumentSyntax]()
do {
var keepGoing: RawTokenSyntax? = nil
Expand All @@ -444,12 +443,7 @@ extension Parser {
} while keepGoing != nil && loopProgress.evaluate(currentToken)
}

let rangle: RawTokenSyntax
if self.currentToken.starts(with: ">") {
rangle = self.consumePrefix(">", as: .rightAngle)
} else {
rangle = RawTokenSyntax(missing: .rightAngle, arena: self.arena)
}
let rangle = self.expectWithoutRecovery(prefix: ">", as: .rightAngle)

let args: RawGenericArgumentListSyntax
if arguments.isEmpty && rangle.isMissing {
Expand Down Expand Up @@ -656,7 +650,7 @@ extension Parser.Lookahead {
return false
}

if self.currentToken.isEllipsis {
if self.atContextualPunctuator("...") {
self.consumeAnyToken()
}

Expand Down Expand Up @@ -884,7 +878,7 @@ extension Parser.Lookahead {
self.consumeAnyToken()

// Parse an optional generic argument list.
if self.currentToken.starts(with: "<") && !self.consumeGenericArguments() {
if self.at(prefix: "<") && !self.consumeGenericArguments() {
return false
}

Expand All @@ -905,13 +899,11 @@ extension Parser.Lookahead {

mutating func consumeGenericArguments() -> Bool {
// Parse the opening '<'.
guard self.currentToken.starts(with: "<") else {
guard self.consume(ifPrefix: "<", as: .leftAngle) != nil else {
return false
}

self.consumePrefix("<", as: .leftAngle)

if !self.currentToken.starts(with: ">") {
if !self.at(prefix: ">") {
var loopProgress = LoopProgressCondition()
repeat {
guard self.canParseType() else {
Expand All @@ -921,11 +913,10 @@ extension Parser.Lookahead {
} while self.consume(if: .comma) != nil && loopProgress.evaluate(currentToken)
}

guard self.currentToken.starts(with: ">") else {
guard self.consume(ifPrefix: ">", as: .rightAngle) != nil else {
return false
}

self.consumePrefix(">", as: .rightAngle)
return true
}
}
Expand Down Expand Up @@ -1001,7 +992,7 @@ extension Parser {

extension Parser {
mutating func parseResultType() -> RawTypeSyntax {
if self.currentToken.starts(with: "<") {
if self.at(prefix: "<") {
let generics = self.parseGenericParameters()
let baseType = self.parseType()
return RawTypeSyntax(
Expand Down Expand Up @@ -1076,10 +1067,6 @@ extension Lexer.Lexeme {
|| self.rawTokenKind == .prefixOperator
}

var isEllipsis: Bool {
return self.isAnyOperator && self.tokenText == "..."
}

var isGenericTypeDisambiguatingToken: Bool {
switch self.rawTokenKind {
case .rightParen,
Expand Down