Skip to content

[Parser] Add 'atContextualKeywordPrefixedSyntax' method #3023

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 6 additions & 40 deletions Sources/SwiftParser/Expressions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -381,20 +381,6 @@ extension Parser {
}
}

/// Whether the current token should be treated as a contextual expression
/// modifier such as `copy` or `consume`.
///
/// These words only act as keywords when the token that follows them is an
/// identifier, a dollar identifier, or `self`, and that token is not at the
/// start of a line. Restricting the check this way keeps user-defined
/// functions named e.g. `copy` working.
private mutating func atContextualExpressionModifier() -> Bool {
  // None of the accepted operand tokens may begin a new line.
  let identifierOperand = TokenSpec(.identifier, allowAtStartOfLine: false)
  let dollarIdentifierOperand = TokenSpec(.dollarIdentifier, allowAtStartOfLine: false)
  let selfOperand = TokenSpec(.self, allowAtStartOfLine: false)

  return peek(isAt: identifierOperand, dollarIdentifierOperand, selfOperand)
}

/// Parse an expression sequence element.
mutating func parseSequenceExpressionElement(
flavor: ExprFlavor,
Expand Down Expand Up @@ -445,27 +431,7 @@ extension Parser {
)
)
case (.unsafe, let handle)?:
if self.peek().isAtStartOfLine
// Closing paired syntax
|| self.peek(isAt: .rightParen, .rightSquare, .rightBrace)
// Assignment
|| self.peek(isAt: .equal)
// As an argument label or in a list context.
|| self.peek(isAt: .colon, .comma)
// Start of a closure in a context where it should be interpreted as
// being part of a statement.
|| (flavor == .stmtCondition && self.peek(isAt: .leftBrace))
// Avoid treating as an "unsafe" expression when there is no trivia
// following the "unsafe" and the following token could either be a
// postfix expression or a subexpression:
// - Member access vs. leading .
// - Call vs. tuple expression.
// - Subscript vs. array or dictionary expression
|| (self.peek(isAt: .period, .leftParen, .leftSquare) && self.peek().leadingTriviaByteLength == 0
&& self.currentToken.trailingTriviaByteLength == 0)
// End of file
|| self.peek(isAt: .endOfFile)
{
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor, acceptClosure: true, preferPostfixExpr: false) {
break EXPR_PREFIX
}

Expand All @@ -486,7 +452,7 @@ extension Parser {
assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings))
fallthrough
case (.borrow, let handle)?:
if !atContextualExpressionModifier() {
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
break EXPR_PREFIX
}
let borrowTok = self.eat(handle)
Expand All @@ -503,7 +469,7 @@ extension Parser {
)

case (.copy, let handle)?:
if !atContextualExpressionModifier() {
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
break EXPR_PREFIX
}

Expand All @@ -524,7 +490,7 @@ extension Parser {
assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings))
fallthrough
case (.consume, let handle)?:
if !atContextualExpressionModifier() {
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
break EXPR_PREFIX
}

Expand All @@ -546,7 +512,7 @@ extension Parser {
return RawExprSyntax(parsePackExpansionExpr(repeatHandle: handle, flavor: flavor, pattern: pattern))

case (.each, let handle)?:
if !atContextualExpressionModifier() {
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
break EXPR_PREFIX
}

Expand All @@ -561,7 +527,7 @@ extension Parser {
)

case (.any, _)?:
if !atContextualExpressionModifier() && !self.peek().isContextualPunctuator("~") {
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) && !self.peek().isContextualPunctuator("~") {
break EXPR_PREFIX
}

Expand Down
212 changes: 133 additions & 79 deletions Sources/SwiftParser/Statements.swift
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,128 @@ extension Parser {
}
}

extension TokenConsumer {
  /// Decides whether the word at the cursor starts a keyword-prefixed syntax,
  /// judging by the token that follows it.
  ///
  /// - Parameters:
  ///   - exprFlavor: The expression context. When using this function for a
  ///     statement, e.g. 'yield', use `.basic`.
  ///   - acceptClosure: Consulted only when the next token is '{'. If `false`,
  ///     the result is `false`. If `true`, the result is whether the '{' is a
  ///     valid trailing closure for `exprFlavor`.
  ///   - preferPostfixExpr: Consulted only when the next token is '.', '(' or
  ///     '['. If `true`, the result is `false`. If `false`, the result is `true`
  ///     only when there is trivia between the word and that token.
  ///   - allowNextLineOperand: Whether the keyword-prefixed syntax accepts its
  ///     operand on the next line. When `false`, a next token at the start of a
  ///     line makes the result `false`.
  /// - Returns: `true` if the word should be treated as the keyword of a
  ///   keyword-prefixed syntax, `false` otherwise.
  mutating func atContextualKeywordPrefixedSyntax(
    exprFlavor: Parser.ExprFlavor,
    acceptClosure: Bool = false,
    preferPostfixExpr: Bool = true,
    allowNextLineOperand: Bool = false
  ) -> Bool {
    let following = peek()

    // Unless explicitly allowed, the operand has to be on the same line.
    guard allowNextLineOperand || !following.isAtStartOfLine else {
      return false
    }

    switch following.rawTokenKind {

    // MARK: Tokens that can only begin an operand

    case .identifier, .dollarIdentifier, .wildcard,
      // E.g. <word> foo
      .integerLiteral, .floatLiteral,
      .stringQuote, .multilineStringQuote, .singleQuote, .rawStringPoundDelimiter,
      .regexSlash, .regexPoundDelimiter,
      // E.g. <word> 1
      .prefixAmpersand, .prefixOperator, .atSign, .backslash, .pound:
      // E.g. <word> !<expr>
      return true

    // MARK: Keywords

    case .keyword:
      // Only a subset of the lexer-classified keywords can start an expression.
      switch Keyword(following.tokenText) {
      case .Any, .Self, .self, .super, .`init`, .true, .false, .nil,
        .repeat, .try,
        .if, .switch:
        return true
      case .do where self.experimentalFeatures.contains(.doExpressions):
        return true
      default:
        return false
      }

    // MARK: Tokens that cannot begin an operand

    case .binaryOperator, .equal, .arrow, .infixQuestionMark,
      // E.g. <word> != <expr>
      .postfixOperator, .postfixQuestionMark, .exclamationMark, .ellipsis,
      // E.g. <word>++
      .rightBrace, .rightParen, .rightSquare,
      // E.g. <word>]
      .colon, .comma,
      // E.g. <word>,
      .semicolon, .endOfFile, .poundElse, .poundElseif, .poundEndif,
      // The lexer never produces the angle token kinds.
      .leftAngle, .rightAngle,
      // Would mean this function was called inside a string/regex literal.
      .stringSegment, .regexLiteralPattern,
      // Invalid after the word in either interpretation, e.g. <word> #available
      .backtick, .poundAvailable, .poundUnavailable,
      .poundSourceLocation, .poundIf, .shebang, .unknown:
      return false

    // MARK: Ambiguous tokens

    case .period, .leftParen, .leftSquare:
      // Each of these can either continue a postfix expression on the word or
      // begin a new primary expression:
      //
      // - Member access vs. implicit member expression
      // - Call vs. tuple expression
      // - Subscript vs. collection literal
      //
      // When postfix expressions are not preferred, trivia between the two
      // tokens is the tie-breaker: no trivia means the word is an expression,
      // any trivia means it reads as a keyword followed by an expression.
      let triviaBetween = currentToken.trailingTriviaByteLength + following.leadingTriviaByteLength
      return !preferPostfixExpr && triviaBetween != 0

    case .leftBrace:
      // E.g. <word> { ... }
      // Also ambiguous: trailing closure vs. immediately-invoked closure.
      guard acceptClosure else {
        return false
      }

      // Whitespace cannot disambiguate here because people usually put a space
      // before trailing closures. Even though that is source breaking, the
      // keyword interpretation wins if the syntax accepts expressions starting
      // with a closure. E.g. 'unsafe { ... }()'
      return withLookahead { lookahead in
        lookahead.consumeAnyToken()
        return lookahead.atValidTrailingClosure(flavor: exprFlavor)
      }
    }
  }
}

// MARK: Lookahead

extension Parser.Lookahead {
Expand Down Expand Up @@ -949,91 +1071,23 @@ extension Parser.Lookahead {
// FIXME: 'repeat' followed by '{' could be a pack expansion
// with a closure pattern.
return self.peek().rawTokenKind == .leftBrace
case .yield?:
switch self.peek().rawTokenKind {
case .prefixAmpersand:
// "yield &" always denotes a yield statement.
return true
case .leftParen:
// "yield (", by contrast, must be disambiguated with additional
// context. We always consider it an apply expression of a function
// called `yield` for the purposes of the parse.
return false
case .binaryOperator:
// 'yield &= x' treats yield as an identifier.
return false
default:
// "yield" followed immediately by any other token is likely a
// yield statement of some singular expression.
return !self.peek().isAtStartOfLine
}
case .discard?:
let next = peek()
// The thing to be discarded must be on the same line as `discard`.
if next.isAtStartOfLine {
return false
}
switch next.rawTokenKind {
case .identifier, .keyword:
// Since some identifiers like "self" are classified as keywords,
// we want to recognize those too, to handle "discard self". We also
// accept any identifier since we want to emit a nice error message
// later on during type checking.
return true
default:
// any other token following "discard" means it's not the statement.
// For example, could be the function call "discard()".
return false
}

case .then:
return atStartOfThenStatement(preferExpr: preferExpr)
case .yield?, .discard?:
return atContextualKeywordPrefixedSyntax(
exprFlavor: .basic,
preferPostfixExpr: true
)
case .then?:
return atContextualKeywordPrefixedSyntax(
exprFlavor: .basic,
preferPostfixExpr: false,
allowNextLineOperand: !preferExpr
)

case nil:
return false
}
}

/// Whether the cursor is at a `then` token that should be parsed as a `then`
/// statement rather than as an expression.
///
/// - Parameter preferExpr: When `true`, a `then` that is not at the start of
///   a line is never treated as a statement.
/// - Returns: `true` if a `then` statement should be parsed here.
mutating func atStartOfThenStatement(preferExpr: Bool) -> Bool {
  guard at(.keyword(.then)) else {
    return false
  }

  // When an expression is preferred, only a 'then' that begins a line can
  // be a ThenStmt.
  guard !preferExpr || atStartOfLine else {
    return false
  }

  // A binary or postfix operator after 'then' means it is used as an
  // expression.
  guard
    peek(isAtAnyIn: BinaryOperatorLike.self) == nil,
    peek(isAtAnyIn: PostfixOperatorLike.self) == nil
  else {
    return false
  }

  switch PrepareForKeywordMatch(peek()) {
  case TokenSpec(.is), TokenSpec(.as):
    // 'is' and 'as' are handled like binary operators: parse as an expr.
    return false

  case .leftBrace:
    // This is a trailing closure.
    return false

  case .leftParen, .leftSquare, .period:
    // Trivia between 'then' and the token decides: with trivia it is a
    // 'then' statement, without it it is an expression such as
    // `then(...)`, `then[...]` or `then.foo`.
    let noTriviaBetween = currentToken.trailingTriviaText.isEmpty && peek().leadingTriviaText.isEmpty
    return !noTriviaBetween

  default:
    return true
  }
}

/// Returns whether the parser's current position is the start of a switch case,
/// given that we're in the middle of a switch already.
mutating func atStartOfSwitchCase(allowRecovery: Bool = false) -> Bool {
Expand Down
11 changes: 6 additions & 5 deletions Tests/SwiftParserTest/StatementTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -609,21 +609,22 @@ final class StatementTests: ParserTestCase {

assertParse(
"""
discard 1️⃣case
discard1️⃣ 2️⃣case
""",
diagnostics: [
DiagnosticSpec(
locationMarker: "1️⃣",
message: "expected expression in 'discard' statement",
fixIts: ["insert expression"]
message: "consecutive statements on a line must be separated by newline or ';'",
fixIts: ["insert newline", "insert ';'"]
),
DiagnosticSpec(
locationMarker: "1️⃣",
locationMarker: "2️⃣",
message: "'case' can only appear inside a 'switch' statement or 'enum' declaration"
),
],
fixedSource: """
discard <#expression#>case
discard
case
"""
)

Expand Down
20 changes: 4 additions & 16 deletions Tests/SwiftParserTest/ThenStatementTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -295,30 +295,18 @@ final class ThenStatementTests: ParserTestCase {
func testThenStmt22() {
assertParse(
"""
then1️⃣
then
""",
diagnostics: [
DiagnosticSpec(
message: "expected expression in 'then' statement",
fixIts: ["insert expression"]
)
],
fixedSource: "then <#expression#>"
substructure: DeclReferenceExprSyntax(baseName: .identifier("then"))
)
}

func testThenStmt23() {
assertParse(
"""
then1️⃣;
then;
""",
diagnostics: [
DiagnosticSpec(
message: "expected expression in 'then' statement",
fixIts: ["insert expression"]
)
],
fixedSource: "then <#expression#>;"
substructure: DeclReferenceExprSyntax(baseName: .identifier("then"))
)
}

Expand Down