Skip to content

[Parse] Split prefix operators from regex in the lexer #58505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions include/swift/Parse/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -565,10 +565,17 @@ class Lexer {
void operator=(const SILBodyRAII&) = delete;
};

/// Attempt to re-lex a regex literal with forward slashes `/.../` from a
/// given lexing state. If \p mustBeRegex is set to true, a regex literal will
/// always be lexed. Otherwise, it will not be lexed if it may be ambiguous.
void tryLexForwardSlashRegexLiteralFrom(State S, bool mustBeRegex);
/// A RAII object for switching the lexer into forward slash regex `/.../`
/// lexing mode.
///
/// Constructing an instance sets the lexer's `ForwardSlashRegexMode` to
/// `LexerForwardSlashRegexMode::Always` when \c MustBeRegex is true, and to
/// `LexerForwardSlashRegexMode::Tentative` otherwise. The change is held by
/// an `llvm::SaveAndRestore`, so it is scoped to the lifetime of this object.
///
/// NOTE(review): presumably `Always` forces a regex literal to be lexed while
/// `Tentative` declines ambiguous cases -- confirm against the lexer's
/// handling of `LexerForwardSlashRegexMode`.
class ForwardSlashRegexRAII final {
// Holds the override of the lexer's `ForwardSlashRegexMode`.
llvm::SaveAndRestore<LexerForwardSlashRegexMode> Scope;

public:
/// \param L The lexer whose `ForwardSlashRegexMode` is overridden.
/// \param MustBeRegex Selects `Always` when true, `Tentative` when false.
ForwardSlashRegexRAII(Lexer &L, bool MustBeRegex)
: Scope(L.ForwardSlashRegexMode,
MustBeRegex ? LexerForwardSlashRegexMode::Always
: LexerForwardSlashRegexMode::Tentative) {}
};

private:
/// Nul character meaning kind.
Expand Down
1 change: 0 additions & 1 deletion include/swift/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -1758,7 +1758,6 @@ class Parser {
ParserResult<Expr>
parseExprPoundCodeCompletion(Optional<StmtKind> ParentKind);

UnresolvedDeclRefExpr *makeExprOperator(const Token &opToken);
UnresolvedDeclRefExpr *parseExprOperator();

/// Try to re-lex a '/' operator character as a regex literal. This should be
Expand Down
19 changes: 7 additions & 12 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,13 @@ void Lexer::lexOperatorIdentifier() {
rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) {
break;
}

// If we are lexing a `/.../` regex literal, we don't consider `/` to be an
// operator character.
if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None &&
*CurPtr == '/') {
break;
}
} while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd));

if (CurPtr-TokStart > 2) {
Expand Down Expand Up @@ -2080,18 +2087,6 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
return true;
}

/// Restore the lexer to state \p S with forward slash regex lexing enabled.
///
/// Returns immediately, leaving the lexer untouched, when
/// `LangOpts.EnableBareSlashRegexLiterals` is false. Otherwise
/// `ForwardSlashRegexMode` is set to `Always` if \p mustBeRegex is true, or
/// to `Tentative` if it is false, and `restoreState` is called with
/// diagnostics enabled. The mode change is held by a function-local
/// `llvm::SaveAndRestore`, so it does not outlive this call.
void Lexer::tryLexForwardSlashRegexLiteralFrom(State S, bool mustBeRegex) {
// Nothing to do unless bare `/.../` regex literals are enabled.
if (!LangOpts.EnableBareSlashRegexLiterals)
return;

// Try re-lex with forward slash enabled.
llvm::SaveAndRestore<LexerForwardSlashRegexMode> RegexLexingScope(
ForwardSlashRegexMode, mustBeRegex
? LexerForwardSlashRegexMode::Always
: LexerForwardSlashRegexMode::Tentative);
restoreState(S, /*enableDiagnostics*/ true);
}

/// lexEscapedIdentifier:
/// identifier ::= '`' identifier '`'
///
Expand Down
73 changes: 40 additions & 33 deletions lib/Parse/ParseExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,19 +546,6 @@ ParserResult<Expr> Parser::parseExprUnary(Diag<> Message, bool isExprBasic) {
break;
}
case tok::oper_prefix: {
// Check to see if we can split a prefix operator containing `/`, e.g `!/`,
// which might be a prefix operator on a regex literal.
if (Context.LangOpts.EnableBareSlashRegexLiterals) {
auto slashIdx = Tok.getText().find("/");
if (slashIdx != StringRef::npos) {
auto prefix = Tok.getText().take_front(slashIdx);
if (!prefix.empty()) {
Operator = makeExprOperator({Tok.getKind(), prefix});
consumeStartingCharacterOfCurrentToken(Tok.getKind(), prefix.size());
break;
}
}
}
Operator = parseExprOperator();
break;
}
Expand Down Expand Up @@ -880,45 +867,65 @@ static DeclRefKind getDeclRefKindForOperator(tok kind) {
}
}

UnresolvedDeclRefExpr *Parser::makeExprOperator(const Token &Tok) {
/// parseExprOperator - Parse an operator reference expression. These
/// are not "proper" expressions; they can only appear in binary/unary
/// operators.
UnresolvedDeclRefExpr *Parser::parseExprOperator() {
assert(Tok.isAnyOperator());
DeclRefKind refKind = getDeclRefKindForOperator(Tok.getKind());
SourceLoc loc = Tok.getLoc();
DeclNameRef name(Context.getIdentifier(Tok.getText()));
consumeToken();
// Bypass local lookup.
return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc));
}

/// parseExprOperator - Parse an operator reference expression. These
/// are not "proper" expressions; they can only appear in binary/unary
/// operators.
UnresolvedDeclRefExpr *Parser::parseExprOperator() {
auto *op = makeExprOperator(Tok);
consumeToken();
return op;
}

void Parser::tryLexRegexLiteral(bool mustBeRegex) {
if (!Context.LangOpts.EnableBareSlashRegexLiterals)
return;

// Check to see if we have the start of a regex literal `/.../`.
// Check to see if we have a regex literal `/.../`, optionally with a prefix
// operator, e.g. `!/.../`.
switch (Tok.getKind()) {
case tok::oper_prefix:
case tok::oper_binary_spaced:
case tok::oper_binary_unspaced: {
if (!Tok.getText().startswith("/"))
// Check to see if we have an operator containing '/'.
auto slashIdx = Tok.getText().find("/");
if (slashIdx == StringRef::npos)
break;

// Try re-lex as a `/.../` regex literal.
auto state = getParserPosition().LS;
L->tryLexForwardSlashRegexLiteralFrom(state, mustBeRegex);

// Discard the current token, which will be replaced by the re-lexed token,
// which may or may not be a regex literal token.
discardToken();
CancellableBacktrackingScope backtrack(*this);
{
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
regexScope.emplace(*L, mustBeRegex);

// Try to re-lex as a `/.../` regex literal; this will split an operator
// if necessary.
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);

// If we didn't split a prefix operator, reset the regex lexing scope.
// Otherwise, we want to keep it in place for the next token.
auto didSplit = L->peekNextToken().getLength() == slashIdx;
if (!didSplit)
regexScope.reset();

// Discard the current token, which will be replaced by the re-lexed
// token, which will either be a regex literal token, a prefix operator,
// or the original unchanged token.
discardToken();

// If we split a prefix operator from the regex literal, and are not sure
// whether this should be a regex, backtrack if we didn't end up lexing a
// regex literal.
if (didSplit && !mustBeRegex &&
!L->peekNextToken().is(tok::regex_literal)) {
return;
}

assert(Tok.getText().startswith("/"));
// Otherwise, accept the result.
backtrack.cancelBacktrack();
}
break;
}
default:
Expand Down
34 changes: 34 additions & 0 deletions test/StringProcessing/Parse/forward-slash-regex-disabled.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing -disable-availability-checking

// REQUIRES: swift_in_compiler

prefix operator /
prefix operator ^/
prefix operator /^/

precedencegroup P {
associativity: left
}

// The divisions in the body of the below operators make sure we don't try and
// consider them to be ending delimiters of a regex.
infix operator /^/ : P
func /^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 }

infix operator /^ : P
func /^ (lhs: Int, rhs: Int) -> Int { 1 / 2 }

infix operator ^^/ : P
func ^^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 }

_ = #/x/#

_ = /x/
// expected-error@-1 {{'/' is not a prefix unary operator}}
// expected-error@-2 {{cannot find 'x' in scope}}
// expected-error@-3 {{'/' is not a postfix unary operator}}

func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {}
baz(/, /)
baz(/^, /)
baz(^^/, /)
118 changes: 95 additions & 23 deletions test/StringProcessing/Parse/forward-slash-regex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,26 @@ prefix operator / // expected-error {{prefix operator may not contain '/'}}
prefix operator ^/ // expected-error {{prefix operator may not contain '/'}}
prefix operator /^/ // expected-error {{prefix operator may not contain '/'}}

prefix operator !!
prefix func !! <T>(_ x: T) -> T { x }

prefix operator ^^
prefix func ^^ <T>(_ x: T) -> T { x }

precedencegroup P {
associativity: left
}

// Fine.
// The divisions in the body of the below operators make sure we don't try and
// consider them to be ending delimiters of a regex.
infix operator /^/ : P
func /^/ (lhs: Int, rhs: Int) -> Int { 0 }
func /^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 }

infix operator /^ : P
func /^ (lhs: Int, rhs: Int) -> Int { 1 / 2 }

infix operator ^^/ : P
func ^^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 }

let i = 0 /^/ 1/^/3

Expand All @@ -22,32 +35,53 @@ _ = /x/.self
_ = /\//
_ = /\\/

// These unfortunately become infix `=/`. We could likely improve the diagnostic
// though.
let z=/0/
// expected-error@-1 {{type annotation missing in pattern}}
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
// expected-error@-3 {{expected expression after unary operator}}
// expected-error@-4 {{cannot find operator '=/' in scope}}
// expected-error@-5 {{'/' is not a postfix unary operator}}
_=/0/
// expected-error@-1 {{'_' can only appear in a pattern or on the left side of an assignment}}
// expected-error@-2 {{cannot find operator '=/' in scope}}
// expected-error@-3 {{'/' is not a postfix unary operator}}
// This is just here to appease typo correction.
let y = 0

// These unfortunately become prefix `=` and infix `=/` respectively. We could
// likely improve the diagnostic though.
do {
let z=/0/
// expected-error@-1 {{type annotation missing in pattern}}
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
// expected-error@-3 {{expected expression}}
}
do {
_=/0/
// expected-error@-1 {{'_' can only appear in a pattern or on the left side of an assignment}}
// expected-error@-2 {{cannot find operator '=/' in scope}}
// expected-error@-3 {{'/' is not a postfix unary operator}}
}

_ = /x
// expected-error@-1 {{unterminated regex literal}}

_ = !/x/
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}

_ = (!/x/)
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}

_ = !/ /
// expected-error@-1 {{regex literal may not start with space; add backslash to escape}}
// expected-error@-2 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}

_ = !!/ /
// expected-error@-1 {{regex literal may not start with space; add backslash to escape}}

_ = !!/x/
_ = (!!/x/)

_ = /^)
// expected-error@-1 {{unterminated regex literal}}
// expected-error@-2 {{closing ')' does not balance any groups openings}}

_ = /x/! // expected-error {{cannot force unwrap value of non-optional type 'Regex<Substring>'}}
_ = /x/ + /y/ // expected-error {{binary operator '+' cannot be applied to two 'Regex<Substring>' operands}}

_ = /x/+/y/
// expected-error@-1 {{cannot find operator '+/' in scope}}
// expected-error@-2 {{'/' is not a postfix unary operator}}
// expected-error@-3 {{cannot find 'y' in scope}}

_ = /x/?.blah
// expected-error@-1 {{cannot use optional chaining on non-optional value of type 'Regex<Substring>'}}
Expand All @@ -74,7 +108,6 @@ _ = /x/ ... /y/ // expected-error {{referencing operator function '...' on 'Comp
_ = /x/.../y/
// expected-error@-1 {{missing whitespace between '...' and '/' operators}}
// expected-error@-2 {{'/' is not a postfix unary operator}}
// expected-error@-3 {{cannot find 'y' in scope}}

_ = /x /...
// expected-error@-1 {{unary operator '...' cannot be applied to an operand of type 'Regex<Substring>'}}
Expand All @@ -92,12 +125,7 @@ func foo<T>(_ x: T, y: T) {}
foo(/abc/, y: /abc /)

func bar<T>(_ x: inout T) {}

// TODO: We split this into a prefix '&', but inout is handled specially when
// parsing an argument list. This shouldn't matter anyway, but we should at
// least have a custom diagnostic.
bar(&/x/)
// expected-error@-1 {{'&' is not a prefix unary operator}}
bar(&/x/) // expected-error {{cannot pass immutable value as inout argument: literals are not mutable}}

struct S {
subscript(x: Regex<Substring>) -> Void { () }
Expand Down Expand Up @@ -231,7 +259,7 @@ _ = /x/*comment*/
// expected-error@-1 {{unterminated regex literal}}

// These become regex literals, unless surrounded in parens.
func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 2{{'baz' declared here}}
func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 4{{'baz' declared here}}
baz(/, /)
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
// expected-error@-2 {{missing argument for parameter #2 in call}}
Expand All @@ -240,8 +268,22 @@ baz(/,/)
// expected-error@-2 {{missing argument for parameter #2 in call}}
baz((/), /)

baz(/^, /)
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
// expected-error@-2 {{missing argument for parameter #2 in call}}

do {
baz((/^), /)
// expected-error@-1 {{closing ')' does not balance any groups openings}}
// expected-note@-2 {{to match this opening '('}}
} // expected-error {{expected ')' in expression list}}

baz(^^/, /) // expected-error {{missing argument for parameter #2 in call}}
baz((^^/), /)

func bazbaz(_ x: (Int, Int) -> Int, _ y: Int) {}
bazbaz(/, 0)
bazbaz(^^/, 0)

func qux<T>(_ x: (Int, Int) -> Int, _ y: T) -> Int { 0 }
do {
Expand All @@ -255,6 +297,24 @@ do {
// expected-error@-2:21 {{expected ',' separator}}
}
_ = qux(/, 1) // this comment tests to make sure we don't try and end the regex on the starting '/' of '//'.
_ = qux(/, 1) /* same thing with a block comment */

func quxqux(_ x: (Int, Int) -> Int) {}
quxqux(/^/) // expected-error {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
quxqux((/^/)) // expected-error {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
quxqux({ $0 /^/ $1 })

quxqux(!/^/)
// expected-error@-1 {{cannot convert value of type 'Bool' to expected argument type '(Int, Int) -> Int'}}
// expected-error@-2 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}

quxqux(/^)

do {
quxqux(/^) / 1
// expected-error@-1 {{closing ')' does not balance any groups openings}}
// expected-error@-2 {{expected ',' separator}}
}

let arr: [Double] = [2, 3, 4]
_ = arr.reduce(1, /) / 3
Expand Down Expand Up @@ -282,3 +342,15 @@ _ = /0oG/
_ = /"/
_ = /'/
_ = /<#placeholder#>/

_ = ^^/0xG/
_ = ^^/0oG/
_ = ^^/"/
_ = ^^/'/
_ = ^^/<#placeholder#>/

_ = (^^/0xG/)
_ = (^^/0oG/)
_ = (^^/"/)
_ = (^^/'/)
_ = (^^/<#placeholder#>/)