Skip to content

Commit 6ea33f0

Browse files
committed
[Parse] Lenient prefix / parsing
Treat a prefix operator containing `/` the same as the unapplied infix operator case, where we lex tentatively. This means that we bail if there is no closing `/` or the starting character is invalid. This leaves binary operators containing `/` in expression position as the only remaining case where we know that we definitely have a regex literal.
1 parent 04ffc93 commit 6ea33f0

File tree

4 files changed

+69
-59
lines changed

4 files changed

+69
-59
lines changed

include/swift/Parse/Parser.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1762,10 +1762,7 @@ class Parser {
17621762
/// Try re-lex a '/' operator character as a regex literal. This should be
17631763
/// called when parsing in an expression position to ensure a regex literal is
17641764
/// correctly parsed.
1765-
///
1766-
/// If \p mustBeRegex is set to true, a regex literal will always be lexed if
1767-
/// enabled. Otherwise, it will not be lexed if it may be ambiguous.
1768-
void tryLexRegexLiteral(bool mustBeRegex);
1765+
void tryLexRegexLiteral(bool forUnappliedOperator);
17691766

17701767
void validateCollectionElement(ParserResult<Expr> element);
17711768

lib/Parse/ParseExpr.cpp

Lines changed: 60 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ ParserResult<Expr> Parser::parseExprUnary(Diag<> Message, bool isExprBasic) {
513513
UnresolvedDeclRefExpr *Operator;
514514

515515
// First check to see if we have the start of a regex literal `/.../`.
516-
tryLexRegexLiteral(/*mustBeRegex*/ true);
516+
tryLexRegexLiteral(/*forUnappliedOperator*/ false);
517517

518518
switch (Tok.getKind()) {
519519
default:
@@ -880,56 +880,64 @@ UnresolvedDeclRefExpr *Parser::parseExprOperator() {
880880
return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc));
881881
}
882882

883-
void Parser::tryLexRegexLiteral(bool mustBeRegex) {
883+
void Parser::tryLexRegexLiteral(bool forUnappliedOperator) {
884884
if (!Context.LangOpts.EnableBareSlashRegexLiterals)
885885
return;
886886

887887
// Check to see if we have a regex literal `/.../`, optionally with a prefix
888888
// operator, e.g. `!/.../`.
889+
bool mustBeRegex = false;
889890
switch (Tok.getKind()) {
890891
case tok::oper_prefix:
892+
// Prefix operators may contain `/` characters, so this may not be a regex,
893+
// and as such we need to make sure we have a closing `/`.
894+
break;
891895
case tok::oper_binary_spaced:
892-
case tok::oper_binary_unspaced: {
893-
// Check to see if we have an operator containing '/'.
894-
auto slashIdx = Tok.getText().find("/");
895-
if (slashIdx == StringRef::npos)
896-
break;
896+
case tok::oper_binary_unspaced:
897+
// When re-lexing for a unary expression, binary operators are always
898+
// invalid, so we can be confident in always lexing a regex literal.
899+
mustBeRegex = !forUnappliedOperator;
900+
break;
901+
default:
902+
// We only re-lex regex literals for operator tokens.
903+
return;
904+
}
897905

898-
CancellableBacktrackingScope backtrack(*this);
899-
{
900-
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
901-
regexScope.emplace(*L, mustBeRegex);
902-
903-
// Try re-lex as a `/.../` regex literal, this will split an operator if
904-
// necessary.
905-
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);
906-
907-
// If we didn't split a prefix operator, reset the regex lexing scope.
908-
// Otherwise, we want to keep it in place for the next token.
909-
auto didSplit = L->peekNextToken().getLength() == slashIdx;
910-
if (!didSplit)
911-
regexScope.reset();
912-
913-
// Discard the current token, which will be replaced by the re-lexed
914-
// token, which will either be a regex literal token, a prefix operator,
915-
// or the original unchanged token.
916-
discardToken();
917-
918-
// If we split a prefix operator from the regex literal, and are not sure
919-
// whether this should be a regex, backtrack if we didn't end up lexing a
920-
// regex literal.
921-
if (didSplit && !mustBeRegex &&
922-
!L->peekNextToken().is(tok::regex_literal)) {
923-
return;
924-
}
906+
// Check to see if we have an operator containing '/'.
907+
auto slashIdx = Tok.getText().find("/");
908+
if (slashIdx == StringRef::npos)
909+
return;
910+
911+
CancellableBacktrackingScope backtrack(*this);
912+
{
913+
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
914+
regexScope.emplace(*L, mustBeRegex);
925915

926-
// Otherwise, accept the result.
927-
backtrack.cancelBacktrack();
916+
// Try re-lex as a `/.../` regex literal, this will split an operator if
917+
// necessary.
918+
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);
919+
920+
// If we didn't split a prefix operator, reset the regex lexing scope.
921+
// Otherwise, we want to keep it in place for the next token.
922+
auto didSplit = L->peekNextToken().getLength() == slashIdx;
923+
if (!didSplit)
924+
regexScope.reset();
925+
926+
// Discard the current token, which will be replaced by the re-lexed
927+
// token, which will either be a regex literal token, a prefix operator,
928+
// or the original unchanged token.
929+
discardToken();
930+
931+
// If we split a prefix operator from the regex literal, and are not sure
932+
// whether this should be a regex, backtrack if we didn't end up lexing a
933+
// regex literal.
934+
if (didSplit && !mustBeRegex &&
935+
!L->peekNextToken().is(tok::regex_literal)) {
936+
return;
928937
}
929-
break;
930-
}
931-
default:
932-
break;
938+
939+
// Otherwise, accept the result.
940+
backtrack.cancelBacktrack();
933941
}
934942
}
935943

@@ -3220,17 +3228,23 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok,
32203228
SourceLoc FieldNameLoc;
32213229
parseOptionalArgumentLabel(FieldName, FieldNameLoc);
32223230

3223-
// First check to see if we have the start of a regex literal `/.../`. We
3224-
// need to do this before handling unapplied operator references, as e.g
3225-
// `(/, /)` might be a regex literal.
3226-
tryLexRegexLiteral(/*mustBeRegex*/ false);
3227-
32283231
// See if we have an operator decl ref '(<op>)'. The operator token in
32293232
// this case lexes as a binary operator because it neither leads nor
32303233
// follows a proper subexpression.
3234+
auto isUnappliedOperator = [&]() {
3235+
return Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma);
3236+
};
3237+
3238+
if (isUnappliedOperator()) {
3239+
// Check to see if we have the start of a regex literal `/.../`. We need
3240+
// to do this for an unapplied operator reference, as e.g `(/, /)` might
3241+
// be a regex literal.
3242+
tryLexRegexLiteral(/*forUnappliedOperator*/ true);
3243+
}
3244+
32313245
ParserStatus Status;
32323246
Expr *SubExpr = nullptr;
3233-
if (Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma)) {
3247+
if (isUnappliedOperator()) {
32343248
SyntaxParsingContext operatorContext(SyntaxContext,
32353249
SyntaxKind::IdentifierExpr);
32363250
DeclNameLoc Loc;

test/StringProcessing/Parse/forward-slash-regex.swift

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,9 @@ do {
5353
// expected-error@-3 {{'/' is not a postfix unary operator}}
5454
}
5555

56+
// No closing '/' so a prefix operator.
5657
_ = /x
57-
// expected-error@-1 {{unterminated regex literal}}
58+
// expected-error@-1 {{'/' is not a prefix unary operator}}
5859

5960
_ = !/x/
6061
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}
@@ -250,13 +251,13 @@ _ = await /x / // expected-warning {{no 'async' operations occur within 'await'
250251
// written a comment and is still in the middle of writing the characters before
251252
// it.
252253
_ = /x// comment
253-
// expected-error@-1 {{unterminated regex literal}}
254+
// expected-error@-1 {{'/' is not a prefix unary operator}}
254255

255256
_ = /x // comment
256-
// expected-error@-1 {{unterminated regex literal}}
257+
// expected-error@-1 {{'/' is not a prefix unary operator}}
257258

258259
_ = /x/*comment*/
259-
// expected-error@-1 {{unterminated regex literal}}
260+
// expected-error@-1 {{'/' is not a prefix unary operator}}
260261

261262
// These become regex literals, unless surrounded in parens.
262263
func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 4{{'baz' declared here}}

test/StringProcessing/Parse/regex_parse_error.swift

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,15 @@ _ = #/\(?'abc/#
3030
do {
3131
_ = /\
3232
/
33-
// expected-error@-2:7 {{unterminated regex literal}}
34-
// expected-error@-3:9 {{expected escape sequence}}
35-
} // expected-error@:1 {{expected expression after operator}}
33+
// expected-error@-1:3 {{expected expression path in Swift key path}}
34+
}
3635

3736
do {
3837
_ = #/\
3938
/#
4039
// expected-error@-2:7 {{unterminated regex literal}}
4140
// expected-error@-3:10 {{expected escape sequence}}
42-
// expected-error@-3:3 {{unterminated regex literal}}
43-
// expected-warning@-4:3 {{regular expression literal is unused}}
41+
// expected-error@-3:4 {{expected expression}}
4442
}
4543

4644
func foo<T>(_ x: T, _ y: T) {}

0 commit comments

Comments
 (0)