Skip to content

[Parse] Ban trailing space for /.../ regex literals #59525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/swift/AST/DiagnosticsParse.def
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ ERROR(lex_invalid_closing_delimiter,none,
ERROR(lex_regex_literal_invalid_starting_char,none,
"regex literal may not start with %0; add backslash to escape",
(StringRef))
ERROR(lex_regex_literal_invalid_ending_char,none,
"regex literal may not end with %0; use extended literal instead",
(StringRef))
ERROR(lex_regex_literal_unterminated,none,
"unterminated regex literal", ())

Expand Down
152 changes: 83 additions & 69 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2048,6 +2048,14 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,

bool IsForwardSlash = (*TokStart == '/');

auto spaceOrTabDescription = [](char c) -> StringRef {
switch (c) {
case ' ': return "space";
case '\t': return "tab";
default: llvm_unreachable("Unhandled case");
}
};

// Check if we're able to lex a `/.../` regex.
if (IsForwardSlash) {
// For `/.../` regex literals, we need to ban space and tab at the start of
Expand All @@ -2063,33 +2071,17 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
// TODO: This heuristic should be sunk into the Swift library once we have a
// way of doing fix-its from there.
auto *RegexContentStart = TokStart + 1;
switch (*RegexContentStart) {
case ' ':
case '\t': {
if (*RegexContentStart == ' ' || *RegexContentStart == '\t') {
if (!MustBeRegex)
return nullptr;

if (Diags) {
// We must have a regex, so emit an error for space and tab.
StringRef DiagChar;
switch (*RegexContentStart) {
case ' ':
DiagChar = "space";
break;
case '\t':
DiagChar = "tab";
break;
default:
llvm_unreachable("Unhandled case");
}
Diags->diagnose(getSourceLoc(RegexContentStart),
diag::lex_regex_literal_invalid_starting_char, DiagChar)
diag::lex_regex_literal_invalid_starting_char,
spaceOrTabDescription(*RegexContentStart))
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
}
break;
}
default:
break;
}
}

Expand All @@ -2106,60 +2098,82 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
if (Ptr == TokStart)
return nullptr;

// If we're lexing `/.../`, error if we ended on the opening of a comment.
// We prefer to lex the comment as it's more likely than not that is what
// the user is expecting.
// TODO: This should be sunk into the Swift library.
if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
if (!MustBeRegex)
return nullptr;
// Perform some additional heuristics to see if we can lex `/.../`.
// TODO: These should all be sunk into the Swift library.
if (IsForwardSlash) {
// If we're lexing `/.../`, error if we ended on the opening of a comment.
// We prefer to lex the comment as it's more likely than not that is what
// the user is expecting.
if (Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
if (!MustBeRegex)
return nullptr;

if (Diags) {
Diags->diagnose(getSourceLoc(TokStart),
diag::lex_regex_literal_unterminated);
}
// Move the pointer back to the '/' of the comment.
Ptr--;
}

// If we're tentatively lexing `/.../`, scan to make sure we don't have any
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
// regex syntax anyways. This ensures users can surround their operator ref
// in parens `(/)` to fix the issue. This also applies to prefix operators
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
// or not we're in a custom character class `[...]`, as parens are literal
// there.
// TODO: This should be sunk into the Swift library.
if (IsForwardSlash && !MustBeRegex) {
unsigned CharClassDepth = 0;
unsigned GroupDepth = 0;
for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) {
switch (*Cursor) {
case '\\':
// Skip over the next character of an escape.
Cursor++;
break;
case '(':
if (CharClassDepth == 0)
GroupDepth += 1;
break;
case ')':
if (CharClassDepth != 0)
if (Diags) {
Diags->diagnose(getSourceLoc(TokStart),
diag::lex_regex_literal_unterminated);
}
// Move the pointer back to the '/' of the comment.
Ptr--;
}
auto *TokEnd = Ptr - 1;
auto *ContentEnd = TokEnd - 1;

// We also ban unescaped space and tab at the end of a `/.../` literal.
if (*TokEnd == '/' && (TokEnd - TokStart > 2) && ContentEnd[-1] != '\\' &&
(*ContentEnd == ' ' || *ContentEnd == '\t')) {
if (!MustBeRegex)
return nullptr;

if (Diags) {
// Diagnose and suggest using a `#/.../#` literal instead. We could
// suggest escaping, but that would be wrong if the user has written (?x).
// TODO: Should we suggest this for space-as-first character too?
Diags->diagnose(getSourceLoc(ContentEnd),
diag::lex_regex_literal_invalid_ending_char,
spaceOrTabDescription(*ContentEnd))
.fixItInsert(getSourceLoc(TokStart), "#")
.fixItInsert(getSourceLoc(Ptr), "#");
}
}

// If we're tentatively lexing `/.../`, scan to make sure we don't have any
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
// regex syntax anyways. This ensures users can surround their operator ref
// in parens `(/)` to fix the issue. This also applies to prefix operators
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
// or not we're in a custom character class `[...]`, as parens are literal
// there.
if (!MustBeRegex) {
unsigned CharClassDepth = 0;
unsigned GroupDepth = 0;
for (auto *Cursor = TokStart + 1; Cursor < TokEnd; Cursor++) {
switch (*Cursor) {
case '\\':
// Skip over the next character of an escape.
Cursor++;
break;
case '(':
if (CharClassDepth == 0)
GroupDepth += 1;
break;
case ')':
if (CharClassDepth != 0)
break;

// Invalid, so bail.
if (GroupDepth == 0)
return nullptr;
// Invalid, so bail.
if (GroupDepth == 0)
return nullptr;

GroupDepth -= 1;
break;
case '[':
CharClassDepth += 1;
break;
case ']':
if (CharClassDepth != 0)
CharClassDepth -= 1;
GroupDepth -= 1;
break;
case '[':
CharClassDepth += 1;
break;
case ']':
if (CharClassDepth != 0)
CharClassDepth -= 1;
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func f() {
(/E.e).foo(/0)

func foo<T, U>(_ x: T, _ y: U) {}
foo(/E.e, /E.e)
foo((/E.e), /E.e)
foo((/)(E.e), /E.e)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,28 @@ func m() {
// Unbalanced `}`, make sure we don't consider the string literal `{`.
func n() { / "{"}/ } // expected-error {{regex literal may not start with space; add backslash to escape}}

func o() {
_ = {
0
/x}}} /
2
} // expected-error {{extraneous '}' at top level}}
// expected-error@-3 {{extraneous '}' at top level}}
// expected-error@-4 {{consecutive statements on a line must be separated by ';'}}
// expected-error@-5 {{unterminated regex literal}}
// expected-warning@-6 {{regular expression literal is unused}}
// expected-warning@-6 {{integer literal is unused}}
} // expected-error {{extraneous '}' at top level}}

func p() {
_ = 2
/x} /
.bitWidth
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
// expected-error@-3 {{unterminated regex literal}}
// expected-error@-3 {{value of type 'Regex<Substring>' has no member 'bitWidth'}}
} // expected-error {{extraneous '}' at top level}}

func err1() { _ = / 0xG}/ }
// expected-error@-1 {{regex literal may not start with space; add backslash to escape}}
func err2() { _ = / 0oG}/ }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func i() {
func j() {
_ = {
0
/x}}} /
/x}}}/
2
}
}
Expand All @@ -69,7 +69,7 @@ func m() {
}
func n() {
_ = 2
/x} /
/x}/
.bitWidth
}
func o() {
Expand Down Expand Up @@ -105,11 +105,6 @@ enum E {
func foo<T>(_ x: T) {}
}

func a6() {
func foo<T, U>(_ x: T, _ y: U) {}
foo(/E.e, /E.e) // expected-error {{expected ',' separator}}
}

func a7() { _ = /\/}/ }

// Make sure we don't emit errors for these.
Expand Down
Loading