Skip to content

Commit 472f7b6

Browse files
authored
Merge pull request #59778 from hamishknight/watch-this-space-5.7
2 parents 5f19fdf + 56025ab commit 472f7b6

File tree

8 files changed

+232
-117
lines changed

8 files changed

+232
-117
lines changed

include/swift/AST/DiagnosticsParse.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ ERROR(lex_invalid_closing_delimiter,none,
143143
ERROR(lex_regex_literal_invalid_starting_char,none,
144144
"regex literal may not start with %0; add backslash to escape",
145145
(StringRef))
146+
ERROR(lex_regex_literal_invalid_ending_char,none,
147+
"regex literal may not end with %0; use extended literal instead",
148+
(StringRef))
146149
ERROR(lex_regex_literal_unterminated,none,
147150
"unterminated regex literal", ())
148151

lib/Parse/Lexer.cpp

Lines changed: 83 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -2040,6 +2040,14 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
20402040

20412041
bool IsForwardSlash = (*TokStart == '/');
20422042

2043+
auto spaceOrTabDescription = [](char c) -> StringRef {
2044+
switch (c) {
2045+
case ' ': return "space";
2046+
case '\t': return "tab";
2047+
default: llvm_unreachable("Unhandled case");
2048+
}
2049+
};
2050+
20432051
// Check if we're able to lex a `/.../` regex.
20442052
if (IsForwardSlash) {
20452053
// For `/.../` regex literals, we need to ban space and tab at the start of
@@ -2055,33 +2063,17 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
20552063
// TODO: This heuristic should be sunk into the Swift library once we have a
20562064
// way of doing fix-its from there.
20572065
auto *RegexContentStart = TokStart + 1;
2058-
switch (*RegexContentStart) {
2059-
case ' ':
2060-
case '\t': {
2066+
if (*RegexContentStart == ' ' || *RegexContentStart == '\t') {
20612067
if (!MustBeRegex)
20622068
return nullptr;
20632069

20642070
if (Diags) {
20652071
// We must have a regex, so emit an error for space and tab.
2066-
StringRef DiagChar;
2067-
switch (*RegexContentStart) {
2068-
case ' ':
2069-
DiagChar = "space";
2070-
break;
2071-
case '\t':
2072-
DiagChar = "tab";
2073-
break;
2074-
default:
2075-
llvm_unreachable("Unhandled case");
2076-
}
20772072
Diags->diagnose(getSourceLoc(RegexContentStart),
2078-
diag::lex_regex_literal_invalid_starting_char, DiagChar)
2073+
diag::lex_regex_literal_invalid_starting_char,
2074+
spaceOrTabDescription(*RegexContentStart))
20792075
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20802076
}
2081-
break;
2082-
}
2083-
default:
2084-
break;
20852077
}
20862078
}
20872079

@@ -2098,60 +2090,82 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
20982090
if (Ptr == TokStart)
20992091
return nullptr;
21002092

2101-
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2102-
// We prefer to lex the comment as it's more likely than not that is what
2103-
// the user is expecting.
2104-
// TODO: This should be sunk into the Swift library.
2105-
if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
2106-
if (!MustBeRegex)
2107-
return nullptr;
2093+
// Perform some additional heuristics to see if we can lex `/.../`.
2094+
// TODO: These should all be sunk into the Swift library.
2095+
if (IsForwardSlash) {
2096+
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2097+
// We prefer to lex the comment as it's more likely than not that is what
2098+
// the user is expecting.
2099+
if (Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
2100+
if (!MustBeRegex)
2101+
return nullptr;
21082102

2109-
if (Diags) {
2110-
Diags->diagnose(getSourceLoc(TokStart),
2111-
diag::lex_regex_literal_unterminated);
2112-
}
2113-
// Move the pointer back to the '/' of the comment.
2114-
Ptr--;
2115-
}
2116-
2117-
// If we're tentatively lexing `/.../`, scan to make sure we don't have any
2118-
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
2119-
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
2120-
// regex syntax anyways. This ensures users can surround their operator ref
2121-
// in parens `(/)` to fix the issue. This also applies to prefix operators
2122-
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
2123-
// or not we're in a custom character class `[...]`, as parens are literal
2124-
// there.
2125-
// TODO: This should be sunk into the Swift library.
2126-
if (IsForwardSlash && !MustBeRegex) {
2127-
unsigned CharClassDepth = 0;
2128-
unsigned GroupDepth = 0;
2129-
for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) {
2130-
switch (*Cursor) {
2131-
case '\\':
2132-
// Skip over the next character of an escape.
2133-
Cursor++;
2134-
break;
2135-
case '(':
2136-
if (CharClassDepth == 0)
2137-
GroupDepth += 1;
2138-
break;
2139-
case ')':
2140-
if (CharClassDepth != 0)
2103+
if (Diags) {
2104+
Diags->diagnose(getSourceLoc(TokStart),
2105+
diag::lex_regex_literal_unterminated);
2106+
}
2107+
// Move the pointer back to the '/' of the comment.
2108+
Ptr--;
2109+
}
2110+
auto *TokEnd = Ptr - 1;
2111+
auto *ContentEnd = TokEnd - 1;
2112+
2113+
// We also ban unescaped space and tab at the end of a `/.../` literal.
2114+
if (*TokEnd == '/' && (TokEnd - TokStart > 2) && ContentEnd[-1] != '\\' &&
2115+
(*ContentEnd == ' ' || *ContentEnd == '\t')) {
2116+
if (!MustBeRegex)
2117+
return nullptr;
2118+
2119+
if (Diags) {
2120+
// Diagnose and suggest using a `#/.../#` literal instead. We could
2121+
// suggest escaping, but that would be wrong if the user has written (?x).
2122+
// TODO: Should we suggest this for space-as-first character too?
2123+
Diags->diagnose(getSourceLoc(ContentEnd),
2124+
diag::lex_regex_literal_invalid_ending_char,
2125+
spaceOrTabDescription(*ContentEnd))
2126+
.fixItInsert(getSourceLoc(TokStart), "#")
2127+
.fixItInsert(getSourceLoc(Ptr), "#");
2128+
}
2129+
}
2130+
2131+
// If we're tentatively lexing `/.../`, scan to make sure we don't have any
2132+
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
2133+
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
2134+
// regex syntax anyways. This ensures users can surround their operator ref
2135+
// in parens `(/)` to fix the issue. This also applies to prefix operators
2136+
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
2137+
// or not we're in a custom character class `[...]`, as parens are literal
2138+
// there.
2139+
if (!MustBeRegex) {
2140+
unsigned CharClassDepth = 0;
2141+
unsigned GroupDepth = 0;
2142+
for (auto *Cursor = TokStart + 1; Cursor < TokEnd; Cursor++) {
2143+
switch (*Cursor) {
2144+
case '\\':
2145+
// Skip over the next character of an escape.
2146+
Cursor++;
2147+
break;
2148+
case '(':
2149+
if (CharClassDepth == 0)
2150+
GroupDepth += 1;
21412151
break;
2152+
case ')':
2153+
if (CharClassDepth != 0)
2154+
break;
21422155

2143-
// Invalid, so bail.
2144-
if (GroupDepth == 0)
2145-
return nullptr;
2156+
// Invalid, so bail.
2157+
if (GroupDepth == 0)
2158+
return nullptr;
21462159

2147-
GroupDepth -= 1;
2148-
break;
2149-
case '[':
2150-
CharClassDepth += 1;
2151-
break;
2152-
case ']':
2153-
if (CharClassDepth != 0)
2154-
CharClassDepth -= 1;
2160+
GroupDepth -= 1;
2161+
break;
2162+
case '[':
2163+
CharClassDepth += 1;
2164+
break;
2165+
case ']':
2166+
if (CharClassDepth != 0)
2167+
CharClassDepth -= 1;
2168+
}
21552169
}
21562170
}
21572171
}

test/StringProcessing/Parse/forward-slash-regex-skipping-allowed.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ func f() {
4343
(/E.e).foo(/0)
4444

4545
func foo<T, U>(_ x: T, _ y: U) {}
46+
foo(/E.e, /E.e)
4647
foo((/E.e), /E.e)
4748
foo((/)(E.e), /E.e)
4849

test/StringProcessing/Parse/forward-slash-regex-skipping-invalid.swift

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,28 @@ func m() {
5858
// Unbalanced `}`, make sure we don't consider the string literal `{`.
5959
func n() { / "{"}/ } // expected-error {{regex literal may not start with space; add backslash to escape}}
6060
61+
func o() {
62+
_ = {
63+
0
64+
/x}}} /
65+
2
66+
} // expected-error {{extraneous '}' at top level}}
67+
// expected-error@-3 {{extraneous '}' at top level}}
68+
// expected-error@-4 {{consecutive statements on a line must be separated by ';'}}
69+
// expected-error@-5 {{unterminated regex literal}}
70+
// expected-warning@-6 {{regular expression literal is unused}}
71+
// expected-warning@-6 {{integer literal is unused}}
72+
} // expected-error {{extraneous '}' at top level}}
73+
74+
func p() {
75+
_ = 2
76+
/x} /
77+
.bitWidth
78+
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
79+
// expected-error@-3 {{unterminated regex literal}}
80+
// expected-error@-3 {{value of type 'Regex<Substring>' has no member 'bitWidth'}}
81+
} // expected-error {{extraneous '}' at top level}}
82+
6183
func err1() { _ = / 0xG}/ }
6284
// expected-error@-1 {{regex literal may not start with space; add backslash to escape}}
6385
func err2() { _ = / 0oG}/ }

test/StringProcessing/Parse/forward-slash-regex-skipping.swift

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func i() {
4949
func j() {
5050
_ = {
5151
0
52-
/x}}} /
52+
/x}}}/
5353
2
5454
}
5555
}
@@ -69,7 +69,7 @@ func m() {
6969
}
7070
func n() {
7171
_ = 2
72-
/x} /
72+
/x}/
7373
.bitWidth
7474
}
7575
func o() {
@@ -105,10 +105,7 @@ enum E {
105105
func foo<T>(_ x: T) {}
106106
}
107107

108-
func a6() {
109-
func foo<T, U>(_ x: T, _ y: U) {}
110-
foo(/E.e, /E.e) // expected-error {{expected ',' separator}}
111-
}
108+
func a7() { _ = /\/}/ }
112109

113110
// Make sure we don't emit errors for these.
114111
func err1() { _ = /0xG/ }

0 commit comments

Comments
 (0)