Skip to content

Commit 9713d34

Browse files
committed
[Parse] Expand unbalanced ) regex literal heuristic
Previously we would only check for a starting character of `)` when performing a tentative lex of a regex literal. Expand this check to cover the entire range of the regex literal, taking escapes and custom character classes into account.
1 parent 6ea33f0 commit 9713d34

File tree

3 files changed

+152
-30
lines changed

3 files changed

+152
-30
lines changed

lib/Parse/Lexer.cpp

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,23 +2000,11 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20002000
// 2
20012001
// }
20022002
//
2003-
// This takes advantage of the consistent operator spacing rule. We also
2004-
// need to ban ')' to avoid ambiguity with unapplied operator references e.g
2005-
// `reduce(1, /)`. This would be invalid regex syntax anyways. Note this
2006-
// doesn't totally save us from e.g `foo(/, 0)`, but it should at least
2007-
// help, and it ensures users can always surround their operator ref in
2008-
// parens `(/)` to fix the issue.
2003+
// This takes advantage of the consistent operator spacing rule.
20092004
// TODO: This heuristic should be sunk into the Swift library once we have a
20102005
// way of doing fix-its from there.
20112006
auto *RegexContentStart = TokStart + 1;
20122007
switch (*RegexContentStart) {
2013-
case ')': {
2014-
if (!MustBeRegex)
2015-
return false;
2016-
2017-
// ')' is invalid anyway, so we can let the parser diagnose it.
2018-
break;
2019-
}
20202008
case ' ':
20212009
case '\t': {
20222010
if (!MustBeRegex)
@@ -2072,6 +2060,48 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20722060
Ptr--;
20732061
}
20742062

2063+
// If we're tentatively lexing `/.../`, scan to make sure we don't have any
2064+
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
2065+
// references e.g. `reduce(1, /)` and `foo(/, 0) / 2`. An unbalanced ')' is
2066+
// invalid regex syntax anyway. This ensures users can surround their operator ref
2067+
// in parens `(/)` to fix the issue. This also applies to prefix operators
2068+
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
2069+
// or not we're in a custom character class `[...]`, as parens are literal
2070+
// there.
2071+
// TODO: This should be sunk into the Swift library.
2072+
if (IsForwardSlash && !MustBeRegex) {
2073+
unsigned CharClassDepth = 0;
2074+
unsigned GroupDepth = 0;
2075+
for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) {
2076+
switch (*Cursor) {
2077+
case '\\':
2078+
// Skip over the next character of an escape.
2079+
Cursor++;
2080+
break;
2081+
case '(':
2082+
if (CharClassDepth == 0)
2083+
GroupDepth += 1;
2084+
break;
2085+
case ')':
2086+
if (CharClassDepth != 0)
2087+
break;
2088+
2089+
// Invalid, so bail.
2090+
if (GroupDepth == 0)
2091+
return false;
2092+
2093+
GroupDepth -= 1;
2094+
break;
2095+
case '[':
2096+
CharClassDepth += 1;
2097+
break;
2098+
case ']':
2099+
if (CharClassDepth != 0)
2100+
CharClassDepth -= 1;
2101+
}
2102+
}
2103+
}
2104+
20752105
// Update to point to where we ended regex lexing.
20762106
assert(Ptr > TokStart && Ptr <= BufferEnd);
20772107
CurPtr = Ptr;

test/StringProcessing/Parse/forward-slash-regex.swift

Lines changed: 86 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ prefix operator /
66
prefix operator ^/
77
prefix operator /^/
88

9+
prefix func ^/ <T> (_ x: T) -> T { x } // expected-note {{'^/' declared here}}
10+
911
prefix operator !!
1012
prefix func !! <T>(_ x: T) -> T { x }
1113

@@ -259,6 +261,8 @@ _ = /x // comment
259261
_ = /x/*comment*/
260262
// expected-error@-1 {{'/' is not a prefix unary operator}}
261263

264+
// MARK: Unapplied operators
265+
262266
// These become regex literals, unless surrounded in parens.
263267
func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 4{{'baz' declared here}}
264268
baz(/, /)
@@ -273,11 +277,7 @@ baz(/^, /)
273277
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
274278
// expected-error@-2 {{missing argument for parameter #2 in call}}
275279

276-
do {
277-
baz((/^), /)
278-
// expected-error@-1 {{closing ')' does not balance any groups openings}}
279-
// expected-note@-2 {{to match this opening '('}}
280-
} // expected-error {{expected ')' in expression list}}
280+
baz((/^), /)
281281

282282
baz(^^/, /) // expected-error {{missing argument for parameter #2 in call}}
283283
baz((^^/), /)
@@ -287,20 +287,18 @@ bazbaz(/, 0)
287287
bazbaz(^^/, 0)
288288

289289
func qux<T>(_ x: (Int, Int) -> Int, _ y: T) -> Int { 0 }
290-
do {
291-
_ = qux(/, 1) / 2
292-
// expected-error@-1:15 {{cannot parse regular expression: closing ')' does not balance any groups openings}}
293-
// expected-error@-2:19 {{expected ',' separator}}
294-
}
290+
_ = qux(/, 1) / 2
295291
do {
296292
_ = qux(/, "(") / 2
297293
// expected-error@-1 {{cannot convert value of type 'Regex<(Substring, Substring)>' to expected argument type '(Int, Int) -> Int'}}
298294
// expected-error@-2:21 {{expected ',' separator}}
299295
}
296+
_ = qux((/), "(") / 2
300297
_ = qux(/, 1) // this comment tests to make sure we don't try and end the regex on the starting '/' of '//'.
301298
_ = qux(/, 1) /* same thing with a block comment */
302299

303-
func quxqux(_ x: (Int, Int) -> Int) {}
300+
@discardableResult
301+
func quxqux(_ x: (Int, Int) -> Int) -> Int { 0 }
304302
quxqux(/^/) // expected-error {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
305303
quxqux((/^/)) // expected-error {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
306304
quxqux({ $0 /^/ $1 })
@@ -310,17 +308,88 @@ quxqux(!/^/)
310308
// expected-error@-2 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}
311309

312310
quxqux(/^)
313-
314-
do {
315-
quxqux(/^) / 1
316-
// expected-error@-1 {{closing ')' does not balance any groups openings}}
317-
// expected-error@-2 {{expected ',' separator}}
318-
}
311+
_ = quxqux(/^) / 1
319312

320313
let arr: [Double] = [2, 3, 4]
321314
_ = arr.reduce(1, /) / 3
322315
_ = arr.reduce(1, /) + arr.reduce(1, /)
323316

317+
// MARK: ')' disambiguation behavior
318+
319+
_ = (/x)
320+
// expected-error@-1 {{'/' is not a prefix unary operator}}
321+
322+
_ = (/x)/
323+
// expected-error@-1 {{'/' is not a prefix unary operator}}
324+
// expected-error@-2 {{'/' is not a postfix unary operator}}
325+
326+
_ = (/[(0)])/
327+
// expected-error@-1 {{'/' is not a prefix unary operator}}
328+
// expected-error@-2 {{'/' is not a postfix unary operator}}
329+
330+
_ = /[(0)]/
331+
_ = /(x)/
332+
_ = /[)]/
333+
_ = /[a\])]/
334+
_ = /([)])/
335+
_ = /]]][)]/
336+
337+
_ = /
338+
// expected-error@-1 {{unterminated regex literal}}
339+
340+
_ = /)
341+
// expected-error@-1 {{unterminated regex literal}}
342+
// expected-error@-2 {{closing ')' does not balance any groups openings}}
343+
344+
let fn: (Int, Int) -> Int = (/)
345+
346+
_ = /\()/
347+
// expected-error@-1 {{'/' is not a prefix unary operator}}
348+
// expected-error@-2 {{'/' is not a postfix unary operator}}
349+
// expected-error@-3 {{invalid component of Swift key path}}
350+
351+
do {
352+
let _: Regex = (/whatever\)/
353+
// expected-note@-1 {{to match this opening '('}}
354+
} // expected-error {{expected ')' in expression list}}
355+
do {
356+
_ = /(()()))/
357+
// expected-error@-1 {{'/' is not a prefix unary operator}}
358+
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
359+
// expected-error@-3 {{expected expression}}
360+
}
361+
do {
362+
_ = /[x])/
363+
// expected-error@-1 {{'/' is not a prefix unary operator}}
364+
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
365+
// expected-error@-3 {{expected expression}}
366+
}
367+
do {
368+
_ = /[\]])/
369+
// expected-error@-1 {{expected expression path in Swift key path}}
370+
}
371+
372+
_ = ^/x/
373+
// expected-error@-1 {{'^' is not a prefix unary operator}}
374+
375+
_ = (^/x)/
376+
// expected-error@-1 {{'/' is not a postfix unary operator}}
377+
378+
_ = (!!/x/)
379+
380+
_ = ^/"/"
381+
// expected-error@-1 {{'^' is not a prefix unary operator}}
382+
// expected-error@-2 {{unterminated string literal}}
383+
384+
_ = ^/"[/"
385+
// expected-error@-1 {{'^' is not a prefix unary operator}}
386+
// expected-error@-2 {{unterminated string literal}}
387+
// expected-error@-3 {{expected custom character class members}}
388+
389+
_ = (^/)("/")
390+
391+
// MARK: Starting characters
392+
324393
// Fine.
325394
_ = /./
326395

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// RUN: %target-typecheck-verify-swift -enable-bare-slash-regex -disable-availability-checking
2+
// REQUIRES: swift_in_compiler
3+
4+
// Test the behavior of prefix '/' with regex literals enabled.
5+
6+
prefix operator /
7+
prefix func / <T> (_ x: T) -> T { x }
8+
9+
enum E {
10+
case e
11+
func foo<T>(_ x: T) {}
12+
}
13+
14+
_ = /E.e
15+
(/E.e).foo(/0)
16+
17+
func foo<T, U>(_ x: T, _ y: U) {}
18+
foo(/E.e, /E.e) // expected-error {{expected ',' separator}}
19+
foo((/E.e), /E.e)
20+
foo((/)(E.e), /E.e)
21+
22+
func bar<T>(_ x: T) -> Int { 0 }
23+
_ = bar(/E.e) / 2

0 commit comments

Comments
 (0)