swiftlang
diff --git a/include/swift/AST/DiagnosticsParse.def
Lines changed: 3 additions & 0 deletions b/include/swift/AST/DiagnosticsParse.def
Lines changed: 3 additions & 0 deletions
diff --git a/lib/Parse/Lexer.cpp
Lines changed: 83 additions & 69 deletions b/lib/Parse/Lexer.cpp
Lines changed: 83 additions & 69 deletions
diff --git a/test/StringProcessing/Parse/forward-slash-regex-skipping-allowed.swift
Lines changed: 1 addition & 0 deletions b/test/StringProcessing/Parse/forward-slash-regex-skipping-allowed.swift
Lines changed: 1 addition & 0 deletions
diff --git a/test/StringProcessing/Parse/forward-slash-regex-skipping-invalid.swift
Lines changed: 22 additions & 0 deletions b/test/StringProcessing/Parse/forward-slash-regex-skipping-invalid.swift
Lines changed: 22 additions & 0 deletions
diff --git a/test/StringProcessing/Parse/forward-slash-regex-skipping.swift
Lines changed: 3 additions & 6 deletions b/test/StringProcessing/Parse/forward-slash-regex-skipping.swift
Lines changed: 3 additions & 6 deletions
@@ -143,6 +143,9 @@ ERROR(lex_invalid_closing_delimiter,none,
 ERROR(lex_regex_literal_invalid_starting_char,none,
       "regex literal may not start with %0; add backslash to escape",
       (StringRef))
+ERROR(lex_regex_literal_invalid_ending_char,none,
+      "regex literal may not end with %0; use extended literal instead",
+      (StringRef))
 ERROR(lex_regex_literal_unterminated,none,
       "unterminated regex literal", ())
 
 
@@ -2040,6 +2040,14 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
 
   bool IsForwardSlash = (*TokStart == '/');
 
+  auto spaceOrTabDescription = [](char c) -> StringRef { // Diagnostic name for ' ' or '\t'.
+    switch (c) {
+    case ' ':  return "space";
+    case '\t': return "tab";
+    default:   llvm_unreachable("Unhandled case"); // Visible call sites check for ' ' or '\t' first.
+    }
+  };
+
   // Check if we're able to lex a `/.../` regex.
   if (IsForwardSlash) {
     // For `/.../` regex literals, we need to ban space and tab at the start of
@@ -2055,33 +2063,17 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
     // TODO: This heuristic should be sunk into the Swift library once we have a
     // way of doing fix-its from there.
     auto *RegexContentStart = TokStart + 1;
-    switch (*RegexContentStart) {
-    case ' ':
-    case '\t': {
+    if (*RegexContentStart == ' ' || *RegexContentStart == '\t') {
       if (!MustBeRegex)
         return nullptr;
 
       if (Diags) {
         // We must have a regex, so emit an error for space and tab.
-        StringRef DiagChar;
-        switch (*RegexContentStart) {
-        case ' ':
-          DiagChar = "space";
-          break;
-        case '\t':
-          DiagChar = "tab";
-          break;
-        default:
-          llvm_unreachable("Unhandled case");
-        }
         Diags->diagnose(getSourceLoc(RegexContentStart),
-                        diag::lex_regex_literal_invalid_starting_char, DiagChar)
+                        diag::lex_regex_literal_invalid_starting_char,
+                        spaceOrTabDescription(*RegexContentStart))
             .fixItInsert(getSourceLoc(RegexContentStart), "\\");
       }
-      break;
-    }
-    default:
-      break;
     }
   }
 
@@ -2098,60 +2090,82 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
   if (Ptr == TokStart)
     return nullptr;
 
-  // If we're lexing `/.../`, error if we ended on the opening of a comment.
-  // We prefer to lex the comment as it's more likely than not that is what
-  // the user is expecting.
-  // TODO: This should be sunk into the Swift library.
-  if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
-    if (!MustBeRegex)
-      return nullptr;
+  // Perform some additional heuristics to see if we can lex `/.../`.
+  // TODO: These should all be sunk into the Swift library.
+  if (IsForwardSlash) {
+    // If we're lexing `/.../`, error if we ended on the opening of a comment.
+    // We prefer to lex the comment as it's more likely than not that is what
+    // the user is expecting.
+    if (Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
+      if (!MustBeRegex)
+        return nullptr;
 
-    if (Diags) {
-      Diags->diagnose(getSourceLoc(TokStart),
-                      diag::lex_regex_literal_unterminated);
-    }
-    // Move the pointer back to the '/' of the comment.
-    Ptr--;
-  }
-
-  // If we're tentatively lexing `/.../`, scan to make sure we don't have any
-  // unbalanced ')'s. This helps avoid ambiguity with unapplied operator
-  // references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
-  // regex syntax anyways. This ensures users can surround their operator ref
-  // in parens `(/)` to fix the issue. This also applies to prefix operators
-  // that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
-  // or not we're in a custom character class `[...]`, as parens are literal
-  // there.
-  // TODO: This should be sunk into the Swift library.
-  if (IsForwardSlash && !MustBeRegex) {
-    unsigned CharClassDepth = 0;
-    unsigned GroupDepth = 0;
-    for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) {
-      switch (*Cursor) {
-      case '\\':
-        // Skip over the next character of an escape.
-        Cursor++;
-        break;
-      case '(':
-        if (CharClassDepth == 0)
-          GroupDepth += 1;
-        break;
-      case ')':
-        if (CharClassDepth != 0)
+      if (Diags) {
+        Diags->diagnose(getSourceLoc(TokStart),
+                        diag::lex_regex_literal_unterminated);
+      }
+      // Move the pointer back to the '/' of the comment.
+      Ptr--;
+    }
+    auto *TokEnd = Ptr - 1;
+    auto *ContentEnd = TokEnd - 1;
+
+    // We also ban unescaped space and tab at the end of a `/.../` literal.
+    if (*TokEnd == '/' && (TokEnd - TokStart > 2) && ContentEnd[-1] != '\\' &&
+        (*ContentEnd == ' ' || *ContentEnd == '\t')) {
+      if (!MustBeRegex)
+        return nullptr;
+
+      if (Diags) {
+        // Diagnose and suggest using a `#/.../#` literal instead. We could
+        // suggest escaping, but that would be wrong if the user has written (?x).
+        // TODO: Should we suggest this for space-as-first character too?
+        Diags->diagnose(getSourceLoc(ContentEnd),
+                        diag::lex_regex_literal_invalid_ending_char,
+                        spaceOrTabDescription(*ContentEnd))
+            .fixItInsert(getSourceLoc(TokStart), "#")
+            .fixItInsert(getSourceLoc(Ptr), "#");
+      }
+    }
+
+    // If we're tentatively lexing `/.../`, scan to make sure we don't have any
+    // unbalanced ')'s. This helps avoid ambiguity with unapplied operator
+    // references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
+    // regex syntax anyways. This ensures users can surround their operator ref
+    // in parens `(/)` to fix the issue. This also applies to prefix operators
+    // that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
+    // or not we're in a custom character class `[...]`, as parens are literal
+    // there.
+    if (!MustBeRegex) {
+      unsigned CharClassDepth = 0;
+      unsigned GroupDepth = 0;
+      for (auto *Cursor = TokStart + 1; Cursor < TokEnd; Cursor++) {
+        switch (*Cursor) {
+        case '\\':
+          // Skip over the next character of an escape.
+          Cursor++;
+          break;
+        case '(':
+          if (CharClassDepth == 0)
+            GroupDepth += 1;
           break;
+        case ')':
+          if (CharClassDepth != 0)
+            break;
 
-        // Invalid, so bail.
-        if (GroupDepth == 0)
-          return nullptr;
+          // Invalid, so bail.
+          if (GroupDepth == 0)
+            return nullptr;
 
-        GroupDepth -= 1;
-        break;
-      case '[':
-        CharClassDepth += 1;
-        break;
-      case ']':
-        if (CharClassDepth != 0)
-          CharClassDepth -= 1;
+          GroupDepth -= 1;
+          break;
+        case '[':
+          CharClassDepth += 1;
+          break;
+        case ']':
+          if (CharClassDepth != 0)
+            CharClassDepth -= 1;
+        }
       }
     }
   }
 
@@ -43,6 +43,7 @@ func f() {
   (/E.e).foo(/0)
 
   func foo<T, U>(_ x: T, _ y: U) {}
+  foo(/E.e, /E.e)
   foo((/E.e), /E.e)
   foo((/)(E.e), /E.e)
 
 
@@ -58,6 +58,28 @@ func m() {
 // Unbalanced `}`, make sure we don't consider the string literal `{`.
 func n() { / "{"}/ } // expected-error {{regex literal may not start with space; add backslash to escape}}
 
+func o() {
+  _ = {
+    0
+    /x}}} /
+    2
+  } // expected-error {{extraneous '}' at top level}}
+  // expected-error@-3 {{extraneous '}' at top level}}
+  // expected-error@-4 {{consecutive statements on a line must be separated by ';'}}
+  // expected-error@-5 {{unterminated regex literal}}
+  // expected-warning@-6 {{regular expression literal is unused}}
+  // expected-warning@-6 {{integer literal is unused}}
+} // expected-error {{extraneous '}' at top level}}
+
+func p() {
+  _ = 2
+  /x} /
+    .bitWidth
+  // expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
+  // expected-error@-3 {{unterminated regex literal}}
+  // expected-error@-3 {{value of type 'Regex<Substring>' has no member 'bitWidth'}}
+} // expected-error {{extraneous '}' at top level}}
+
 func err1() { _ = / 0xG}/ }
 // expected-error@-1 {{regex literal may not start with space; add backslash to escape}}
 func err2() { _ = / 0oG}/ }
 
@@ -49,7 +49,7 @@ func i() {
 func j() {
   _ = {
     0
-    /x}}} /
+    /x}}}/ 
     2
   }
 }
@@ -69,7 +69,7 @@ func m() {
 }
 func n() {
   _ = 2
-  /x} /
+  /x}/
     .bitWidth
 }
 func o() {
@@ -105,10 +105,7 @@ enum E {
   func foo<T>(_ x: T) {}
 }
 
-func a6() {
-  func foo<T, U>(_ x: T, _ y: U) {}
-  foo(/E.e, /E.e) // expected-error {{expected ',' separator}}
-}
+func a7() { _ = /\/}/ }
 
 // Make sure we don't emit errors for these.
 func err1() { _ = /0xG/ }
Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ func i() {`
`49`	`49`	`func j() {`
`50`	`50`	`_ = {`
`51`	`51`	`0`
`52`		`- /x}}} /`
	`52`	`+ /x}}}/`
`53`	`53`	`2`
`54`	`54`	`}`
`55`	`55`	`}`
`@@ -69,7 +69,7 @@ func m() {`
`69`	`69`	`}`
`70`	`70`	`func n() {`
`71`	`71`	`_ = 2`
`72`		`- /x} /`
	`72`	`+ /x}/`
`73`	`73`	`.bitWidth`
`74`	`74`	`}`
`75`	`75`	`func o() {`
`@@ -105,10 +105,7 @@ enum E {`
`105`	`105`	`func foo<T>(_ x: T) {}`
`106`	`106`	`}`
`107`	`107`
`108`		`-func a6() {`
`109`		`- func foo<T, U>(_ x: T, _ y: U) {}`
`110`		`- foo(/E.e, /E.e) // expected-error {{expected ',' separator}}`
`111`		`-}`
	`108`	`+func a7() { _ = /\/}/ }`
`112`	`109`
`113`	`110`	`// Make sure we don't emit errors for these.`
`114`	`111`	`func err1() { _ = /0xG/ }`