swiftlang · hamishknight · Jun 15, 2022 · May 26, 2022 · milseman · Jun 1, 2022
diff --git a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift
@@ -44,6 +44,8 @@ enum ParseError: Error, Hashable {
   case invalidEscape(Character)
   case confusableCharacter(Character)
 
+  case quoteMayNotSpanMultipleLines
+
   case cannotReferToWholePattern
 
   case quantifierRequiresOperand(String)
@@ -138,6 +140,8 @@ extension ParseError: CustomStringConvertible {
       return "invalid escape sequence '\\\(c)'"
     case .confusableCharacter(let c):
       return "'\(c)' is confusable for a metacharacter; use '\\u{...}' instead"
+    case .quoteMayNotSpanMultipleLines:
+      return "quoted sequence may not span multiple lines in multi-line literal"
     case .cannotReferToWholePattern:
       return "cannot refer to whole pattern here"
     case .quantifierRequiresOperand(let q):

diff --git a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
@@ -579,7 +579,7 @@ extension Source {
 
   /// Try to consume quoted content
   ///
-  ///     Quote -> '\Q' (!'\E' .)* '\E'
+  ///     Quote -> '\Q' (!'\E' .)* '\E'?
   ///
   /// With `SyntaxOptions.experimentalQuotes`, also accepts
   ///
@@ -592,9 +592,24 @@ extension Source {
   mutating func lexQuote(context: ParsingContext) throws -> AST.Quote? {
     let str = try recordLoc { src -> String? in
       if src.tryEat(sequence: #"\Q"#) {
-        return try src.expectQuoted(endingWith: #"\E"#).value
+        let contents = src.lexUntil { src in
+          src.isEmpty || src.tryEat(sequence: #"\E"#)
+        }.value
+
+        // In multi-line literals, the quote may not span multiple lines.
+        if context.syntax.contains(.multilineExtendedSyntax),
+            contents.spansMultipleLinesInRegexLiteral {
+          throw ParseError.quoteMayNotSpanMultipleLines
+        }
+
+        // The sequence must not be empty in a custom character class.
+        if context.isInCustomCharacterClass && contents.isEmpty {
+          throw ParseError.expectedNonEmptyContents
+        }
+        return contents
       }
       if context.experimentalQuotes, src.tryEat("\"") {
+        // TODO: Can experimental quotes be empty?
         return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value
       }
       return nil

diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift
@@ -592,6 +592,13 @@ public func parse<S: StringProtocol>(
   return ast
 }
 
+extension String {
+  /// Whether the string contains "\n" or "\r", i.e. spans multiple lines.
+  var spansMultipleLinesInRegexLiteral: Bool {
+    unicodeScalars.contains(where: { $0 == "\n" || $0 == "\r" })
+  }
+}
+
 /// Retrieve the default set of syntax options that a delimiter and literal
 /// contents indicates.
 fileprivate func defaultSyntaxOptions(
@@ -601,8 +608,7 @@ fileprivate func defaultSyntaxOptions(
   case .forwardSlash:
     // For an extended syntax forward slash e.g #/.../#, extended syntax is
     // permitted if it spans multiple lines.
-    if delim.poundCount > 0 &&
-        contents.unicodeScalars.contains(where: { $0 == "\n" || $0 == "\r" }) {
+    if delim.poundCount > 0 && contents.spansMultipleLinesInRegexLiteral {
       return .multilineExtendedSyntax
     }
     return .traditional

diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift
@@ -754,6 +754,14 @@ extension RegexTests {
     // This follows the PCRE behavior.
     parseTest(#"\Q\\E"#, quote("\\"))
 
+    // ICU allows quotes to be empty outside of custom character classes.
+    parseTest(#"\Q\E"#, quote(""))
+
+    // Quotes may be unterminated.
+    parseTest(#"\Qab"#, quote("ab"))
+    parseTest(#"\Q"#, quote(""))
+    parseTest("\\Qab\\", quote("ab\\"))
+
     parseTest(#"a" ."b"#, concat("a", quote(" ."), "b"),
               syntax: .experimental)
     parseTest(#"a" .""b""#, concat("a", quote(" ."), quote("b")),
@@ -2539,8 +2547,6 @@ extension RegexTests {
     diagnosticTest(#"(?P"#, .expected(")"))
     diagnosticTest(#"(?R"#, .expected(")"))
 
-    diagnosticTest(#"\Qab"#, .expected("\\E"))
-    diagnosticTest("\\Qab\\", .expected("\\E"))
     diagnosticTest(#""ab"#, .expected("\""), syntax: .experimental)
     diagnosticTest(#""ab\""#, .expected("\""), syntax: .experimental)
     diagnosticTest("\"ab\\", .expectedEscape, syntax: .experimental)
@@ -2619,6 +2625,9 @@ extension RegexTests {
     // TODO: Custom diagnostic for missing '\Q'
     diagnosticTest(#"\E"#, .invalidEscape("E"))
 
+    diagnosticTest(#"[\Q\E]"#, .expectedNonEmptyContents)
+    diagnosticTest(#"[\Q]"#, .expected("]"))
+
     // PCRE treats these as octal, but we require a `0` prefix.
     diagnosticTest(#"[\1]"#, .invalidEscape("1"))
     diagnosticTest(#"[\123]"#, .invalidEscape("1"))
@@ -2711,6 +2720,26 @@ extension RegexTests {
       """, .cannotRemoveExtendedSyntaxInMultilineMode
     )
 
+    diagnosticWithDelimitersTest(#"""
+      #/
+      \Q
+      \E
+      /#
+      """#, .quoteMayNotSpanMultipleLines)
+
+    diagnosticWithDelimitersTest(#"""
+      #/
+        \Qabc
+          \E
+      /#
+      """#, .quoteMayNotSpanMultipleLines)
+
+    diagnosticWithDelimitersTest(#"""
+      #/
+        \Q
+      /#
+      """#, .quoteMayNotSpanMultipleLines)
+
     // MARK: Group specifiers
 
     diagnosticTest(#"(*"#, .unknownGroupKind("*"))