swiftlang · johnno1962 · Jul 2, 2018
diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def
@@ -138,6 +138,10 @@ ERROR(lex_invalid_u_escape,none,
       "\\u{...} escape sequence expects between 1 and 8 hex digits", ())
 ERROR(lex_invalid_u_escape_rbrace,none,
       "expected '}' in \\u{...} escape sequence", ())
+ERROR(lex_invalid_escape_delimiter,none,
+      "too many '#' characters in delimited escape", ())
+ERROR(lex_invalid_closing_delimiter,none,
+      "too many '#' characters in closing delimiter", ())
 
 ERROR(lex_invalid_unicode_scalar,none,
       "invalid unicode scalar", ())
@@ -1302,7 +1306,9 @@ ERROR(swift_native_objc_runtime_base_must_be_identifier,none,
       "@_swift_native_objc_runtime_base class name must be an identifier", ())
 
 ERROR(attr_interpolated_string,none,
-"%0 cannot be an interpolated string literal", (StringRef))
+"'%0' cannot be an interpolated string literal", (StringRef))
+ERROR(attr_extended_escaping_string,none,
+"'%0' cannot be an extended escaping string literal", (StringRef))
 
 ERROR(attr_only_at_non_local_scope, none,
       "attribute '%0' can only be used in a non-local scope", (StringRef))

diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h
@@ -346,19 +346,21 @@ class Lexer {
     enum : char { Literal, Expr } Kind;
     // Loc+Length for the segment inside the string literal, without quotes.
     SourceLoc Loc;
-    unsigned Length, IndentToStrip;
+    unsigned Length, IndentToStrip, CustomDelimiterLen;
     bool IsFirstSegment, IsLastSegment;
 
     static StringSegment getLiteral(SourceLoc Loc, unsigned Length,
                                     bool IsFirstSegment, bool IsLastSegment,
-                                    unsigned IndentToStrip) {
+                                    unsigned IndentToStrip,
+                                    unsigned CustomDelimiterLen) {
       StringSegment Result;
       Result.Kind = Literal;
       Result.Loc = Loc;
       Result.Length = Length;
       Result.IsFirstSegment = IsFirstSegment;
       Result.IsLastSegment = IsLastSegment;
       Result.IndentToStrip = IndentToStrip;
+      Result.CustomDelimiterLen = CustomDelimiterLen;
       return Result;
     }
 
@@ -370,6 +372,7 @@ class Lexer {
       Result.IsFirstSegment = false;
       Result.IsLastSegment = false;
       Result.IndentToStrip = 0;
+      Result.CustomDelimiterLen = 0;
       return Result;
     }
 
@@ -378,21 +381,50 @@ class Lexer {
     }
 
   };
-
+
+  /// Implementation of getEncodedStringSegment. Note that \p Str must support
+  /// reading one byte past the end.
+  static StringRef getEncodedStringSegmentImpl(StringRef Str,
+                                               SmallVectorImpl<char> &Buffer,
+                                               bool IsFirstSegment,
+                                               bool IsLastSegment,
+                                               unsigned IndentToStrip,
+                                               unsigned CustomDelimiterLen);
+
   /// \brief Compute the bytes that the actual string literal should codegen to.
   /// If a copy needs to be made, it will be allocated out of the provided
-  /// Buffer.
-  static StringRef getEncodedStringSegment(StringRef Str,
-                                           SmallVectorImpl<char> &Buffer,
-                                           bool IsFirstSegment = false,
-                                           bool IsLastSegment = false,
-                                           unsigned IndentToStrip = 0);
+  /// \p Buffer.
   StringRef getEncodedStringSegment(StringSegment Segment,
                                     SmallVectorImpl<char> &Buffer) const {
-    return getEncodedStringSegment(
+    return getEncodedStringSegmentImpl(
         StringRef(getBufferPtrForSourceLoc(Segment.Loc), Segment.Length),
         Buffer, Segment.IsFirstSegment, Segment.IsLastSegment,
-        Segment.IndentToStrip);
+        Segment.IndentToStrip, Segment.CustomDelimiterLen);
+  }
+
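+  /// \brief Given a string encoded with escapes like a string literal, compute
+  /// the byte content. \p Str does not need to be null-terminated; it is
+  /// copied into a terminated temporary before being decoded.
+  /// The result is \p Str itself when it equals the input; otherwise it is
+  /// allocated out of the provided \p Buffer.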
+  static StringRef getEncodedStringSegment(StringRef Str,
+                                           SmallVectorImpl<char> &Buffer,
+                                           bool IsFirstSegment = false,
+                                           bool IsLastSegment = false,
+                                           unsigned IndentToStrip = 0,
+                                           unsigned CustomDelimiterLen = 0) {
+    SmallString<128> TerminatedStrBuf(Str);
+    TerminatedStrBuf.push_back('\0');
+    StringRef TerminatedStr = StringRef(TerminatedStrBuf).drop_back();
+    StringRef Result = getEncodedStringSegmentImpl(TerminatedStr, Buffer,
+                                                   IsFirstSegment,
+                                                   IsLastSegment,
+                                                   IndentToStrip,
+                                                   CustomDelimiterLen);
+    if (Result == TerminatedStr)
+      return Str;
+    assert(Result.data() == Buffer.data());
+    return Result;
   }
 
   /// \brief Given a string literal token, separate it into string/expr segments
@@ -456,7 +488,8 @@ class Lexer {
     return diagnose(Loc, Diagnostic(DiagID, std::forward<ArgTypes>(Args)...));
   }
 
-  void formToken(tok Kind, const char *TokStart, bool MultilineString = false);
+  void formToken(tok Kind, const char *TokStart, bool IsMultilineString = false,
+                 unsigned CustomDelimiterLen = 0);
   void formEscapedIdentifierToken(const char *TokStart);
 
   /// Advance to the end of the line.
@@ -480,10 +513,10 @@ class Lexer {
   void lexTrivia(syntax::Trivia &T, bool IsForTrailingTrivia);
   static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);
 
-  unsigned lexCharacter(const char *&CurPtr,
-                        char StopQuote, bool EmitDiagnostics,
-                        bool MultilineString = false);
-  void lexStringLiteral();
+  unsigned lexCharacter(const char *&CurPtr, char StopQuote,
+                        bool EmitDiagnostics, bool IsMultilineString = false,
+                        unsigned CustomDelimiterLen = 0);
+  void lexStringLiteral(unsigned CustomDelimiterLen = 0);
   void lexEscapedIdentifier();
 
   void tryLexEditorPlaceholder();

diff --git a/include/swift/Parse/Token.h b/include/swift/Parse/Token.h
@@ -45,7 +45,10 @@ class Token {
   /// Modifiers for string literals
   unsigned MultilineString : 1;
 
-  // Padding bits == 32 - sizeof(Kind) * 8 - 3;
+  /// Length of the custom delimiter of a "raw" string literal; at most 255.
+  unsigned CustomDelimiterLen : 8;
+
+  // Padding bits == 32 - 11 (3 flag bits + 8 CustomDelimiterLen bits);
 
   /// \brief The length of the comment that precedes the token.
   unsigned CommentLength;
@@ -62,8 +65,8 @@ class Token {
 public:
   Token(tok Kind, StringRef Text, unsigned CommentLength = 0)
           : Kind(Kind), AtStartOfLine(false), EscapedIdentifier(false),
-            MultilineString(false), CommentLength(CommentLength),
-            Text(Text) {}
+            MultilineString(false), CustomDelimiterLen(0),
+            CommentLength(CommentLength), Text(Text) {}
 
   Token() : Token(tok::NUM_TOKENS, {}, 0) {}
 
@@ -266,17 +269,24 @@ class Token {
 
   /// \brief Set the token to the specified kind and source range.
   void setToken(tok K, StringRef T, unsigned CommentLength = 0,
-                bool MultilineString = false) {
+                bool IsMultilineString = false, unsigned CustomDelimiterLen = 0) {
     Kind = K;
     Text = T;
     this->CommentLength = CommentLength;
     EscapedIdentifier = false;
-    this->MultilineString = MultilineString;
+    this->MultilineString = IsMultilineString;
+    this->CustomDelimiterLen = CustomDelimiterLen;
+    assert(this->CustomDelimiterLen == CustomDelimiterLen &&
+           "custom string delimiter length > 255");
   }
 
-  bool IsMultilineString() const {
+  bool isMultilineString() const {
     return MultilineString;
   }
+
+  unsigned getCustomDelimiterLen() const {
+    return CustomDelimiterLen;
+  }
 };
 
 } // end namespace swift