[Parse] An implementation for SE-0182

johnno1962 · DougGregor · commit ebffdd149fc5 · 2017-07-25T16:40:10.000-07:00
(cherry picked from commit c0fcc1a)
diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp
@@ -1122,6 +1122,27 @@ unsigned Lexer::lexUnicodeEscape(const char *&CurPtr, Lexer *Diags) {
   return CharValue;
 }
 
+/// maybeConsumeNewlineEscape - If only spaces/tabs precede a line ending at
+/// CurPtr + Offset, move CurPtr past that line ending and return true.
+static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
+  const char *TmpPtr = CurPtr + Offset; // scan cursor; CurPtr is only written on success
+  while (true) {
+    switch (*TmpPtr++) {
+    case ' ': case '\t': // whitespace before the line ending is skipped
+      continue;
+    case '\r':
+      if (*TmpPtr == '\n') // treat "\r\n" as a single line ending
+        TmpPtr++;
+      LLVM_FALLTHROUGH;
+    case '\n':
+      CurPtr = TmpPtr; // TmpPtr is now just past the line ending
+      return true;
+    case 0:
+    default: // NUL or any other character: return false, CurPtr left unchanged
+      return false;
+    }
+  }
+}
 
 /// lexCharacter - Read a character and return its UTF32 code.  If this is the
 /// end of enclosing string/character sequence (i.e. the character is equal to
@@ -1187,6 +1208,10 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
   unsigned CharValue = 0;
   // Escape processing.  We already ate the "\".
   switch (*CurPtr) {
+  case ' ': case '\t': case '\n': case '\r':
+    if (MultilineString && maybeConsumeNewlineEscape(CurPtr, 0))
+      return '\n';
+    LLVM_FALLTHROUGH;
   default:  // Invalid escape.
     if (EmitDiagnostics)
       diagnose(CurPtr, diag::lex_invalid_escape);
@@ -1313,7 +1338,11 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
           // Entering a recursive interpolated expression
           OpenDelimiters.push_back('(');
           continue;
-        case '\n': case '\r': case 0:
+        case '\n': case '\r':
+          if (AllowNewline.back())
+            continue;
+          LLVM_FALLTHROUGH;
+        case 0:
           // Don't jump over newline/EOF due to preceding backslash!
           return CurPtr-1;
         default:
@@ -1816,12 +1845,14 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
   // we know that there is a terminating " character.  Use BytesPtr to avoid a
   // range check subscripting on the StringRef.
   const char *BytesPtr = Bytes.begin();
+  bool IsEscapedNewline = false;
   while (BytesPtr < Bytes.end()) {
     char CurChar = *BytesPtr++;
 
     // Multiline string line ending normalization and indent stripping.
     if (CurChar == '\r' || CurChar == '\n') {
-      bool stripNewline = IsFirstSegment && BytesPtr - 1 == Bytes.begin();
+      bool stripNewline = IsEscapedNewline ||
+        (IsFirstSegment && BytesPtr - 1 == Bytes.begin());
       if (CurChar == '\r' && *BytesPtr == '\n')
         BytesPtr++;
       if (*BytesPtr != '\r' && *BytesPtr != '\n')
@@ -1830,6 +1861,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
         stripNewline = true;
       if (!stripNewline)
         TempString.push_back('\n');
+      IsEscapedNewline = false;
       continue;
     }
 
@@ -1854,6 +1886,12 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
     case '\'': TempString.push_back('\''); continue;
     case '\\': TempString.push_back('\\'); continue;
 
+    case ' ': case '\t': case '\n': case '\r':
+      if (maybeConsumeNewlineEscape(BytesPtr, -1)) {
+        IsEscapedNewline = true;
+        BytesPtr--;
+      }
+      continue;
 
     // String interpolation.
     case '(':
diff --git a/test/Parse/multiline_errors.swift b/test/Parse/multiline_errors.swift
@@ -120,3 +120,9 @@ _ = "hello\(
             """)!"
             // expected-error@-4 {{unterminated string literal}}
             // expected-error@-2 {{unterminated string literal}}
+
+_ = """
+  line one \ non-whitepace
+  line two
+  """
+  // expected-error@-3 {{invalid escape sequence in literal}}
diff --git a/test/Parse/multiline_string.swift b/test/Parse/multiline_string.swift
@@ -108,6 +108,51 @@ _ = """
 	"""
 // CHECK: "Twelve\nNu"
 
+_ = """
+  newline \
+  elided
+  """
+// CHECK: "newline elided"
+
+// contains trailing whitespace
+_ = """
+  trailing \
+  \("""
+    substring1 \
+    \("""
+      substring2 \          
+      substring3
+      """)\
+    """) \
+  whitepsace
+  """
+// CHECK: "trailing "
+// CHECK: "substring1 "
+// CHECK: "substring2 substring3"
+// CHECK: " whitepsace"
+
+// contains trailing whitespace
+_ = """
+    foo\ 
+
+    bar
+    """
+// CHECK: "foo\nbar"
+
+// contains trailing whitespace
+_ = """
+    foo\ 
+    
+    bar
+    """
+// CHECK: "foo\nbar"
+
+_ = """
+    foo \
+      bar
+    """
+// CHECK: "foo   bar"
+
 _ = """
 
   ABC