@@ -1122,6 +1122,27 @@ unsigned Lexer::lexUnicodeEscape(const char *&CurPtr, Lexer *Diags) {
1122
1122
return CharValue;
1123
1123
}
1124
1124
1125
/// maybeConsumeNewlineEscape - Check for a valid elided newline escape and,
/// if one is found, move the pointer passed in to the character after the end
/// of the line.
///
/// Scanning begins at \p CurPtr + \p Offset. Any run of spaces and horizontal
/// tabs is skipped; the first other character decides the result.
///
/// \param CurPtr  In/out cursor. It is written only when the function returns
///                true, in which case it points just past the line ending
///                ("\n", "\r", or "\r\n" treated as a single ending).
/// \param Offset  Signed displacement added to \p CurPtr to locate the first
///                character to examine (may be negative).
/// \returns true if only spaces/tabs precede a line ending; false if any other
///          character, including NUL, is encountered first.
static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
  const char *TmpPtr = CurPtr + Offset;
  while (true) {
    switch (*TmpPtr++) {
    case ' ': case '\t':
      // Trailing horizontal whitespace is permitted before the line ending.
      continue;
    case '\r':
      // Consume the '\n' of a "\r\n" pair so it counts as one line ending.
      if (*TmpPtr == '\n')
        ++TmpPtr;
      CurPtr = TmpPtr;
      return true;
    case '\n':
      CurPtr = TmpPtr;
      return true;
    case 0:
    default:
      // NUL or any other character: not an escaped newline. CurPtr is left
      // untouched so the caller can handle (or diagnose) the escape itself.
      return false;
    }
  }
}
1125
1146
1126
1147
/// lexCharacter - Read a character and return its UTF32 code. If this is the
1127
1148
/// end of enclosing string/character sequence (i.e. the character is equal to
@@ -1187,6 +1208,10 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
1187
1208
unsigned CharValue = 0 ;
1188
1209
// Escape processing. We already ate the "\".
1189
1210
switch (*CurPtr) {
1211
+ case ' ' : case ' \t ' : case ' \n ' : case ' \r ' :
1212
+ if (MultilineString && maybeConsumeNewlineEscape (CurPtr, 0 ))
1213
+ return ' \n ' ;
1214
+ LLVM_FALLTHROUGH;
1190
1215
default : // Invalid escape.
1191
1216
if (EmitDiagnostics)
1192
1217
diagnose (CurPtr, diag::lex_invalid_escape);
@@ -1313,7 +1338,11 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
1313
1338
// Entering a recursive interpolated expression
1314
1339
OpenDelimiters.push_back (' (' );
1315
1340
continue ;
1316
- case ' \n ' : case ' \r ' : case 0 :
1341
+ case ' \n ' : case ' \r ' :
1342
+ if (AllowNewline.back ())
1343
+ continue ;
1344
+ LLVM_FALLTHROUGH;
1345
+ case 0 :
1317
1346
// Don't jump over newline/EOF due to preceding backslash!
1318
1347
return CurPtr-1 ;
1319
1348
default :
@@ -1816,12 +1845,14 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
1816
1845
// we know that there is a terminating " character. Use BytesPtr to avoid a
1817
1846
// range check subscripting on the StringRef.
1818
1847
const char *BytesPtr = Bytes.begin ();
1848
+ bool IsEscapedNewline = false ;
1819
1849
while (BytesPtr < Bytes.end ()) {
1820
1850
char CurChar = *BytesPtr++;
1821
1851
1822
1852
// Multiline string line ending normalization and indent stripping.
1823
1853
if (CurChar == ' \r ' || CurChar == ' \n ' ) {
1824
- bool stripNewline = IsFirstSegment && BytesPtr - 1 == Bytes.begin ();
1854
+ bool stripNewline = IsEscapedNewline ||
1855
+ (IsFirstSegment && BytesPtr - 1 == Bytes.begin ());
1825
1856
if (CurChar == ' \r ' && *BytesPtr == ' \n ' )
1826
1857
BytesPtr++;
1827
1858
if (*BytesPtr != ' \r ' && *BytesPtr != ' \n ' )
@@ -1830,6 +1861,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
1830
1861
stripNewline = true ;
1831
1862
if (!stripNewline)
1832
1863
TempString.push_back (' \n ' );
1864
+ IsEscapedNewline = false ;
1833
1865
continue ;
1834
1866
}
1835
1867
@@ -1854,6 +1886,12 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
1854
1886
case ' \' ' : TempString.push_back (' \' ' ); continue ;
1855
1887
case ' \\ ' : TempString.push_back (' \\ ' ); continue ;
1856
1888
1889
+ case ' ' : case ' \t ' : case ' \n ' : case ' \r ' :
1890
+ if (maybeConsumeNewlineEscape (BytesPtr, -1 )) {
1891
+ IsEscapedNewline = true ;
1892
+ BytesPtr--;
1893
+ }
1894
+ continue ;
1857
1895
1858
1896
// String interpolation.
1859
1897
case ' (' :
0 commit comments