@@ -1189,6 +1189,27 @@ unsigned Lexer::lexUnicodeEscape(const char *&CurPtr, Lexer *Diags) {
1189
1189
return CharValue;
1190
1190
}
1191
1191
1192
/// maybeConsumeNewlineEscape - Check for a valid elided-newline escape, i.e.
/// optional horizontal whitespace followed by a line ending, and on success
/// move the pointer passed in to the character after the end of the line.
///
/// \param CurPtr  Reference position in the buffer. Updated only when a line
///                ending is found; left untouched otherwise.
/// \param Offset  Signed distance from \p CurPtr at which scanning starts
///                (callers pass 0 or -1 depending on whether they have
///                already stepped past the first character after the "\").
/// \returns true if only spaces/tabs precede a "\n", "\r" or "\r\n" line
///          ending; false if any other character (including NUL) is reached
///          first.
static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
  const char *TmpPtr = CurPtr + Offset;

  // Horizontal whitespace between the backslash and the line ending is
  // permitted and ignored.
  while (*TmpPtr == ' ' || *TmpPtr == '\t')
    ++TmpPtr;

  if (*TmpPtr == '\r') {
    ++TmpPtr;
    // Treat "\r\n" as a single line ending.
    if (*TmpPtr == '\n')
      ++TmpPtr;
  } else if (*TmpPtr == '\n') {
    ++TmpPtr;
  } else {
    // NUL or any non-whitespace character: this is not a newline escape.
    return false;
  }

  CurPtr = TmpPtr;
  return true;
}
1192
1213
1193
1214
/// lexCharacter - Read a character and return its UTF32 code. If this is the
1194
1215
/// end of enclosing string/character sequence (i.e. the character is equal to
@@ -1254,6 +1275,10 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
1254
1275
unsigned CharValue = 0 ;
1255
1276
// Escape processing. We already ate the "\".
1256
1277
switch (*CurPtr) {
1278
+ case ' ' : case ' \t ' : case ' \n ' : case ' \r ' :
1279
+ if (MultilineString && maybeConsumeNewlineEscape (CurPtr, 0 ))
1280
+ return ' \n ' ;
1281
+ LLVM_FALLTHROUGH;
1257
1282
default : // Invalid escape.
1258
1283
if (EmitDiagnostics)
1259
1284
diagnose (CurPtr, diag::lex_invalid_escape);
@@ -1380,7 +1405,11 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
1380
1405
// Entering a recursive interpolated expression
1381
1406
OpenDelimiters.push_back (' (' );
1382
1407
continue ;
1383
- case ' \n ' : case ' \r ' : case 0 :
1408
+ case ' \n ' : case ' \r ' :
1409
+ if (AllowNewline.back ())
1410
+ continue ;
1411
+ LLVM_FALLTHROUGH;
1412
+ case 0 :
1384
1413
// Don't jump over newline/EOF due to preceding backslash!
1385
1414
return CurPtr-1 ;
1386
1415
default :
@@ -1883,12 +1912,14 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
1883
1912
// we know that there is a terminating " character. Use BytesPtr to avoid a
1884
1913
// range check subscripting on the StringRef.
1885
1914
const char *BytesPtr = Bytes.begin ();
1915
+ bool IsEscapedNewline = false ;
1886
1916
while (BytesPtr < Bytes.end ()) {
1887
1917
char CurChar = *BytesPtr++;
1888
1918
1889
1919
// Multiline string line ending normalization and indent stripping.
1890
1920
if (CurChar == ' \r ' || CurChar == ' \n ' ) {
1891
- bool stripNewline = IsFirstSegment && BytesPtr - 1 == Bytes.begin ();
1921
+ bool stripNewline = IsEscapedNewline ||
1922
+ (IsFirstSegment && BytesPtr - 1 == Bytes.begin ());
1892
1923
if (CurChar == ' \r ' && *BytesPtr == ' \n ' )
1893
1924
BytesPtr++;
1894
1925
if (*BytesPtr != ' \r ' && *BytesPtr != ' \n ' )
@@ -1897,6 +1928,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
1897
1928
stripNewline = true ;
1898
1929
if (!stripNewline)
1899
1930
TempString.push_back (' \n ' );
1931
+ IsEscapedNewline = false ;
1900
1932
continue ;
1901
1933
}
1902
1934
@@ -1921,6 +1953,12 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
1921
1953
case ' \' ' : TempString.push_back (' \' ' ); continue ;
1922
1954
case ' \\ ' : TempString.push_back (' \\ ' ); continue ;
1923
1955
1956
+ case ' ' : case ' \t ' : case ' \n ' : case ' \r ' :
1957
+ if (maybeConsumeNewlineEscape (BytesPtr, -1 )) {
1958
+ IsEscapedNewline = true ;
1959
+ BytesPtr--;
1960
+ }
1961
+ continue ;
1924
1962
1925
1963
// String interpolation.
1926
1964
case ' (' :
0 commit comments