Skip to content

Commit 1d2c426

Browse files
committed
Distinguish raw strings from multiline delimiters [SR-10011]
1 parent 64ec60b commit 1d2c426

File tree

1 file changed

+59
-17
lines changed

1 file changed

+59
-17
lines changed

lib/Parse/Lexer.cpp

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,19 +1241,6 @@ static bool diagnoseZeroWidthMatchAndAdvance(char Target, const char *&CurPtr,
12411241
return *CurPtr == Target && CurPtr++;
12421242
}
12431243

1244-
/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
1245-
static bool advanceIfMultilineDelimiter(const char *&CurPtr,
1246-
DiagnosticEngine *Diags) {
1247-
const char *TmpPtr = CurPtr;
1248-
if (*(TmpPtr - 1) == '"' &&
1249-
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags) &&
1250-
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags)) {
1251-
CurPtr = TmpPtr;
1252-
return true;
1253-
}
1254-
return false;
1255-
}
1256-
12571244
/// advanceIfCustomDelimiter - Extracts/detects any custom delimiter on
12581245
/// opening a string literal, advances CurPtr if a delimiter is found and
12591246
/// returns a non-zero delimiter length. CurPtr[-1] must be '#' when called.
@@ -1300,6 +1287,37 @@ static bool delimiterMatches(unsigned CustomDelimiterLen, const char *&BytesPtr,
13001287
return true;
13011288
}
13021289

1290+
/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
1291+
static bool advanceIfMultilineDelimiter(unsigned CustomDelimiterLen,
1292+
const char *&CurPtr,
1293+
DiagnosticEngine *Diags,
1294+
bool IsOpening = false) {
1295+
1296+
// Test for single-line string literals that resemble multiline delimiter.
1297+
const char *TmpPtr = CurPtr + 1;
1298+
if (IsOpening && CustomDelimiterLen) {
1299+
while (*TmpPtr != '\r' && *TmpPtr != '\n') {
1300+
if (*TmpPtr == '"') {
1301+
if (delimiterMatches(CustomDelimiterLen, ++TmpPtr, nullptr)) {
1302+
return false;
1303+
}
1304+
continue;
1305+
}
1306+
++TmpPtr;
1307+
}
1308+
}
1309+
1310+
TmpPtr = CurPtr;
1311+
if (*(TmpPtr - 1) == '"' &&
1312+
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags) &&
1313+
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags)) {
1314+
CurPtr = TmpPtr;
1315+
return true;
1316+
}
1317+
1318+
return false;
1319+
}
1320+
13031321
/// lexCharacter - Read a character and return its UTF32 code. If this is the
13041322
/// end of enclosing string/character sequence (i.e. the character is equal to
13051323
/// 'StopQuote'), this returns ~0U and advances 'CurPtr' pointing to the end of
@@ -1342,7 +1360,8 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
13421360

13431361
DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr;
13441362
auto TmpPtr = CurPtr;
1345-
if (IsMultilineString && !advanceIfMultilineDelimiter(TmpPtr, D))
1363+
if (IsMultilineString &&
1364+
!advanceIfMultilineDelimiter(CustomDelimiterLen, TmpPtr, D))
13461365
return '"';
13471366
if (CustomDelimiterLen &&
13481367
!delimiterMatches(CustomDelimiterLen, TmpPtr, D, /*IsClosing=*/true))
@@ -1478,7 +1497,9 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
14781497
if (!inStringLiteral()) {
14791498
// Open string literal.
14801499
OpenDelimiters.push_back(CurPtr[-1]);
1481-
AllowNewline.push_back(advanceIfMultilineDelimiter(CurPtr, nullptr));
1500+
AllowNewline.push_back(advanceIfMultilineDelimiter(CustomDelimiterLen,
1501+
CurPtr, nullptr,
1502+
true));
14821503
CustomDelimiter.push_back(CustomDelimiterLen);
14831504
continue;
14841505
}
@@ -1490,7 +1511,8 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
14901511
continue;
14911512

14921513
// Multi-line string can only be closed by '"""'.
1493-
if (AllowNewline.back() && !advanceIfMultilineDelimiter(CurPtr, nullptr))
1514+
if (AllowNewline.back() &&
1515+
!advanceIfMultilineDelimiter(CustomDelimiterLen, CurPtr, nullptr))
14941516
continue;
14951517

14961518
// Check whether we have equivalent number of '#'s.
@@ -1827,7 +1849,27 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
18271849
// diagnostics about changing them to double quotes.
18281850
assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start");
18291851

1830-
bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags);
1852+
bool IsMultilineString = advanceIfMultilineDelimiter(CustomDelimiterLen,
1853+
CurPtr, Diags, true);
1854+
1855+
// Test for single-line string literals that may resemble multiline delimiter.
1856+
if (IsMultilineString && CustomDelimiterLen &&
1857+
*CurPtr != '\n' && *CurPtr != '\r') {
1858+
const char *TmpPtr = CurPtr-1;
1859+
while (*TmpPtr != '\r' && *TmpPtr != '\n') {
1860+
if (*TmpPtr == '"') {
1861+
if (delimiterMatches(CustomDelimiterLen, ++TmpPtr, nullptr)) {
1862+
// Undo effects from falsely detecting multiline delimiter.
1863+
CurPtr = CurPtr - 2;
1864+
IsMultilineString = false;
1865+
break;
1866+
}
1867+
continue;
1868+
}
1869+
++TmpPtr;
1870+
}
1871+
}
1872+
18311873
if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r')
18321874
diagnose(CurPtr, diag::lex_illegal_multiline_string_start)
18331875
.fixItInsert(Lexer::getSourceLoc(CurPtr), "\n");

0 commit comments

Comments
 (0)