@@ -1292,9 +1292,9 @@ static bool delimiterMatches(unsigned CustomDelimiterLen, const char *&BytesPtr,
1292
1292
1293
1293
// / lexCharacter - Read a character and return its UTF32 code. If this is the
1294
1294
// / end of enclosing string/character sequence (i.e. the character is equal to
1295
- // / 'StopQuote'), this returns ~0U and leaves 'CurPtr' pointing to the terminal
1296
- // / quote. If this is a malformed character sequence, it emits a diagnostic
1297
- // / (when EmitDiagnostics is true) and returns ~1U.
1295
+ // / 'StopQuote'), this returns ~0U and advances 'CurPtr' past the end of the
1296
+ // / terminal quote. If this is a malformed character sequence, it emits a
1297
+ // / diagnostic (when EmitDiagnostics is true) and returns ~1U.
1298
1298
// /
1299
1299
// / character_escape ::= [\][\] | [\]t | [\]n | [\]r | [\]" | [\]' | [\]0
1300
1300
// / character_escape ::= unicode_character_escape
@@ -1305,6 +1305,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
1305
1305
1306
1306
switch (*CurPtr++) {
1307
1307
default : {// Normal characters are part of the string.
1308
+ // Normal characters are part of the string.
1308
1309
// If this is a "high" UTF-8 character, validate it.
1309
1310
if ((signed char )(CurPtr[-1 ]) >= 0 ) {
1310
1311
if (isPrintable (CurPtr[-1 ]) == 0 )
@@ -1322,14 +1323,26 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
1322
1323
}
1323
1324
case ' "' :
1324
1325
case ' \' ' :
1325
- // If we found a closing quote character, we're done.
1326
1326
if (CurPtr[-1 ] == StopQuote) {
1327
- --CurPtr;
1327
+ // Multiline and custom escaping are only enabled for " quote.
1328
+ if (LLVM_UNLIKELY (StopQuote != ' "' ))
1329
+ return ~0U ;
1330
+ if (!IsMultilineString && !CustomDelimiterLen)
1331
+ return ~0U ;
1332
+
1333
+ DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr ;
1334
+ auto TmpPtr = CurPtr;
1335
+ if (IsMultilineString && !advanceIfMultilineDelimiter (TmpPtr, D))
1336
+ return ' "' ;
1337
+ if (CustomDelimiterLen &&
1338
+ !delimiterMatches (CustomDelimiterLen, TmpPtr, D, /* IsClosing=*/ true ))
1339
+ return ' "' ;
1340
+ CurPtr = TmpPtr;
1328
1341
return ~0U ;
1329
1342
}
1330
1343
// Otherwise, this is just a character.
1331
1344
return CurPtr[-1 ];
1332
-
1345
+
1333
1346
case 0 :
1334
1347
if (CurPtr-1 != BufferEnd) {
1335
1348
if (EmitDiagnostics)
@@ -1738,10 +1751,12 @@ static void validateMultilineIndents(const Token &Str,
1738
1751
// / string_literal ::= ["]["]["].*["]["]["] - approximately
1739
1752
// / string_literal ::= (#+)("")?".*"(\2\1) - "raw" strings
1740
1753
void Lexer::lexStringLiteral (unsigned CustomDelimiterLen) {
1741
- const char *TokStart = CurPtr-1 ;
1742
- assert ((*TokStart == ' "' || *TokStart == ' \' ' ) && " Unexpected start" );
1754
+ const char QuoteChar = CurPtr[-1 ];
1755
+ const char *TokStart = CurPtr - 1 - CustomDelimiterLen;
1756
+
1743
1757
// NOTE: We only allow single-quote string literals so we can emit useful
1744
1758
// diagnostics about changing them to double quotes.
1759
+ assert ((QuoteChar == ' "' || QuoteChar == ' \' ' ) && " Unexpected start" );
1745
1760
1746
1761
bool wasErroneous = false , IsMultilineString = false ;
1747
1762
@@ -1774,23 +1789,26 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
1774
1789
// String literals cannot have \n or \r in them (unless multiline).
1775
1790
if (((*CurPtr == ' \r ' || *CurPtr == ' \n ' ) && !IsMultilineString)
1776
1791
|| CurPtr == BufferEnd) {
1777
- TokStart -= CustomDelimiterLen;
1778
1792
diagnose (TokStart, diag::lex_unterminated_string);
1779
1793
return formToken (tok::unknown, TokStart);
1780
1794
}
1781
1795
1782
- unsigned CharValue = lexCharacter (CurPtr, *TokStart , true ,
1796
+ unsigned CharValue = lexCharacter (CurPtr, QuoteChar , true ,
1783
1797
IsMultilineString, CustomDelimiterLen);
1784
1798
wasErroneous |= CharValue == ~1U ;
1785
1799
1786
1800
// If this is the end of string, we are done. If it is a normal character
1787
1801
// or an already-diagnosed error, just munch it.
1788
1802
if (CharValue == ~0U ) {
1789
- ++CurPtr;
1790
1803
1791
- if (*TokStart == ' \' ' ) {
1792
- // Complain about single-quote string and suggest replacement with
1793
- // double-quoted equivalent.
1804
+ if (QuoteChar == ' \' ' ) {
1805
+ // Emit diagnostics for single-quote string and suggest replacement
1806
+ // with double-quoted equivalent.
1807
+ assert (
1808
+ !IsMultilineString && CustomDelimiterLen == 0 &&
1809
+ " Single quoted string cannot have custom delimitor, nor multiline" );
1810
+ assert (*TokStart == ' \' ' && CurPtr[-1 ] == ' \' ' );
1811
+
1794
1812
StringRef orig (TokStart, CurPtr - TokStart);
1795
1813
llvm::SmallString<32 > replacement;
1796
1814
replacement += ' "' ;
@@ -1823,15 +1841,11 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
1823
1841
}
1824
1842
1825
1843
// Is this the end of multiline/custom-delimited string literal?
1826
- if ((!IsMultilineString || advanceIfMultilineDelimiter (CurPtr, Diags)) &&
1827
- delimiterMatches (CustomDelimiterLen, CurPtr, Diags, true )) {
1828
- TokStart -= CustomDelimiterLen;
1829
- if (wasErroneous)
1830
- return formToken (tok::unknown, TokStart);
1831
-
1832
- return formStringLiteralToken (TokStart, IsMultilineString,
1833
- CustomDelimiterLen);
1834
- }
1844
+ if (wasErroneous)
1845
+ return formToken (tok::unknown, TokStart);
1846
+
1847
+ return formStringLiteralToken (TokStart, IsMultilineString,
1848
+ CustomDelimiterLen);
1835
1849
}
1836
1850
}
1837
1851
}
0 commit comments