Skip to content

Commit ba51727

Browse files
committed
[Lexer] Advance pointer to the end of end-quote in lexCharacter
This simplifies the main lexStringLiteral loop.
1 parent 893524e commit ba51727

File tree

1 file changed

+37
-23
lines changed

1 file changed

+37
-23
lines changed

lib/Parse/Lexer.cpp

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,9 +1292,9 @@ static bool delimiterMatches(unsigned CustomDelimiterLen, const char *&BytesPtr,
12921292

12931293
/// lexCharacter - Read a character and return its UTF32 code. If this is the
12941294
/// end of enclosing string/character sequence (i.e. the character is equal to
1295-
/// 'StopQuote'), this returns ~0U and leaves 'CurPtr' pointing to the terminal
1296-
/// quote. If this is a malformed character sequence, it emits a diagnostic
1297-
/// (when EmitDiagnostics is true) and returns ~1U.
1295+
/// 'StopQuote'), this returns ~0U and advances 'CurPtr' to point to the end of
1296+
/// the terminal quote. If this is a malformed character sequence, it emits a
1297+
/// diagnostic (when EmitDiagnostics is true) and returns ~1U.
12981298
///
12991299
/// character_escape ::= [\][\] | [\]t | [\]n | [\]r | [\]" | [\]' | [\]0
13001300
/// character_escape ::= unicode_character_escape
@@ -1305,6 +1305,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
13051305

13061306
switch (*CurPtr++) {
13071307
default: {// Normal characters are part of the string.
1308+
// Normal characters are part of the string.
13081309
// If this is a "high" UTF-8 character, validate it.
13091310
if ((signed char)(CurPtr[-1]) >= 0) {
13101311
if (isPrintable(CurPtr[-1]) == 0)
@@ -1322,14 +1323,26 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
13221323
}
13231324
case '"':
13241325
case '\'':
1325-
// If we found a closing quote character, we're done.
13261326
if (CurPtr[-1] == StopQuote) {
1327-
--CurPtr;
1327+
// Multiline and custom escaping are only enabled for the " quote.
1328+
if (LLVM_UNLIKELY(StopQuote != '"'))
1329+
return ~0U;
1330+
if (!IsMultilineString && !CustomDelimiterLen)
1331+
return ~0U;
1332+
1333+
DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr;
1334+
auto TmpPtr = CurPtr;
1335+
if (IsMultilineString && !advanceIfMultilineDelimiter(TmpPtr, D))
1336+
return '"';
1337+
if (CustomDelimiterLen &&
1338+
!delimiterMatches(CustomDelimiterLen, TmpPtr, D, /*IsClosing=*/true))
1339+
return '"';
1340+
CurPtr = TmpPtr;
13281341
return ~0U;
13291342
}
13301343
// Otherwise, this is just a character.
13311344
return CurPtr[-1];
1332-
1345+
13331346
case 0:
13341347
if (CurPtr-1 != BufferEnd) {
13351348
if (EmitDiagnostics)
@@ -1738,10 +1751,12 @@ static void validateMultilineIndents(const Token &Str,
17381751
/// string_literal ::= ["]["]["].*["]["]["] - approximately
17391752
/// string_literal ::= (#+)("")?".*"(\2\1) - "raw" strings
17401753
void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
1741-
const char *TokStart = CurPtr-1;
1742-
assert((*TokStart == '"' || *TokStart == '\'') && "Unexpected start");
1754+
const char QuoteChar = CurPtr[-1];
1755+
const char *TokStart = CurPtr - 1 - CustomDelimiterLen;
1756+
17431757
// NOTE: We only allow single-quote string literals so we can emit useful
17441758
// diagnostics about changing them to double quotes.
1759+
assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start");
17451760

17461761
bool wasErroneous = false, IsMultilineString = false;
17471762

@@ -1774,23 +1789,26 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
17741789
// String literals cannot have \n or \r in them (unless multiline).
17751790
if (((*CurPtr == '\r' || *CurPtr == '\n') && !IsMultilineString)
17761791
|| CurPtr == BufferEnd) {
1777-
TokStart -= CustomDelimiterLen;
17781792
diagnose(TokStart, diag::lex_unterminated_string);
17791793
return formToken(tok::unknown, TokStart);
17801794
}
17811795

1782-
unsigned CharValue = lexCharacter(CurPtr, *TokStart, true,
1796+
unsigned CharValue = lexCharacter(CurPtr, QuoteChar, true,
17831797
IsMultilineString, CustomDelimiterLen);
17841798
wasErroneous |= CharValue == ~1U;
17851799

17861800
// If this is the end of string, we are done. If it is a normal character
17871801
// or an already-diagnosed error, just munch it.
17881802
if (CharValue == ~0U) {
1789-
++CurPtr;
17901803

1791-
if (*TokStart == '\'') {
1792-
// Complain about single-quote string and suggest replacement with
1793-
// double-quoted equivalent.
1804+
if (QuoteChar == '\'') {
1805+
// Emit diagnostics for single-quote string and suggest replacement
1806+
// with double-quoted equivalent.
1807+
assert(
1808+
!IsMultilineString && CustomDelimiterLen == 0 &&
1809+
"Single quoted string cannot have custom delimitor, nor multiline");
1810+
assert(*TokStart == '\'' && CurPtr[-1] == '\'');
1811+
17941812
StringRef orig(TokStart, CurPtr - TokStart);
17951813
llvm::SmallString<32> replacement;
17961814
replacement += '"';
@@ -1823,15 +1841,11 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
18231841
}
18241842

18251843
// Is this the end of multiline/custom-delimited string literal?
1826-
if ((!IsMultilineString || advanceIfMultilineDelimiter(CurPtr, Diags)) &&
1827-
delimiterMatches(CustomDelimiterLen, CurPtr, Diags, true)) {
1828-
TokStart -= CustomDelimiterLen;
1829-
if (wasErroneous)
1830-
return formToken(tok::unknown, TokStart);
1831-
1832-
return formStringLiteralToken(TokStart, IsMultilineString,
1833-
CustomDelimiterLen);
1834-
}
1844+
if (wasErroneous)
1845+
return formToken(tok::unknown, TokStart);
1846+
1847+
return formStringLiteralToken(TokStart, IsMultilineString,
1848+
CustomDelimiterLen);
18351849
}
18361850
}
18371851
}

0 commit comments

Comments
 (0)