Skip to content

Commit ca19fe4

Browse files
committed
[Parse] add InLexTrivia flag to lexInvalidCharacters
1 parent 4ee8506 commit ca19fe4

File tree

2 files changed

+30
-17
lines changed

2 files changed

+30
-17
lines changed

include/swift/Parse/Lexer.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,8 @@ class Lexer {
525525
void lexEscapedIdentifier();
526526

527527
void tryLexEditorPlaceholder();
528-
const char *findEndOfCurlyQuoteStringLiteral(const char*);
528+
const char *findEndOfCurlyQuoteStringLiteral(const char *,
529+
bool EmitDiagnostics);
529530

530531
/// Try to lex conflict markers by checking for the presence of the start and
531532
/// end of the marker in diff3 or Perforce style respectively.
@@ -534,7 +535,7 @@ class Lexer {
534535
NulCharacterKind getNulCharacterKind(const char *Ptr) const;
535536

536537
/// Lex invalid characters and return whether they should be tokenized.
537-
bool lexInvalidCharacters(const char *&Ptr);
538+
bool lexInvalidCharacters(const char *&Ptr, bool InLexTrivia);
538539
};
539540

540541
/// Given an ordered token \param Array , get the iterator pointing to the first

lib/Parse/Lexer.cpp

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1739,8 +1739,9 @@ void Lexer::lexStringLiteral() {
17391739
/// string literal, diagnose the problem and return a pointer to the end of the
17401740
/// entire string literal. This helps us avoid parsing the body of the string
17411741
/// as program tokens, which will only lead to massive confusion.
1742-
const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
1743-
1742+
const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
1743+
bool EmitDiagnostics) {
1744+
17441745
while (true) {
17451746
// Don't bother with string interpolations.
17461747
if (*Body == '\\' && *(Body + 1) == '(')
@@ -1752,7 +1753,7 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
17521753

17531754
// Get the next character.
17541755
const char *CharStart = Body;
1755-
unsigned CharValue = lexCharacter(Body, '\0', false);
1756+
unsigned CharValue = lexCharacter(Body, '\0', /*EmitDiagnostics=*/false);
17561757
// If the character was incorrectly encoded, give up.
17571758
if (CharValue == ~1U) return nullptr;
17581759

@@ -1764,8 +1765,11 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
17641765
// If we found an ending curly quote (common since this thing started with
17651766
// an opening curly quote) diagnose it with a fixit and then return.
17661767
if (CharValue == 0x0000201D) {
1767-
diagnose(CharStart, diag::lex_invalid_curly_quote)
1768-
.fixItReplaceChars(getSourceLoc(CharStart), getSourceLoc(Body), "\"");
1768+
if (EmitDiagnostics) {
1769+
diagnose(CharStart, diag::lex_invalid_curly_quote)
1770+
.fixItReplaceChars(getSourceLoc(CharStart), getSourceLoc(Body),
1771+
"\"");
1772+
}
17691773
return Body;
17701774
}
17711775

@@ -1875,15 +1879,19 @@ Lexer::NulCharacterKind Lexer::getNulCharacterKind(const char *Ptr) const {
18751879
return NulCharacterKind::Embedded;
18761880
}
18771881

1878-
bool Lexer::lexInvalidCharacters(const char *&Ptr) {
1882+
bool Lexer::lexInvalidCharacters(const char *&Ptr, bool InLexTrivia) {
1883+
// In lexTrivia, diagnose only when the characters should not be tokenized.
1884+
18791885
assert(Ptr != nullptr);
18801886

18811887
const char *const StartPtr = Ptr;
18821888

18831889
if (advanceIfValidContinuationOfIdentifier(Ptr, BufferEnd)) {
18841890
// If this is a valid identifier continuation, but not a valid identifier
18851891
// start, attempt to recover by eating more continuation characters.
1886-
diagnose(StartPtr, diag::lex_invalid_identifier_start_character);
1892+
if (!InLexTrivia) {
1893+
diagnose(StartPtr, diag::lex_invalid_identifier_start_character);
1894+
}
18871895
while (advanceIfValidContinuationOfIdentifier(Ptr, BufferEnd))
18881896
;
18891897
return true;
@@ -1900,18 +1908,21 @@ bool Lexer::lexInvalidCharacters(const char *&Ptr) {
19001908

19011909
if (codepoint == 0x0000201D) {
19021910
// If this is an end curly quote, just diagnose it with a fixit hint.
1903-
diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
1904-
.fixItReplaceChars(getSourceLoc(StartPtr), getSourceLoc(Ptr), "\"");
1911+
if (!InLexTrivia) {
1912+
diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
1913+
.fixItReplaceChars(getSourceLoc(StartPtr), getSourceLoc(Ptr), "\"");
1914+
}
19051915
return true;
19061916
}
19071917

19081918
if (codepoint == 0x0000201C) {
19091919
const char *const LeftQuoteEndPtr = Ptr;
1920+
bool EmitDiagnostics = !InLexTrivia;
19101921

19111922
// If this is a start curly quote, do a fuzzy match of a string literal
19121923
// to improve recovery.
19131924
if (const char *const RightQuoteEndPtr =
1914-
findEndOfCurlyQuoteStringLiteral(Ptr)) {
1925+
findEndOfCurlyQuoteStringLiteral(Ptr, EmitDiagnostics)) {
19151926
Ptr = RightQuoteEndPtr;
19161927
}
19171928

@@ -1920,10 +1931,11 @@ bool Lexer::lexInvalidCharacters(const char *&Ptr) {
19201931
// This, in turn, works better with our error recovery because we won't
19211932
// diagnose an end curly quote in the middle of a straight quoted
19221933
// literal.
1923-
diagnose(StartPtr, diag::lex_invalid_curly_quote)
1924-
.fixItReplaceChars(getSourceLoc(StartPtr),
1925-
getSourceLoc(LeftQuoteEndPtr), "\"");
1926-
1934+
if (EmitDiagnostics) {
1935+
diagnose(StartPtr, diag::lex_invalid_curly_quote)
1936+
.fixItReplaceChars(getSourceLoc(StartPtr),
1937+
getSourceLoc(LeftQuoteEndPtr), "\"");
1938+
}
19271939
return true;
19281940
}
19291941

@@ -2174,7 +2186,7 @@ void Lexer::lexImpl() {
21742186
if (advanceIfValidStartOfOperator(tmp, BufferEnd))
21752187
return lexOperatorIdentifier();
21762188

2177-
bool ShouldTokenize = lexInvalidCharacters(tmp);
2189+
bool ShouldTokenize = lexInvalidCharacters(tmp, /*InLexTrivia=*/false);
21782190
CurPtr = tmp;
21792191

21802192
if (ShouldTokenize) {

0 commit comments

Comments
 (0)