Skip to content

Commit b4192d8

Browse files
omochi authored and nkcsgexi committed
[Parse] EmitDiagnosticsIfToken in lexUnknown
It will be needed by a future update to lexTrivia.
1 parent e6f42fc commit b4192d8

File tree

2 files changed

+28
-16
lines changed

2 files changed

+28
-16
lines changed

include/swift/Parse/Lexer.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,14 +525,15 @@ class Lexer {
525525
void lexEscapedIdentifier();
526526

527527
void tryLexEditorPlaceholder();
528-
const char *findEndOfCurlyQuoteStringLiteral(const char*);
528+
const char *findEndOfCurlyQuoteStringLiteral(const char *,
529+
bool EmitDiagnostics);
529530

530531
/// Try to lex conflict markers by checking for the presence of the start and
531532
/// end of the marker in diff3 or Perforce style respectively.
532533
bool tryLexConflictMarker(bool EatNewline);
533534

534535
/// Returns whether it should be tokenized.
535-
bool lexUnknown();
536+
bool lexUnknown(bool EmitDiagnosticsIfToken);
536537

537538
NulCharacterKind getNulCharacterKind(const char *Ptr) const;
538539
};

lib/Parse/Lexer.cpp

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1739,8 +1739,9 @@ void Lexer::lexStringLiteral() {
17391739
/// string literal, diagnose the problem and return a pointer to the end of the
17401740
/// entire string literal. This helps us avoid parsing the body of the string
17411741
/// as program tokens, which will only lead to massive confusion.
1742-
const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
1743-
1742+
const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
1743+
bool EmitDiagnostics) {
1744+
17441745
while (true) {
17451746
// Don't bother with string interpolations.
17461747
if (*Body == '\\' && *(Body + 1) == '(')
@@ -1752,7 +1753,7 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
17521753

17531754
// Get the next character.
17541755
const char *CharStart = Body;
1755-
unsigned CharValue = lexCharacter(Body, '\0', false);
1756+
unsigned CharValue = lexCharacter(Body, '\0', /*EmitDiagnostics=*/false);
17561757
// If the character was incorrectly encoded, give up.
17571758
if (CharValue == ~1U) return nullptr;
17581759

@@ -1764,8 +1765,11 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
17641765
// If we found an ending curly quote (common since this thing started with
17651766
// an opening curly quote) diagnose it with a fixit and then return.
17661767
if (CharValue == 0x0000201D) {
1767-
diagnose(CharStart, diag::lex_invalid_curly_quote)
1768-
.fixItReplaceChars(getSourceLoc(CharStart), getSourceLoc(Body), "\"");
1768+
if (EmitDiagnostics) {
1769+
diagnose(CharStart, diag::lex_invalid_curly_quote)
1770+
.fixItReplaceChars(getSourceLoc(CharStart), getSourceLoc(Body),
1771+
"\"");
1772+
}
17691773
return Body;
17701774
}
17711775

@@ -1864,13 +1868,15 @@ bool Lexer::tryLexConflictMarker(bool EatNewline) {
18641868
return false;
18651869
}
18661870

1867-
bool Lexer::lexUnknown() {
1871+
bool Lexer::lexUnknown(bool EmitDiagnosticsIfToken) {
18681872
const char *Tmp = CurPtr - 1;
18691873

18701874
if (advanceIfValidContinuationOfIdentifier(Tmp, BufferEnd)) {
18711875
// If this is a valid identifier continuation, but not a valid identifier
18721876
// start, attempt to recover by eating more continuation characters.
1873-
diagnose(CurPtr - 1, diag::lex_invalid_identifier_start_character);
1877+
if (EmitDiagnosticsIfToken) {
1878+
diagnose(CurPtr - 1, diag::lex_invalid_identifier_start_character);
1879+
}
18741880
while (advanceIfValidContinuationOfIdentifier(Tmp, BufferEnd))
18751881
;
18761882
CurPtr = Tmp;
@@ -1886,25 +1892,30 @@ bool Lexer::lexUnknown() {
18861892
return false; // Skip presumed whitespace.
18871893
} else if (Codepoint == 0x0000201D) {
18881894
// If this is an end curly quote, just diagnose it with a fixit hint.
1889-
diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
1890-
.fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp), "\"");
1895+
if (EmitDiagnosticsIfToken) {
1896+
diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
1897+
.fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp), "\"");
1898+
}
18911899
CurPtr = Tmp;
18921900
return true;
18931901
} else if (Codepoint == 0x0000201C) {
18941902
auto EndPtr = Tmp;
18951903
// If this is a start curly quote, do a fuzzy match of a string literal
18961904
// to improve recovery.
1897-
if (auto Tmp2 = findEndOfCurlyQuoteStringLiteral(Tmp))
1905+
if (auto Tmp2 =
1906+
findEndOfCurlyQuoteStringLiteral(Tmp, EmitDiagnosticsIfToken))
18981907
Tmp = Tmp2;
18991908

19001909
// Note, we intentionally diagnose the end quote before the start quote,
19011910
// so that the IDE suggests fixing the end quote before the start quote.
19021911
// This, in turn, works better with our error recovery because we won't
19031912
// diagnose an end curly quote in the middle of a straight quoted
19041913
// literal.
1905-
diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
1906-
.fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(EndPtr),
1907-
"\"");
1914+
if (EmitDiagnosticsIfToken) {
1915+
diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
1916+
.fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(EndPtr),
1917+
"\"");
1918+
}
19081919
CurPtr = Tmp;
19091920
return true;
19101921
}
@@ -2167,7 +2178,7 @@ void Lexer::lexImpl() {
21672178
if (advanceIfValidStartOfOperator(Tmp, BufferEnd))
21682179
return lexOperatorIdentifier();
21692180

2170-
bool ShouldTokenize = lexUnknown();
2181+
bool ShouldTokenize = lexUnknown(/*EmitDiagnosticsIfToken=*/true);
21712182
if (ShouldTokenize) {
21722183
return formToken(tok::unknown, TokStart);
21732184
}

0 commit comments

Comments
 (0)