[Parse] add InLexTrivia flag to lexInvalidCharacters

omochi · omochi · commit ca19fe4eeb43 · 2018-03-05T19:23:29.000+09:00
diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h
@@ -525,7 +525,8 @@ class Lexer {
   void lexEscapedIdentifier();
 
   void tryLexEditorPlaceholder();
-  const char *findEndOfCurlyQuoteStringLiteral(const char*);
+  const char *findEndOfCurlyQuoteStringLiteral(const char *,
+                                               bool EmitDiagnostics);
 
   /// Try to lex conflict markers by checking for the presence of the start and
   /// end of the marker in diff3 or Perforce style respectively.
@@ -534,7 +535,7 @@ class Lexer {
   NulCharacterKind getNulCharacterKind(const char *Ptr) const;
 
   /// Lex invalid characters and return which it should be tokenized.
-  bool lexInvalidCharacters(const char *&Ptr);
+  bool lexInvalidCharacters(const char *&Ptr, bool InLexTrivia);
 };
   
 /// Given an ordered token \param Array , get the iterator pointing to the first
diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp
@@ -1739,8 +1739,9 @@ void Lexer::lexStringLiteral() {
 /// string literal, diagnose the problem and return a pointer to the end of the
 /// entire string literal.  This helps us avoid parsing the body of the string
 /// as program tokens, which will only lead to massive confusion.
-const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
-  
+const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
+                                                    bool EmitDiagnostics) {
+
   while (true) {
     // Don't bother with string interpolations.
     if (*Body == '\\' && *(Body + 1) == '(')
@@ -1752,7 +1753,7 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
 
     // Get the next character.
     const char *CharStart = Body;
-    unsigned CharValue = lexCharacter(Body, '\0', false);
+    unsigned CharValue = lexCharacter(Body, '\0', /*EmitDiagnostics=*/false);
     // If the character was incorrectly encoded, give up.
     if (CharValue == ~1U) return nullptr;
     
@@ -1764,8 +1765,11 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body) {
     // If we found an ending curly quote (common since this thing started with
     // an opening curly quote) diagnose it with a fixit and then return.
     if (CharValue == 0x0000201D) {
-      diagnose(CharStart, diag::lex_invalid_curly_quote)
-        .fixItReplaceChars(getSourceLoc(CharStart), getSourceLoc(Body), "\"");
+      if (EmitDiagnostics) {
+        diagnose(CharStart, diag::lex_invalid_curly_quote)
+            .fixItReplaceChars(getSourceLoc(CharStart), getSourceLoc(Body),
+                               "\"");
+      }
       return Body;
     }
     
@@ -1875,15 +1879,19 @@ Lexer::NulCharacterKind Lexer::getNulCharacterKind(const char *Ptr) const {
   return NulCharacterKind::Embedded;
 }
 
-bool Lexer::lexInvalidCharacters(const char *&Ptr) {
+bool Lexer::lexInvalidCharacters(const char *&Ptr, bool InLexTrivia) {
+  // in lexTrivia, diagnose only when its should not be tokenize.
+
   assert(Ptr != nullptr);
 
   const char *const StartPtr = Ptr;
 
   if (advanceIfValidContinuationOfIdentifier(Ptr, BufferEnd)) {
     // If this is a valid identifier continuation, but not a valid identifier
     // start, attempt to recover by eating more continuation characters.
-    diagnose(StartPtr, diag::lex_invalid_identifier_start_character);
+    if (!InLexTrivia) {
+      diagnose(StartPtr, diag::lex_invalid_identifier_start_character);
+    }
     while (advanceIfValidContinuationOfIdentifier(Ptr, BufferEnd))
       ;
     return true;
@@ -1900,18 +1908,21 @@ bool Lexer::lexInvalidCharacters(const char *&Ptr) {
 
   if (codepoint == 0x0000201D) {
     // If this is an end curly quote, just diagnose it with a fixit hint.
-    diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
-        .fixItReplaceChars(getSourceLoc(StartPtr), getSourceLoc(Ptr), "\"");
+    if (!InLexTrivia) {
+      diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
+          .fixItReplaceChars(getSourceLoc(StartPtr), getSourceLoc(Ptr), "\"");
+    }
     return true;
   }
 
   if (codepoint == 0x0000201C) {
     const char *const LeftQuoteEndPtr = Ptr;
+    bool EmitDiagnostics = !InLexTrivia;
 
     // If this is a start curly quote, do a fuzzy match of a string literal
     // to improve recovery.
     if (const char *const RightQuoteEndPtr =
-            findEndOfCurlyQuoteStringLiteral(Ptr)) {
+            findEndOfCurlyQuoteStringLiteral(Ptr, EmitDiagnostics)) {
       Ptr = RightQuoteEndPtr;
     }
 
@@ -1920,10 +1931,11 @@ bool Lexer::lexInvalidCharacters(const char *&Ptr) {
     // This, in turn, works better with our error recovery because we won't
     // diagnose an end curly quote in the middle of a straight quoted
     // literal.
-    diagnose(StartPtr, diag::lex_invalid_curly_quote)
-        .fixItReplaceChars(getSourceLoc(StartPtr),
-                           getSourceLoc(LeftQuoteEndPtr), "\"");
-
+    if (EmitDiagnostics) {
+      diagnose(StartPtr, diag::lex_invalid_curly_quote)
+          .fixItReplaceChars(getSourceLoc(StartPtr),
+                             getSourceLoc(LeftQuoteEndPtr), "\"");
+    }
     return true;
   }
 
@@ -2174,7 +2186,7 @@ void Lexer::lexImpl() {
     if (advanceIfValidStartOfOperator(tmp, BufferEnd))
       return lexOperatorIdentifier();
 
-    bool ShouldTokenize = lexInvalidCharacters(tmp);
+    bool ShouldTokenize = lexInvalidCharacters(tmp, /*InLexTrivia=*/false);
     CurPtr = tmp;
 
     if (ShouldTokenize) {