19
19
#include " clang/Basic/Diagnostic.h"
20
20
#include " clang/Lex/LexDiagnostic.h"
21
21
#include " clang/Lex/Lexer.h"
22
+ #include " clang/Lex/Pragma.h"
22
23
#include " llvm/ADT/ScopeExit.h"
23
24
#include " llvm/ADT/SmallString.h"
24
25
#include " llvm/ADT/StringMap.h"
@@ -72,6 +73,8 @@ struct Scanner {
72
73
// Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
73
74
LangOpts.ObjC = true ;
74
75
LangOpts.LineComment = true ;
76
+ // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and
77
+ // R"()" literals.
75
78
return LangOpts;
76
79
}
77
80
@@ -91,6 +94,10 @@ struct Scanner {
91
94
void skipLine (const char *&First, const char *const End);
92
95
void skipDirective (StringRef Name, const char *&First, const char *const End);
93
96
97
+ /// Returns the spelling of a string literal or identifier after performing
98
+ /// any processing needed to handle \c clang::Token::NeedsCleaning.
99
+ StringRef cleanStringIfNeeded (const dependency_directives_scan::Token &Tok);
100
+
94
101
/// Lexes the next token and, if it is an identifier, returns its string;
95
102
/// otherwise it skips the current line and returns \p std::nullopt.
96
103
///
@@ -112,13 +119,30 @@ struct Scanner {
112
119
const char *&First,
113
120
const char *const End);
114
121
122
+ /// Lexes the next token and returns true iff it matches the kind \p K.
123
+ /// Otherwise it skips the current line and returns false.
124
+ ///
125
+ /// In any case (whatever the token kind) \p First and the \p Lexer will
126
+ /// advance beyond the token.
127
+ [[nodiscard]] bool isNextTokenOrSkipLine (tok::TokenKind K, const char *&First,
128
+ const char *const End);
129
+
130
+ /// Lexes the next token and, if it is a string literal, returns its string.
131
+ /// Otherwise, it skips the current line and returns \p std::nullopt.
132
+ ///
133
+ /// In any case (whatever the token kind) \p First and the \p Lexer will
134
+ /// advance beyond the token.
135
+ [[nodiscard]] std::optional<StringRef>
136
+ tryLexStringLiteralOrSkipLine (const char *&First, const char *const End);
137
+
115
138
[[nodiscard]] bool scanImpl (const char *First, const char *const End);
116
139
[[nodiscard]] bool lexPPLine (const char *&First, const char *const End);
117
140
[[nodiscard]] bool lexAt (const char *&First, const char *const End);
118
141
[[nodiscard]] bool lexModule (const char *&First, const char *const End);
119
142
[[nodiscard]] bool lexDefine (const char *HashLoc, const char *&First,
120
143
const char *const End);
121
144
[[nodiscard]] bool lexPragma (const char *&First, const char *const End);
145
+ [[nodiscard]] bool lex_Pragma (const char *&First, const char *const End);
122
146
[[nodiscard]] bool lexEndif (const char *&First, const char *const End);
123
147
[[nodiscard]] bool lexDefault (DirectiveKind Kind, const char *&First,
124
148
const char *const End);
@@ -525,22 +549,18 @@ void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
525
549
}
526
550
}
527
551
528
- [[nodiscard]] std::optional<StringRef>
529
- Scanner::tryLexIdentifierOrSkipLine (const char *&First, const char *const End) {
530
- const dependency_directives_scan::Token &Tok = lexToken (First, End);
531
- if (Tok.isNot (tok::raw_identifier)) {
532
- if (!Tok.is (tok::eod))
533
- skipLine (First, End);
534
- return std::nullopt;
535
- }
536
-
552
+ StringRef
553
+ Scanner::cleanStringIfNeeded (const dependency_directives_scan::Token &Tok) {
537
554
bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
538
555
if (LLVM_LIKELY (!NeedsCleaning))
539
556
return Input.slice (Tok.Offset , Tok.getEnd ());
540
557
541
558
SmallString<64 > Spelling;
542
559
Spelling.resize (Tok.Length );
543
560
561
+ // FIXME: C++11 raw string literals need special handling (see getSpellingSlow
562
+ // in the Lexer). Currently we cannot see them due to our LangOpts.
563
+
544
564
unsigned SpellingLength = 0 ;
545
565
const char *BufPtr = Input.begin () + Tok.Offset ;
546
566
const char *AfterIdent = Input.begin () + Tok.getEnd ();
@@ -555,6 +575,18 @@ Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
555
575
.first ->first ();
556
576
}
557
577
578
+ std::optional<StringRef>
579
+ Scanner::tryLexIdentifierOrSkipLine (const char *&First, const char *const End) {
580
+ const dependency_directives_scan::Token &Tok = lexToken (First, End);
581
+ if (Tok.isNot (tok::raw_identifier)) {
582
+ if (!Tok.is (tok::eod))
583
+ skipLine (First, End);
584
+ return std::nullopt;
585
+ }
586
+
587
+ return cleanStringIfNeeded (Tok);
588
+ }
589
+
558
590
StringRef Scanner::lexIdentifier (const char *&First, const char *const End) {
559
591
std::optional<StringRef> Id = tryLexIdentifierOrSkipLine (First, End);
560
592
assert (Id && " expected identifier token" );
@@ -572,6 +604,28 @@ bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
572
604
return false ;
573
605
}
574
606
607
+ bool Scanner::isNextTokenOrSkipLine (tok::TokenKind K, const char *&First,
608
+ const char *const End) {
609
+ const dependency_directives_scan::Token &Tok = lexToken (First, End);
610
+ if (Tok.is (K))
611
+ return true ;
612
+ skipLine (First, End);
613
+ return false ;
614
+ }
615
+
616
+ std::optional<StringRef>
617
+ Scanner::tryLexStringLiteralOrSkipLine (const char *&First,
618
+ const char *const End) {
619
+ const dependency_directives_scan::Token &Tok = lexToken (First, End);
620
+ if (!tok::isStringLiteral (Tok.Kind )) {
621
+ if (!Tok.is (tok::eod))
622
+ skipLine (First, End);
623
+ return std::nullopt;
624
+ }
625
+
626
+ return cleanStringIfNeeded (Tok);
627
+ }
628
+
575
629
bool Scanner::lexAt (const char *&First, const char *const End) {
576
630
// Handle "@import".
577
631
@@ -629,6 +683,41 @@ bool Scanner::lexModule(const char *&First, const char *const End) {
629
683
return lexModuleDirectiveBody (Kind, First, End);
630
684
}
631
685
686
+ bool Scanner::lex_Pragma (const char *&First, const char *const End) {
687
+ if (!isNextTokenOrSkipLine (tok::l_paren, First, End))
688
+ return false ;
689
+
690
+ std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine (First, End);
691
+
692
+ if (!Str || !isNextTokenOrSkipLine (tok::r_paren, First, End))
693
+ return false ;
694
+
695
+ SmallString<64 > Buffer (*Str);
696
+ prepare_PragmaString (Buffer);
697
+
698
+ // Use a new scanner instance since the tokens will be inside the allocated
699
+ // string. We should already have captured all the relevant tokens in the
700
+ // current scanner.
701
+ SmallVector<dependency_directives_scan::Token> DiscardTokens;
702
+ const char *Begin = Buffer.c_str ();
703
+ Scanner PragmaScanner{StringRef (Begin, Buffer.size ()), DiscardTokens, Diags,
704
+ InputSourceLoc};
705
+
706
+ PragmaScanner.TheLexer .setParsingPreprocessorDirective (true );
707
+ if (PragmaScanner.lexPragma (Begin, Buffer.end ()))
708
+ return true ;
709
+
710
+ DirectiveKind K = PragmaScanner.topDirective ();
711
+ if (K == pp_none) {
712
+ skipLine (First, End);
713
+ return false ;
714
+ }
715
+
716
+ assert (Begin == Buffer.end ());
717
+ pushDirective (K);
718
+ return false ;
719
+ }
720
+
632
721
bool Scanner::lexPragma (const char *&First, const char *const End) {
633
722
std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine (First, End);
634
723
if (!FoundId)
@@ -713,6 +802,7 @@ static bool isStartOfRelevantLine(char First) {
713
802
case ' i' :
714
803
case ' e' :
715
804
case ' m' :
805
+ case ' _' :
716
806
return true ;
717
807
}
718
808
return false ;
@@ -749,6 +839,12 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
749
839
if (*First == ' i' || *First == ' e' || *First == ' m' )
750
840
return lexModule (First, End);
751
841
842
+ if (*First == ' _' ) {
843
+ if (isNextIdentifierOrSkipLine (" _Pragma" , First, End))
844
+ return lex_Pragma (First, End);
845
+ return false ;
846
+ }
847
+
752
848
// Handle preprocessing directives.
753
849
754
850
TheLexer.setParsingPreprocessorDirective (true );
0 commit comments