Skip to content

Commit 247fa04

Browse files
committed
[clang] Add a new annotation token: annot_repl_input_end
This patch is the first part of the RFC below: https://discourse.llvm.org/t/rfc-handle-execution-results-in-clang-repl/68493 It adds an annotation token which replaces the original EOF token when we are in the incremental C++ mode. In addition, when we're parsing an ExprStmt and there's a missing semicolon after the expression, we set a marker in the annotation token and continue parsing. Eventually, we propagate this info in ParseTopLevelStmtDecl and are able to mark this Decl as something we want to do value printing for. Below is an example: clang-repl> int x = 42; clang-repl> x // `x` is a TopLevelStmtDecl and without a semicolon, we should set // its IsSemiMissing bit so we can do something interesting in // ASTConsumer::HandleTopLevelDecl. The idea of the annotation token was proposed by Richard Smith, thanks! Signed-off-by: Jun Zhang <[email protected]> Differential Revision: https://reviews.llvm.org/D148997
1 parent 8a5450d commit 247fa04

File tree

10 files changed

+62
-20
lines changed

10 files changed

+62
-20
lines changed

clang/include/clang/AST/Decl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4324,6 +4324,7 @@ class TopLevelStmtDecl : public Decl {
43244324
friend class ASTDeclWriter;
43254325

43264326
Stmt *Statement = nullptr;
4327+
bool IsSemiMissing = false;
43274328

43284329
TopLevelStmtDecl(DeclContext *DC, SourceLocation L, Stmt *S)
43294330
: Decl(TopLevelStmt, DC, L), Statement(S) {}
@@ -4337,6 +4338,12 @@ class TopLevelStmtDecl : public Decl {
43374338
SourceRange getSourceRange() const override LLVM_READONLY;
43384339
Stmt *getStmt() { return Statement; }
43394340
const Stmt *getStmt() const { return Statement; }
4341+
void setStmt(Stmt *S) {
4342+
assert(IsSemiMissing && "Operation supported for printing values only!");
4343+
Statement = S;
4344+
}
4345+
bool isSemiMissing() const { return IsSemiMissing; }
4346+
void setSemiMissing(bool Missing = true) { IsSemiMissing = Missing; }
43404347

43414348
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
43424349
static bool classofKind(Kind K) { return K == TopLevelStmt; }

clang/include/clang/Basic/TokenKinds.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,9 @@ ANNOTATION(module_end)
942942
// into the name of a header unit.
943943
ANNOTATION(header_unit)
944944

945+
// Annotation for end of input in clang-repl.
946+
ANNOTATION(repl_input_end)
947+
945948
#undef PRAGMA_ANNOTATION
946949
#undef ANNOTATION
947950
#undef TESTING_KEYWORD

clang/include/clang/Parse/Parser.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "clang/Basic/OpenMPKinds.h"
1919
#include "clang/Basic/OperatorPrecedence.h"
2020
#include "clang/Basic/Specifiers.h"
21+
#include "clang/Basic/TokenKinds.h"
2122
#include "clang/Lex/CodeCompletionHandler.h"
2223
#include "clang/Lex/Preprocessor.h"
2324
#include "clang/Sema/DeclSpec.h"
@@ -692,7 +693,8 @@ class Parser : public CodeCompletionHandler {
692693
bool isEofOrEom() {
693694
tok::TokenKind Kind = Tok.getKind();
694695
return Kind == tok::eof || Kind == tok::annot_module_begin ||
695-
Kind == tok::annot_module_end || Kind == tok::annot_module_include;
696+
Kind == tok::annot_module_end || Kind == tok::annot_module_include ||
697+
Kind == tok::annot_repl_input_end;
696698
}
697699

698700
/// Checks if the \p Level is valid for use in a fold expression.

clang/lib/Frontend/PrintPreprocessedOutput.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,8 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
663663
// them.
664664
if (Tok.is(tok::eof) ||
665665
(Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
666-
!Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end)))
666+
!Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
667+
!Tok.is(tok::annot_repl_input_end)))
667668
return;
668669

669670
// EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -819,6 +820,9 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
819820
// -traditional-cpp the lexer keeps /all/ whitespace, including comments.
820821
PP.Lex(Tok);
821822
continue;
823+
} else if (Tok.is(tok::annot_repl_input_end)) {
824+
PP.Lex(Tok);
825+
continue;
822826
} else if (Tok.is(tok::eod)) {
823827
// Don't print end of directive tokens, since they are typically newlines
824828
// that mess up our line tracking. These come from unknown pre-processor

clang/lib/Interpreter/IncrementalParser.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@ IncrementalParser::ParseOrWrapTopLevelDecl() {
158158
LastPTU.TUPart = C.getTranslationUnitDecl();
159159

160160
// Skip previous eof due to last incremental input.
161-
if (P->getCurToken().is(tok::eof)) {
162-
P->ConsumeToken();
161+
if (P->getCurToken().is(tok::annot_repl_input_end)) {
162+
P->ConsumeAnyToken();
163163
// FIXME: Clang does not call ExitScope on finalizing the regular TU, we
164164
// might want to do that around HandleEndOfTranslationUnit.
165165
P->ExitScope();
@@ -259,14 +259,14 @@ IncrementalParser::Parse(llvm::StringRef input) {
259259
Token Tok;
260260
do {
261261
PP.Lex(Tok);
262-
} while (Tok.isNot(tok::eof));
262+
} while (Tok.isNot(tok::annot_repl_input_end));
263+
} else {
264+
Token AssertTok;
265+
PP.Lex(AssertTok);
266+
assert(AssertTok.is(tok::annot_repl_input_end) &&
267+
"Lexer must be EOF when starting incremental parse!");
263268
}
264269

265-
Token AssertTok;
266-
PP.Lex(AssertTok);
267-
assert(AssertTok.is(tok::eof) &&
268-
"Lexer must be EOF when starting incremental parse!");
269-
270270
if (CodeGenerator *CG = getCodeGen(Act.get())) {
271271
std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
272272
CG->StartModule("incr_module_" + std::to_string(PTUs.size()),

clang/lib/Lex/PPLexerChange.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,13 +535,19 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
535535
return LeavingSubmodule;
536536
}
537537
}
538-
539538
// If this is the end of the main file, form an EOF token.
540539
assert(CurLexer && "Got EOF but no current lexer set!");
541540
const char *EndPos = getCurLexerEndPos();
542541
Result.startToken();
543542
CurLexer->BufferPtr = EndPos;
544-
CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
543+
544+
if (isIncrementalProcessingEnabled()) {
545+
CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_repl_input_end);
546+
Result.setAnnotationEndLoc(Result.getLocation());
547+
Result.setAnnotationValue(nullptr);
548+
} else {
549+
CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
550+
}
545551

546552
if (isCodeCompletionEnabled()) {
547553
// Inserting the code-completion point increases the source buffer by 1,

clang/lib/Parse/ParseCXXInlineMethods.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,7 @@ bool Parser::ConsumeAndStoreUntil(tok::TokenKind T1, tok::TokenKind T2,
838838
case tok::annot_module_begin:
839839
case tok::annot_module_end:
840840
case tok::annot_module_include:
841+
case tok::annot_repl_input_end:
841842
// Ran out of tokens.
842843
return false;
843844

@@ -1244,6 +1245,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
12441245
case tok::annot_module_begin:
12451246
case tok::annot_module_end:
12461247
case tok::annot_module_include:
1248+
case tok::annot_repl_input_end:
12471249
// Ran out of tokens.
12481250
return false;
12491251

clang/lib/Parse/ParseDecl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2070,6 +2070,7 @@ void Parser::SkipMalformedDecl() {
20702070
case tok::annot_module_begin:
20712071
case tok::annot_module_end:
20722072
case tok::annot_module_include:
2073+
case tok::annot_repl_input_end:
20732074
return;
20742075

20752076
default:
@@ -5456,6 +5457,13 @@ Parser::DeclGroupPtrTy Parser::ParseTopLevelStmtDecl() {
54565457

54575458
SmallVector<Decl *, 2> DeclsInGroup;
54585459
DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(R.get()));
5460+
5461+
if (Tok.is(tok::annot_repl_input_end) &&
5462+
Tok.getAnnotationValue() != nullptr) {
5463+
ConsumeAnnotationToken();
5464+
cast<TopLevelStmtDecl>(DeclsInGroup.back())->setSemiMissing();
5465+
}
5466+
54595467
// Currently happens for things like -fms-extensions and use `__if_exists`.
54605468
for (Stmt *S : Stmts)
54615469
DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(S));

clang/lib/Parse/ParseStmt.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -544,9 +544,22 @@ StmtResult Parser::ParseExprStatement(ParsedStmtContext StmtCtx) {
544544
return ParseCaseStatement(StmtCtx, /*MissingCase=*/true, Expr);
545545
}
546546

547-
// Otherwise, eat the semicolon.
548-
ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
549-
return handleExprStmt(Expr, StmtCtx);
547+
Token *CurTok = nullptr;
548+
// If the semicolon is missing at the end of REPL input, consider if
549+
// we want to do value printing. Note this is only enabled in C++ mode
550+
// since part of the implementation requires C++ language features.
551+
// Note we shouldn't eat the token since the callback needs it.
552+
if (Tok.is(tok::annot_repl_input_end) && Actions.getLangOpts().CPlusPlus)
553+
CurTok = &Tok;
554+
else
555+
// Otherwise, eat the semicolon.
556+
ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
557+
558+
StmtResult R = handleExprStmt(Expr, StmtCtx);
559+
if (CurTok && !R.isInvalid())
560+
CurTok->setAnnotationValue(R.get());
561+
562+
return R;
550563
}
551564

552565
/// ParseSEHTryBlockCommon

clang/lib/Parse/Parser.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ bool Parser::SkipUntil(ArrayRef<tok::TokenKind> Toks, SkipUntilFlags Flags) {
320320
case tok::annot_module_begin:
321321
case tok::annot_module_end:
322322
case tok::annot_module_include:
323+
case tok::annot_repl_input_end:
323324
// Stop before we change submodules. They generally indicate a "good"
324325
// place to pick up parsing again (except in the special case where
325326
// we're trying to skip to EOF).
@@ -614,11 +615,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
614615
Sema::ModuleImportState &ImportState) {
615616
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(*this);
616617

617-
// Skip over the EOF token, flagging end of previous input for incremental
618-
// processing
619-
if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::eof))
620-
ConsumeToken();
621-
622618
Result = nullptr;
623619
switch (Tok.getKind()) {
624620
case tok::annot_pragma_unused:
@@ -697,6 +693,7 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
697693
return false;
698694

699695
case tok::eof:
696+
case tok::annot_repl_input_end:
700697
// Check whether -fmax-tokens= was reached.
701698
if (PP.getMaxTokens() != 0 && PP.getTokenCount() > PP.getMaxTokens()) {
702699
PP.Diag(Tok.getLocation(), diag::warn_max_tokens_total)

0 commit comments

Comments
 (0)