Skip to content

[clang][Preprocessor] Add peekNextPPToken to look ahead at the next token without side effects #143898

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions clang/include/clang/Lex/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer {
//===--------------------------------------------------------------------===//
// Context that changes as the file is lexed.
// NOTE: any state that mutates when in raw mode must have save/restore code
// in Lexer::isNextPPTokenLParen.
// in Lexer::peekNextPPToken.

// BufferPtr - Current pointer into the buffer. This is the next character
// to be lexed.
Expand Down Expand Up @@ -645,10 +645,10 @@ class Lexer : public PreprocessorLexer {
BufferPtr = TokEnd;
}

/// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a
/// tok::l_paren token, 0 if it is something else and 2 if there are no more
/// tokens in the buffer controlled by this lexer.
unsigned isNextPPTokenLParen();
/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
/// buffer controlled by this lexer, otherwise return the next unexpanded
/// token.
std::optional<Token> peekNextPPToken();

//===--------------------------------------------------------------------===//
// Lexer character reading interfaces.
Expand Down
37 changes: 34 additions & 3 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -2302,10 +2302,41 @@ class Preprocessor {
}
}

/// Determine whether the next preprocessor token to be
/// lexed is a '('. If so, consume the token and return true, if not, this
/// Check whether the next pp-token is one of the specified token kinds. This
/// method should have no observable side-effect on the lexed tokens.
bool isNextPPTokenLParen();
template <tok::TokenKind K, tok::TokenKind... Ks> bool isNextPPTokenOneOf() {
  // K, Ks...: the token kinds to test the upcoming pp-token against.
  // Returns true only if a next token exists and its kind is one of them.

  // Peek at the next token of whichever lexer is currently active.
  std::optional<Token> Val;
  if (CurLexer)
    Val = CurLexer->peekNextPPToken();
  else
    Val = CurTokenLexer->peekNextPPToken();

  if (!Val) {
    // We have run off the end. If it's a source file we don't
    // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
    // macro stack.
    if (CurPPLexer)
      return false;
    for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
      if (Entry.TheLexer)
        Val = Entry.TheLexer->peekNextPPToken();
      else
        Val = Entry.TheTokenLexer->peekNextPPToken();

      if (Val)
        break;

      // Ran off the end of a source file?
      if (Entry.ThePPLexer)
        return false;
    }
  }

  // The walk above can finish with every enclosing lexer exhausted and no
  // source-file entry encountered. In that case there is no next token, so
  // report "no match" instead of dereferencing an empty optional.
  if (!Val)
    return false;

  // We found the next token; check it against the requested kinds.
  return Val->is(K) || (... || Val->is(Ks));
}

private:
/// Identifiers used for SEH handling in Borland. These are only
Expand Down
7 changes: 3 additions & 4 deletions clang/include/clang/Lex/TokenLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,9 @@ class TokenLexer {
void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
bool OwnsTokens, bool IsReinject);

/// If the next token lexed will pop this macro off the
/// expansion stack, return 2. If the next unexpanded token is a '(', return
/// 1, otherwise return 0.
unsigned isNextTokenLParen() const;
/// If the next token lexed will pop this macro off the expansion stack,
/// return std::nullopt, otherwise return the next unexpanded token.
std::optional<Token> peekNextPPToken() const;

/// Lex and return a token from this macro stream.
bool Lex(Token &Tok);
Expand Down
21 changes: 11 additions & 10 deletions clang/lib/Lex/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3202,18 +3202,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
return PP->HandleEndOfFile(Result, isPragmaLexer());
}

/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
/// the specified lexer will return a tok::l_paren token, 0 if it is something
/// else and 2 if there are no more tokens in the buffer controlled by the
/// lexer.
unsigned Lexer::isNextPPTokenLParen() {
/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
/// buffer controlled by this lexer, otherwise return the next unexpanded
/// token.
std::optional<Token> Lexer::peekNextPPToken() {
assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");

if (isDependencyDirectivesLexer()) {
if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
return 2;
return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
tok::l_paren);
return std::nullopt;
Token Result;
(void)convertDependencyDirectiveToken(
DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result);
return Result;
}

// Switch to 'skipping' mode. This will ensure that we can lex a token
Expand Down Expand Up @@ -3242,8 +3243,8 @@ unsigned Lexer::isNextPPTokenLParen() {
LexingRawMode = false;

if (Tok.is(tok::eof))
return 2;
return Tok.is(tok::l_paren);
return std::nullopt;
return Tok;
}

/// Find the end of a version control conflict marker.
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Lex/PPDirectives.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,9 @@ static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) {
AttributeCommonInfo::AttrArgsInfo AttrArgsInfo =
AttributeCommonInfo::getCXX11AttrArgsInfo(II);
if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required)
return PP.isNextPPTokenLParen();
return PP.isNextPPTokenOneOf<tok::l_paren>();

return !PP.isNextPPTokenLParen() ||
return !PP.isNextPPTokenOneOf<tok::l_paren>() ||
AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional;
}
return false;
Expand Down
38 changes: 0 additions & 38 deletions clang/lib/Lex/PPMacroExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,44 +418,6 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
return !llvm::is_contained(MI->params(), II);
}

/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
/// lexed is a '('. If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool Preprocessor::isNextPPTokenLParen() {
// Do some quick tests for rejection cases.
unsigned Val;
if (CurLexer)
Val = CurLexer->isNextPPTokenLParen();
else
Val = CurTokenLexer->isNextTokenLParen();

if (Val == 2) {
// We have run off the end. If it's a source file we don't
// examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
// macro stack.
if (CurPPLexer)
return false;
for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
if (Entry.TheLexer)
Val = Entry.TheLexer->isNextPPTokenLParen();
else
Val = Entry.TheTokenLexer->isNextTokenLParen();

if (Val != 2)
break;

// Ran off the end of a source file?
if (Entry.ThePPLexer)
return false;
}
}

// Okay, if we know that the token is a '(', lex it and return. Otherwise we
// have found something that isn't a '(' or we found the end of the
// translation unit. In either case, return false.
return Val == 1;
}

/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
/// expanded as a macro, handle it and return the next token as 'Identifier'.
bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Lex/Preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,14 +813,14 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
// C99 6.10.3p10: If the preprocessing token immediately after the
// macro name isn't a '(', this macro should not be expanded.
if (!MI->isFunctionLike() || isNextPPTokenLParen())
if (!MI->isFunctionLike() || isNextPPTokenOneOf<tok::l_paren>())
return HandleMacroExpandedIdentifier(Identifier, MD);
} else {
// C99 6.10.3.4p2 says that a disabled macro may never again be
// expanded, even if it's in a context where it could be expanded in the
// future.
Identifier.setFlag(Token::DisableExpand);
if (MI->isObjectLike() || isNextPPTokenLParen())
if (MI->isObjectLike() || isNextPPTokenOneOf<tok::l_paren>())
Diag(Identifier, diag::pp_disabled_macro_expansion);
}
}
Expand Down
10 changes: 5 additions & 5 deletions clang/lib/Lex/TokenLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -921,13 +921,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
}

/// peekNextPPToken - If the next token lexed will pop this macro off the
/// expansion stack, return 2. If the next unexpanded token is a '(', return
/// 1, otherwise return 0.
unsigned TokenLexer::isNextTokenLParen() const {
/// expansion stack, return std::nullopt, otherwise return the next unexpanded
/// token.
std::optional<Token> TokenLexer::peekNextPPToken() const {
// Out of tokens?
if (isAtEnd())
return 2;
return Tokens[CurTokenIdx].is(tok::l_paren);
return std::nullopt;
return Tokens[CurTokenIdx];
}

/// isParsingPreprocessorDirective - Return true if we are in the middle of a
Expand Down
Loading