Skip to content

Reapply "[Clang] Implement P3034R1 Module Declarations Shouldn’t be Macros" #102135

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ C++2c Feature Support
- Add ``__builtin_is_virtual_base_of`` intrinsic, which supports
`P2985R0 A type trait for detecting virtual base classes <https://wg21.link/p2985r0>`_

- Implemented `P3034R1 Module Declarations Shouldn’t be Macros <https://wg21.link/P3034R1>`_.

Resolutions to C++ Defect Reports
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticLexKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,11 @@ def warn_module_conflict : Warning<
InGroup<ModuleConflict>;

// C++20 modules
def err_module_decl_cannot_be_macros : Error<
"the module name in a module%select{| partition}0 declaration cannot contain "
"an object-like macro %1">;
def err_unxepected_paren_in_module_decl : Error<
"unexpected '(' after the module name in a module%select{| partition}0 declaration">;
def err_header_import_semi_in_macro : Error<
"semicolon terminating header import declaration cannot be produced "
"by a macro">;
Expand Down
27 changes: 23 additions & 4 deletions clang/include/clang/Basic/IdentifierTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
LLVM_PREFERRED_TYPE(bool)
unsigned IsModulesImport : 1;

// True if this is the 'module' contextual keyword.
LLVM_PREFERRED_TYPE(bool)
unsigned IsModulesDecl : 1;

// True if this is a mangled OpenMP variant name.
LLVM_PREFERRED_TYPE(bool)
unsigned IsMangledOpenMPVariantName : 1;
Expand All @@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
LLVM_PREFERRED_TYPE(bool)
unsigned IsFinal : 1;

// 22 bits left in a 64-bit word.
// 21 bits left in a 64-bit word.

// Managed by the language front-end.
void *FETokenInfo = nullptr;
Expand All @@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
IsRestrictExpansion(false), IsFinal(false) {}
IsModulesDecl(false), IsMangledOpenMPVariantName(false),
IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}

public:
IdentifierInfo(const IdentifierInfo &) = delete;
Expand Down Expand Up @@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
RecomputeNeedsHandleIdentifier();
}

/// Determine whether this is the contextual keyword \c module.
bool isModulesDeclaration() const { return IsModulesDecl; }

/// Mark or unmark this identifier as the contextual keyword \c module.
///
/// Setting the flag forces NeedsHandleIdentifier on; clearing it recomputes
/// NeedsHandleIdentifier via RecomputeNeedsHandleIdentifier().
void setModulesDeclaration(bool I) {
  IsModulesDecl = I;
  if (!I) {
    RecomputeNeedsHandleIdentifier();
    return;
  }
  NeedsHandleIdentifier = true;
}

/// Determine whether this is the mangled name of an OpenMP variant.
bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }

Expand Down Expand Up @@ -569,7 +585,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
void RecomputeNeedsHandleIdentifier() {
NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
isExtensionToken() || isFutureCompatKeyword() ||
isOutOfDate() || isModulesImport();
isOutOfDate() || isModulesImport() ||
isModulesDeclaration();
}
};

Expand Down Expand Up @@ -740,6 +757,8 @@ class IdentifierTable {
// If this is the 'import' contextual keyword, mark it as such.
if (Name == "import")
II->setModulesImport(true);
else if (Name == "module")
II->setModulesDeclaration(true);

return *II;
}
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/TokenKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,9 @@ ANNOTATION(module_include)
ANNOTATION(module_begin)
ANNOTATION(module_end)

// Annotations for C++, Clang and Objective-C named modules.
ANNOTATION(module_name)

// Annotation for a header_name token that has been looked up and transformed
// into the name of a header unit.
ANNOTATION(header_unit)
Expand Down
23 changes: 11 additions & 12 deletions clang/include/clang/Lex/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer {
//===--------------------------------------------------------------------===//
// Context that changes as the file is lexed.
// NOTE: any state that mutates when in raw mode must have save/restore code
// in Lexer::isNextPPTokenLParen.
// in Lexer::peekNextPPToken.

// BufferPtr - Current pointer into the buffer. This is the next character
// to be lexed.
Expand All @@ -136,6 +136,8 @@ class Lexer : public PreprocessorLexer {

bool IsAtPhysicalStartOfLine;

bool IsCurrentLexingTokAtPhysicalStartOfLine;
Comment on lines 137 to +139
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have IsAtStartOfLine and IsAtPhysicalStartOfLine - Why do we need a third one?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your review! This change was introduced by 829b1c1. It's used to replace the bool TokAtPhysicalStartOfLine argument in

bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
. It's a bit different from Lexer::IsAtPhysicalStartOfLine: it records whether the current token starts at the physical start of a line. We may skip a '\n' or a comment before the real token starts. With this change, we can avoid passing this value as a function argument.


bool HasLeadingSpace;

bool HasLeadingEmptyMacro;
Expand Down Expand Up @@ -609,7 +611,7 @@ class Lexer : public PreprocessorLexer {
/// LexTokenInternal - Internal interface to lex a preprocessing token. Called
/// by Lex.
///
bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
bool LexTokenInternal(Token &Result);

bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);

Expand All @@ -629,10 +631,10 @@ class Lexer : public PreprocessorLexer {
BufferPtr = TokEnd;
}

/// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a
/// tok::l_paren token, 0 if it is something else and 2 if there are no more
/// tokens in the buffer controlled by this lexer.
unsigned isNextPPTokenLParen();
/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
/// buffer controlled by this lexer, otherwise return the next unexpanded
/// token.
std::optional<Token> peekNextPPToken();

//===--------------------------------------------------------------------===//
// Lexer character reading interfaces.
Expand Down Expand Up @@ -749,12 +751,9 @@ class Lexer : public PreprocessorLexer {
bool LexCharConstant (Token &Result, const char *CurPtr,
tok::TokenKind Kind);
bool LexEndOfFile (Token &Result, const char *CurPtr);
bool SkipWhitespace (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);
bool SkipLineComment (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);
bool SkipBlockComment (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);
bool SkipWhitespace(Token &Result, const char *CurPtr);
bool SkipLineComment(Token &Result, const char *CurPtr);
bool SkipBlockComment(Token &Result, const char *CurPtr);
bool SaveLineComment (Token &Result, const char *CurPtr);

bool IsStartOfConflictMarker(const char *CurPtr);
Expand Down
121 changes: 96 additions & 25 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,77 @@ class TokenValue {
}
};

/// Represents a module or partition name token sequence.
///
/// module-name:
/// module-name-qualifier[opt] identifier
///
/// partition-name: [C++20]
/// : module-name-qualifier[opt] identifier
///
/// module-name-qualifier:
/// module-name-qualifier[opt] identifier .
///
/// This class can only be created by the preprocessor, which guarantees that
/// the two source arrays are contiguous in memory and contain only 3 kinds of
/// tokens (identifier, '.' and ':'). It is only available when the
/// preprocessor returns an annot_module_name token.
///
/// For example:
///
/// export module m.n:c.d
///
/// The module name array has 3 tokens ['m', '.', 'n'].
/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
///
/// When importing a partition in a named module fragment (e.g. import :part1;),
/// the module name array will be empty, and the partition name array has 2
/// tokens.
///
/// When we meet a private-module-fragment (e.g. module :private;), the
/// preprocessor will not return an annot_module_name token, but will return 2
/// separate tokens [':', 'kw_private'].

class ModuleNameInfo {
  friend class Preprocessor;

  // Tokens of the module-name part; empty when only a partition is named.
  ArrayRef<Token> ModuleName;
  // Tokens of the partition part, beginning with its ':' token; empty when no
  // partition is named.
  ArrayRef<Token> PartitionName;

  // Private constructor: only the befriended Preprocessor can create one.
  ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);

public:
  /// Whether a module-name part is present.
  bool hasModuleName() const { return !ModuleName.empty(); }

  /// Whether a partition part is present.
  bool hasPartitionName() const { return !PartitionName.empty(); }

  /// The tokens of the module-name part (possibly empty).
  ArrayRef<Token> getModuleName() const { return ModuleName; }

  /// The tokens of the partition part (possibly empty).
  ArrayRef<Token> getPartitionName() const { return PartitionName; }

  /// Return every token as one contiguous array.
  ///
  /// When both parts are present, the result spans from the first module-name
  /// token to the last partition token; this relies on the two parts being
  /// adjacent in memory, as guaranteed by the preprocessor.
  ArrayRef<Token> getTokens() const {
    if (!hasModuleName())
      return PartitionName;
    if (!hasPartitionName())
      return ModuleName;
    return ArrayRef<Token>(ModuleName.begin(), PartitionName.end());
  }

  /// Return the first token of the partition part, i.e. its ':'.
  /// Must only be called when a partition part is present.
  Token getColonToken() const {
    assert(hasPartitionName() && "Do not have a partition name");
    return PartitionName.front();
  }

  /// Under the standard C++ Modules, the dot is just part of the module name,
  /// and not a real hierarchy separator. Produce the flattened name.
  std::string getFlatName() const;

  /// Build a module id path from the contiguous token array, covering both
  /// the module name and the partition name.
  void getModuleIdPath(
      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;

  /// Build a module id path from \p ModuleName.
  static void getModuleIdPath(
      ArrayRef<Token> ModuleName,
      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path);
};

/// Context in which macro name is used.
enum MacroUse {
// other than #define or #undef
Expand Down Expand Up @@ -337,6 +408,9 @@ class Preprocessor {
/// Whether the last token we lexed was an '@'.
bool LastTokenWasAt = false;

/// Set when the last token we lexed was an 'export' keyword (presumably
/// holding that token); std::nullopt otherwise.
std::optional<Token> LastTokenWasExportKeyword = std::nullopt;

/// A position within a C++20 import-seq.
class StdCXXImportSeq {
public:
Expand Down Expand Up @@ -540,24 +614,12 @@ class Preprocessor {
reset();
}

void handleIdentifier(IdentifierInfo *Identifier) {
if (isModuleCandidate() && Identifier)
Name += Identifier->getName().str();
else if (!isNamedModule())
reset();
}

void handleColon() {
if (isModuleCandidate())
Name += ":";
else if (!isNamedModule())
reset();
}

void handlePeriod() {
if (isModuleCandidate())
Name += ".";
else if (!isNamedModule())
/// Handle an annot_module_name token.
///
/// If isModuleCandidate() holds, store the token's flattened module name in
/// Name; otherwise call reset() unless isNamedModule() holds.
void handleModuleName(Token ModuleName) {
  assert(ModuleName.is(tok::annot_module_name) && "Expect a module name");
  if (isModuleCandidate()) {
    const auto *Info = ModuleName.getAnnotationValueAs<ModuleNameInfo *>();
    Name = Info->getFlatName();
    return;
  }
  if (!isNamedModule())
    reset();
}

Expand Down Expand Up @@ -615,10 +677,6 @@ class Preprocessor {

ModuleDeclSeq ModuleDeclState;

/// Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier = false;

/// The identifier and source location of the currently-active
/// \#pragma clang arc_cf_code_audited begin.
std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
Expand Down Expand Up @@ -1763,11 +1821,14 @@ class Preprocessor {
/// Lex a token, forming a header-name token if possible.
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);

/// Lex a module name or a partition name.
bool LexModuleName(Token &Result, bool IsImport);

/// Lex the parameters for an #embed directive, returns nullopt on error.
std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
bool ForHasEmbed);

bool LexAfterModuleImport(Token &Result);
bool LexAfterModuleDecl(Token &Result);
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);

void makeModuleVisible(Module *M, SourceLocation Loc);
Expand Down Expand Up @@ -2329,6 +2390,8 @@ class Preprocessor {
/// token stream.
bool HandleEndOfTokenLexer(Token &Result);

bool HandleModuleContextualKeyword(Token &Result);

/// Callback invoked when the lexer sees a # token at the start of a
/// line.
///
Expand Down Expand Up @@ -2650,10 +2713,16 @@ class Preprocessor {

void removeCachedMacroExpandedTokensOfLastLexer();

/// Peek at the next preprocessor token without consuming it. Returns the
/// token if there is one; either way, this method should have no observable
/// side-effect on the lexed tokens.
std::optional<Token> peekNextPPToken();

/// Determine whether the next preprocessor token to be
/// lexed is a '('. The token is only peeked at via peekNextPPToken(), so this
/// method should have no observable side-effect on the lexed tokens.
bool isNextPPTokenLParen();
bool isNextPPTokenLParen() {
return peekNextPPToken().value_or(Token{}).is(tok::l_paren);
}

/// After reading "MACRO(", this method is invoked to read all of the formal
/// arguments specified for the macro invocation. Returns null on error.
Expand Down Expand Up @@ -3059,6 +3128,9 @@ class Preprocessor {
static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
return P.LexAfterModuleImport(Result);
}
/// Static forwarder that invokes Preprocessor::LexAfterModuleDecl on \p P and
/// returns its result.
static bool CLK_LexAfterModuleDecl(Preprocessor &P, Token &Result) {
return P.LexAfterModuleDecl(Result);
}
};

/// Abstract base class that describes a handler that will receive
Expand Down Expand Up @@ -3090,7 +3162,6 @@ struct EmbedAnnotationData {

/// Registry of pragma handlers added by plugins
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;

} // namespace clang

#endif // LLVM_CLANG_LEX_PREPROCESSOR_H
7 changes: 7 additions & 0 deletions clang/include/clang/Lex/Token.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ class Token {
assert(isAnnotation() && "Used AnnotVal on non-annotation token");
return PtrData;
}
/// Return the annotation value converted to \p T with a static_cast.
/// Like getAnnotationValue(), this is only meaningful on annotation tokens.
template <class T> T getAnnotationValueAs() const {
  auto Raw = getAnnotationValue();
  return static_cast<T>(Raw);
}
void setAnnotationValue(void *val) {
assert(isAnnotation() && "Used AnnotVal on non-annotation token");
PtrData = val;
Expand Down Expand Up @@ -289,6 +292,10 @@ class Token {
/// Return the ObjC keyword kind.
tok::ObjCKeywordKind getObjCKeywordID() const;

/// Return true if we have a C++20 Modules contextual keyword (export, import
/// or module).
bool isModuleContextualKeyword() const;

bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;

/// Return true if this token has trigraphs or escaped newlines in it.
Expand Down
7 changes: 3 additions & 4 deletions clang/include/clang/Lex/TokenLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,9 @@ class TokenLexer {
void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
bool OwnsTokens, bool IsReinject);

/// If the next token lexed will pop this macro off the
/// expansion stack, return 2. If the next unexpanded token is a '(', return
/// 1, otherwise return 0.
unsigned isNextTokenLParen() const;
/// If the next token lexed will pop this macro off the expansion stack,
/// return std::nullopt, otherwise return the next unexpanded token.
std::optional<Token> peekNextPPToken() const;

/// Lex and return a token from this macro stream.
bool Lex(Token &Tok);
Expand Down
Loading
Loading