Skip to content

Commit a3dfe5f

Browse files
yronglinyuxuanchen1997
authored and committed
[Clang] Implement P3034R1 Module Declarations Shouldn’t be Macros (#90574)
Summary: This PR implements [P3034R1 Module Declarations Shouldn’t be Macros](https://wg21.link/P3034R1), and refactors the convoluted state machines in module name lexical analysis. --------- Signed-off-by: yronglin <[email protected]> Co-authored-by: Aaron Ballman <[email protected]> Co-authored-by: cor3ntin <[email protected]> Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251236
1 parent 31f7ed6 commit a3dfe5f

File tree

19 files changed

+717
-263
lines changed

19 files changed

+717
-263
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ C++2c Feature Support
285285

286286
- Implemented `P2963R3 Ordering of constraints involving fold expressions <https://wg21.link/P2963R3>`_.
287287

288+
- Implemented `P3034R1 Module Declarations Shouldn’t be Macros <https://wg21.link/P3034R1>`_.
289+
288290

289291
Resolutions to C++ Defect Reports
290292
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/DiagnosticLexKinds.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,11 @@ def warn_module_conflict : Warning<
952952
InGroup<ModuleConflict>;
953953

954954
// C++20 modules
955+
def err_module_decl_cannot_be_macros : Error<
956+
"the module name in a module%select{| partition}0 declaration cannot contain "
957+
"an object-like macro %1">;
958+
def err_unxepected_paren_in_module_decl : Error<
959+
"unexpected '(' after the module name in a module%select{| partition}0 declaration">;
955960
def err_header_import_semi_in_macro : Error<
956961
"semicolon terminating header import declaration cannot be produced "
957962
"by a macro">;

clang/include/clang/Basic/IdentifierTable.h

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
180180
LLVM_PREFERRED_TYPE(bool)
181181
unsigned IsModulesImport : 1;
182182

183+
// True if this is the 'module' contextual keyword.
184+
LLVM_PREFERRED_TYPE(bool)
185+
unsigned IsModulesDecl : 1;
186+
183187
// True if this is a mangled OpenMP variant name.
184188
LLVM_PREFERRED_TYPE(bool)
185189
unsigned IsMangledOpenMPVariantName : 1;
@@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
196200
LLVM_PREFERRED_TYPE(bool)
197201
unsigned IsFinal : 1;
198202

199-
// 22 bits left in a 64-bit word.
203+
// 21 bits left in a 64-bit word.
200204

201205
// Managed by the language front-end.
202206
void *FETokenInfo = nullptr;
@@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
212216
IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
213217
IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
214218
RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
215-
IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
216-
IsRestrictExpansion(false), IsFinal(false) {}
219+
IsModulesDecl(false), IsMangledOpenMPVariantName(false),
220+
IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
217221

218222
public:
219223
IdentifierInfo(const IdentifierInfo &) = delete;
@@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
520524
RecomputeNeedsHandleIdentifier();
521525
}
522526

527+
/// Determine whether this is the contextual keyword \c module.
528+
bool isModulesDeclaration() const { return IsModulesDecl; }
529+
530+
/// Set whether this identifier is the contextual keyword \c module.
531+
void setModulesDeclaration(bool I) {
532+
IsModulesDecl = I;
533+
if (I)
534+
NeedsHandleIdentifier = true;
535+
else
536+
RecomputeNeedsHandleIdentifier();
537+
}
538+
523539
/// Determine whether this is the mangled name of an OpenMP variant.
524540
bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
525541

@@ -740,6 +756,8 @@ class IdentifierTable {
740756
// If this is the 'import' contextual keyword, mark it as such.
741757
if (Name == "import")
742758
II->setModulesImport(true);
759+
else if (Name == "module")
760+
II->setModulesDeclaration(true);
743761

744762
return *II;
745763
}

clang/include/clang/Basic/TokenKinds.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,9 @@ ANNOTATION(module_include)
10031003
ANNOTATION(module_begin)
10041004
ANNOTATION(module_end)
10051005

1006+
// Annotations for C++, Clang and Objective-C named modules.
1007+
ANNOTATION(module_name)
1008+
10061009
// Annotation for a header_name token that has been looked up and transformed
10071010
// into the name of a header unit.
10081011
ANNOTATION(header_unit)

clang/include/clang/Lex/Preprocessor.h

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -615,10 +615,6 @@ class Preprocessor {
615615

616616
ModuleDeclSeq ModuleDeclState;
617617

618-
/// Whether the module import expects an identifier next. Otherwise,
619-
/// it expects a '.' or ';'.
620-
bool ModuleImportExpectsIdentifier = false;
621-
622618
/// The identifier and source location of the currently-active
623619
/// \#pragma clang arc_cf_code_audited begin.
624620
std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
@@ -1744,11 +1740,14 @@ class Preprocessor {
17441740
/// Lex a token, forming a header-name token if possible.
17451741
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
17461742

1743+
/// Lex a module name or a partition name.
1744+
bool LexModuleName(Token &Result, bool IsImport);
1745+
17471746
/// Lex the parameters for an #embed directive, returns nullopt on error.
17481747
std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
17491748
bool ForHasEmbed);
1750-
17511749
bool LexAfterModuleImport(Token &Result);
1750+
bool LexAfterModuleDecl(Token &Result);
17521751
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
17531752

17541753
void makeModuleVisible(Module *M, SourceLocation Loc);
@@ -3039,6 +3038,9 @@ class Preprocessor {
30393038
static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
30403039
return P.LexAfterModuleImport(Result);
30413040
}
3041+
static bool CLK_LexAfterModuleDecl(Preprocessor &P, Token &Result) {
3042+
return P.LexAfterModuleDecl(Result);
3043+
}
30423044
};
30433045

30443046
/// Abstract base class that describes a handler that will receive
@@ -3071,6 +3073,77 @@ struct EmbedAnnotationData {
30713073
/// Registry of pragma handlers added by plugins
30723074
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
30733075

3076+
/// Represents a module or partition name token sequence.
3077+
///
3078+
/// module-name:
3079+
/// module-name-qualifier[opt] identifier
3080+
///
3081+
/// partition-name: [C++20]
3082+
/// : module-name-qualifier[opt] identifier
3083+
///
3084+
/// module-name-qualifier
3085+
/// module-name-qualifier[opt] identifier .
3086+
///
3087+
/// This class can only be created by the preprocessor. It guarantees that the
3088+
/// two source arrays are contiguous in memory and contain only 3 kinds of
3089+
/// tokens (identifier, '.' and ':'). It is only available when the
3090+
/// preprocessor returns an annot_module_name token.
3091+
///
3092+
/// For example:
3093+
///
3094+
/// export module m.n:c.d
3095+
///
3096+
/// The module name array has 3 tokens ['m', '.', 'n'].
3097+
/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
3098+
///
3099+
/// When importing a partition in a named module fragment (e.g. import :part1;),
3100+
/// the module name array will be empty, and the partition name array has 2
3101+
/// tokens.
3102+
///
3103+
/// When we meet a private-module-fragment (e.g. module :private;), the
3104+
/// preprocessor will not return an annot_module_name token, but will return
3105+
/// 2 separate tokens [':', 'kw_private'].
3106+
3107+
class ModuleNameInfo {
3108+
friend class Preprocessor;
3109+
ArrayRef<Token> ModuleName;
3110+
ArrayRef<Token> PartitionName;
3111+
3112+
ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
3113+
3114+
public:
3115+
/// Return the contiguous token array.
3116+
ArrayRef<Token> getTokens() const {
3117+
if (ModuleName.empty())
3118+
return PartitionName;
3119+
if (PartitionName.empty())
3120+
return ModuleName;
3121+
return ArrayRef(ModuleName.begin(), PartitionName.end());
3122+
}
3123+
bool hasModuleName() const { return !ModuleName.empty(); }
3124+
bool hasPartitionName() const { return !PartitionName.empty(); }
3125+
ArrayRef<Token> getModuleName() const { return ModuleName; }
3126+
ArrayRef<Token> getPartitionName() const { return PartitionName; }
3127+
Token getColonToken() const {
3128+
assert(hasPartitionName() && "Do not have a partition name");
3129+
return getPartitionName().front();
3130+
}
3131+
3132+
/// Under the standard C++ Modules, the dot is just part of the module name,
3133+
/// and not a real hierarchy separator. Flatten such module names now.
3134+
std::string getFlatName() const;
3135+
3136+
/// Build a module id path from the contiguous token array, including both
3137+
/// the module name and the partition name.
3138+
void getModuleIdPath(
3139+
SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
3140+
3141+
/// Build a module id path from \p ModuleName.
3142+
static void getModuleIdPath(
3143+
ArrayRef<Token> ModuleName,
3144+
SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path);
3145+
};
3146+
30743147
} // namespace clang
30753148

30763149
#endif // LLVM_CLANG_LEX_PREPROCESSOR_H

clang/include/clang/Lex/Token.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,9 @@ class Token {
235235
assert(isAnnotation() && "Used AnnotVal on non-annotation token");
236236
return PtrData;
237237
}
238+
template <class T> T getAnnotationValueAs() const {
239+
return static_cast<T>(getAnnotationValue());
240+
}
238241
void setAnnotationValue(void *val) {
239242
assert(isAnnotation() && "Used AnnotVal on non-annotation token");
240243
PtrData = val;

clang/include/clang/Parse/Parser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3876,7 +3876,7 @@ class Parser : public CodeCompletionHandler {
38763876
}
38773877

38783878
bool ParseModuleName(
3879-
SourceLocation UseLoc,
3879+
SourceLocation UseLoc, ArrayRef<Token> ModuleName,
38803880
SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path,
38813881
bool IsImport);
38823882

clang/lib/Basic/IdentifierTable.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
322322
if (LangOpts.IEEE128)
323323
AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
324324

325-
// Add the 'import' contextual keyword.
325+
// Add the 'import' and 'module' contextual keywords.
326326
get("import").setModulesImport(true);
327+
get("module").setModulesDeclaration(true);
327328
}
328329

329330
/// Checks if the specified token kind represents a keyword in the

clang/lib/Frontend/PrintPreprocessedOutput.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -758,9 +758,10 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
758758
// These tokens are not expanded to anything and don't need whitespace before
759759
// them.
760760
if (Tok.is(tok::eof) ||
761-
(Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
762-
!Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
763-
!Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed)))
761+
(Tok.isAnnotation() && Tok.isNot(tok::annot_header_unit) &&
762+
Tok.isNot(tok::annot_module_begin) && Tok.isNot(tok::annot_module_end) &&
763+
Tok.isNot(tok::annot_module_name) &&
764+
Tok.isNot(tok::annot_repl_input_end) && Tok.isNot(tok::annot_embed)))
764765
return;
765766

766767
// EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -951,6 +952,11 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
951952
PP.Lex(Tok);
952953
IsStartOfLine = true;
953954
continue;
955+
} else if (Tok.is(tok::annot_module_name)) {
956+
auto *Info = static_cast<ModuleNameInfo *>(Tok.getAnnotationValue());
957+
*Callbacks->OS << Info->getFlatName();
958+
PP.Lex(Tok);
959+
continue;
954960
} else if (Tok.is(tok::annot_header_unit)) {
955961
// This is a header-name that has been (effectively) converted into a
956962
// module-name.

clang/lib/Lex/PPLexerChange.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
122122
CurPPLexer = TheLexer;
123123
CurDirLookup = CurDir;
124124
CurLexerSubmodule = nullptr;
125-
if (CurLexerCallback != CLK_LexAfterModuleImport)
125+
if (CurLexerCallback != CLK_LexAfterModuleImport &&
126+
CurLexerCallback != CLK_LexAfterModuleDecl)
126127
CurLexerCallback = TheLexer->isDependencyDirectivesLexer()
127128
? CLK_DependencyDirectivesLexer
128129
: CLK_Lexer;
@@ -161,8 +162,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
161162
PushIncludeMacroStack();
162163
CurDirLookup = nullptr;
163164
CurTokenLexer = std::move(TokLexer);
164-
if (CurLexerCallback != CLK_LexAfterModuleImport)
165-
CurLexerCallback = CLK_TokenLexer;
165+
CurLexerCallback = CLK_TokenLexer;
166166
}
167167

168168
/// EnterTokenStream - Add a "macro" context to the top of the include stack,
@@ -216,7 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
216216
PushIncludeMacroStack();
217217
CurDirLookup = nullptr;
218218
CurTokenLexer = std::move(TokLexer);
219-
if (CurLexerCallback != CLK_LexAfterModuleImport)
219+
if (CurLexerCallback != CLK_LexAfterModuleImport &&
220+
CurLexerCallback != CLK_LexAfterModuleDecl)
220221
CurLexerCallback = CLK_TokenLexer;
221222
}
222223

0 commit comments

Comments
 (0)