Skip to content

Commit b7b4e25

Browse files
committed
[C++20][Modules] Implement P1857R3 Modules Dependency Discovery
Signed-off-by: yronglin <[email protected]>
1 parent fb02801 commit b7b4e25

15 files changed

+483
-309
lines changed

clang/include/clang/Basic/DiagnosticLexKinds.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ def warn_cxx98_compat_variadic_macro : Warning<
477477
def ext_named_variadic_macro : Extension<
478478
"named variadic macros are a GNU extension">, InGroup<VariadicMacros>;
479479
def err_embedded_directive : Error<
480-
"embedding a #%0 directive within macro arguments is not supported">;
480+
"embedding a %select{#|C++ }0%1 directive within macro arguments is not supported">;
481481
def ext_embedded_directive : Extension<
482482
"embedding a directive within macro arguments has undefined behavior">,
483483
InGroup<DiagGroup<"embedded-directive">>;
@@ -952,6 +952,10 @@ def warn_module_conflict : Warning<
952952
InGroup<ModuleConflict>;
953953

954954
// C++20 modules
955+
def err_module_decl_in_header : Error<
956+
"module declaration must not come from an #include directive">;
957+
def err_pp_cond_span_module_decl : Error<
958+
"preprocessor conditionals shall not span a module declaration">;
955959
def err_header_import_semi_in_macro : Error<
956960
"semicolon terminating header import declaration cannot be produced "
957961
"by a macro">;

clang/include/clang/Basic/DiagnosticParseKinds.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,8 +1695,8 @@ def ext_bit_int : Extension<
16951695
} // end of Parse Issue category.
16961696

16971697
let CategoryName = "Modules Issue" in {
1698-
def err_unexpected_module_decl : Error<
1699-
"module declaration can only appear at the top level">;
1698+
def err_unexpected_module_or_import_decl : Error<
1699+
"%select{module|import}0 declaration can only appear at the top level">;
17001700
def err_module_expected_ident : Error<
17011701
"expected a module name after '%select{module|import}0'">;
17021702
def err_attribute_not_module_attr : Error<

clang/include/clang/Basic/IdentifierTable.h

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
180180
LLVM_PREFERRED_TYPE(bool)
181181
unsigned IsModulesImport : 1;
182182

183+
// True if this is the 'module' contextual keyword.
184+
LLVM_PREFERRED_TYPE(bool)
185+
unsigned IsModulesDecl : 1;
186+
183187
// True if this is a mangled OpenMP variant name.
184188
LLVM_PREFERRED_TYPE(bool)
185189
unsigned IsMangledOpenMPVariantName : 1;
@@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
196200
LLVM_PREFERRED_TYPE(bool)
197201
unsigned IsFinal : 1;
198202

199-
// 22 bits left in a 64-bit word.
203+
// 21 bits left in a 64-bit word.
200204

201205
// Managed by the language front-end.
202206
void *FETokenInfo = nullptr;
@@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
212216
IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
213217
IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
214218
RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
215-
IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
216-
IsRestrictExpansion(false), IsFinal(false) {}
219+
IsModulesDecl(false), IsMangledOpenMPVariantName(false),
220+
IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
217221

218222
public:
219223
IdentifierInfo(const IdentifierInfo &) = delete;
@@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
520524
RecomputeNeedsHandleIdentifier();
521525
}
522526

527+
/// Determine whether this is the contextual keyword \c module.
528+
bool isModulesDeclaration() const { return IsModulesDecl; }
529+
530+
/// Set whether this identifier is the contextual keyword \c module.
531+
void setModulesDeclaration(bool I) {
532+
IsModulesDecl = I;
533+
if (I)
534+
NeedsHandleIdentifier = true;
535+
else
536+
RecomputeNeedsHandleIdentifier();
537+
}
538+
523539
/// Determine whether this is the mangled name of an OpenMP variant.
524540
bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
525541

@@ -737,10 +753,11 @@ class IdentifierTable {
737753
// contents.
738754
II->Entry = &Entry;
739755

740-
// If this is the 'import' contextual keyword, mark it as such.
756+
// If this is the 'import' or 'module' contextual keyword, mark it as such.
741757
if (Name == "import")
742758
II->setModulesImport(true);
743-
759+
else if (Name == "module")
760+
II->setModulesDeclaration(true);
744761
return *II;
745762
}
746763

clang/include/clang/Basic/TokenKinds.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ PPKEYWORD(pragma)
129129
// C23 & C++26 #embed
130130
PPKEYWORD(embed)
131131

132+
// C++20 Module Directive
133+
PPKEYWORD(module)
134+
132135
// GNU Extensions.
133136
PPKEYWORD(import)
134137
PPKEYWORD(include_next)
@@ -1014,6 +1017,9 @@ ANNOTATION(module_include)
10141017
ANNOTATION(module_begin)
10151018
ANNOTATION(module_end)
10161019

1020+
// Annotations for C++, Clang and Objective-C named modules.
1021+
ANNOTATION(module_name)
1022+
10171023
// Annotation for a header_name token that has been looked up and transformed
10181024
// into the name of a header unit.
10191025
ANNOTATION(header_unit)

clang/include/clang/Lex/Preprocessor.h

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,70 @@ enum class EmbedResult {
128128
Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__
129129
};
130130

131+
/// Represents a module or partition name token sequence.
132+
///
133+
/// module-name:
134+
/// module-name-qualifier[opt] identifier
135+
///
136+
/// partition-name: [C++20]
137+
/// : module-name-qualifier[opt] identifier
138+
///
139+
/// module-name-qualifier
140+
/// module-name-qualifier[opt] identifier .
141+
///
142+
/// This class can only be created by the preprocessor and guarantees that the
143+
/// two source arrays are contiguous in memory and only contain 3 kinds of
144+
/// tokens (identifier, '.' and ':'). It is only available when the preprocessor
145+
/// returns an annot_module_name token.
146+
///
147+
/// For example:
148+
///
149+
/// export module m.n:c.d
150+
///
151+
/// The module name array has 3 tokens ['m', '.', 'n'].
152+
/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
153+
///
154+
/// When importing a partition in a named module fragment (e.g. import :part1;),
155+
/// the module name array will be empty, and the partition name array has 2
156+
/// tokens.
157+
///
158+
/// When we meet a private-module-fragment (Eg. module :private;), preprocessor
159+
/// will not return an annot_module_name token, but will return 2 separate tokens
160+
/// [':', 'kw_private'].
161+
class ModuleNameInfo {
162+
friend class Preprocessor;
163+
ArrayRef<Token> ModuleName;
164+
ArrayRef<Token> PartitionName;
165+
166+
ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
167+
168+
public:
169+
/// Return the contiguous token array.
170+
ArrayRef<Token> getTokens() const {
171+
if (ModuleName.empty())
172+
return PartitionName;
173+
if (PartitionName.empty())
174+
return ModuleName;
175+
return ArrayRef(ModuleName.begin(), PartitionName.end());
176+
}
177+
bool hasModuleName() const { return !ModuleName.empty(); }
178+
bool hasPartitionName() const { return !PartitionName.empty(); }
179+
ArrayRef<Token> getModuleName() const { return ModuleName; }
180+
ArrayRef<Token> getPartitionName() const { return PartitionName; }
181+
Token getColonToken() const {
182+
assert(hasPartitionName() && "Do not have a partition name");
183+
return getPartitionName().front();
184+
}
185+
186+
/// Under the standard C++ Modules, the dot is just part of the module name,
187+
/// and not a real hierarchy separator. Flatten such module names now.
188+
std::string getFlatName() const;
189+
190+
void buildNamedModuleIdPath(
191+
Preprocessor &P,
192+
SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
193+
};
194+
131195
/// Engages in a tight little dance with the lexer to efficiently
132196
/// preprocess tokens.
133197
///
@@ -336,6 +400,15 @@ class Preprocessor {
336400

337401
/// Whether the last token we lexed was an '@'.
338402
bool LastTokenWasAt = false;
403+
404+
struct ExportContextualKeywordInfo {
405+
Token ExportTok;
406+
bool TokAtPhysicalStartOfLine;
407+
};
408+
409+
/// Whether the last token we lexed was an 'export' keyword.
410+
std::optional<ExportContextualKeywordInfo> LastTokenWasExportKeyword =
411+
std::nullopt;
339412

340413
/// A position within a C++20 import-seq.
341414
class StdCXXImportSeq {
@@ -1767,6 +1840,17 @@ class Preprocessor {
17671840
std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
17681841
bool ForHasEmbed);
17691842

1843+
bool LexModuleNameOrHeaderName(Token &Result, bool IsImport);
1844+
/// Callback invoked when the lexer sees one of the export, import or module tokens
1845+
/// at the start of a line.
1846+
///
1847+
/// This consumes the import or module directive, modifies the
1848+
/// lexer/preprocessor state, and advances the lexer(s) so that the next token
1849+
/// read is the correct one.
1850+
bool HandleModuleContextualKeyword(Token &Result, bool TokAtPhysicalStartOfLine);
1851+
1852+
void HandleModuleDirective(Token &ModuleOrImportKeyword);
1853+
void LexAfterModuleImport(SmallVectorImpl<Token> &Suffix, bool IsImport);
17701854
bool LexAfterModuleImport(Token &Result);
17711855
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
17721856

@@ -2344,7 +2428,7 @@ class Preprocessor {
23442428
///
23452429
/// \return The location of the end of the directive (the terminating
23462430
/// newline).
2347-
SourceLocation CheckEndOfDirective(const char *DirType,
2431+
SourceLocation CheckEndOfDirective(StringRef DirType,
23482432
bool EnableMacros = false);
23492433

23502434
/// Read and discard all tokens remaining on the current line until
@@ -2785,6 +2869,7 @@ class Preprocessor {
27852869
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
27862870
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
27872871
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2872+
void HandleCXXModuleOrImportDirective(Token &KeywordTok);
27882873
void HandleMicrosoftImportDirective(Token &Tok);
27892874

27902875
public:

clang/include/clang/Lex/Token.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,9 @@ class Token {
231231
PtrData = const_cast<char*>(Ptr);
232232
}
233233

234+
template <class T> T getAnnotationValueAs() const {
235+
return static_cast<T>(getAnnotationValue());
236+
}
234237
void *getAnnotationValue() const {
235238
assert(isAnnotation() && "Used AnnotVal on non-annotation token");
236239
return PtrData;
@@ -289,6 +292,10 @@ class Token {
289292
/// Return the ObjC keyword kind.
290293
tok::ObjCKeywordKind getObjCKeywordID() const;
291294

295+
/// Return true if we have a C++20 Modules contextual keyword (export, import
296+
/// or module).
297+
bool isModuleContextualKeyword(bool AllowExport = true) const;
298+
292299
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
293300

294301
/// Return true if this token has trigraphs or escaped newlines in it.

clang/include/clang/Parse/Parser.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,6 @@ class Parser : public CodeCompletionHandler {
165165
mutable IdentifierInfo *Ident_GNU_final;
166166
mutable IdentifierInfo *Ident_override;
167167

168-
// C++2a contextual keywords.
169-
mutable IdentifierInfo *Ident_import;
170-
mutable IdentifierInfo *Ident_module;
171-
172168
// C++ type trait keywords that can be reverted to identifiers and still be
173169
// used as type traits.
174170
llvm::SmallDenseMap<IdentifierInfo *, tok::TokenKind> RevertibleTypeTraits;

clang/lib/Basic/IdentifierTable.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
326326
if (LangOpts.IEEE128)
327327
AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
328328

329-
// Add the 'import' contextual keyword.
329+
// Add the 'import' and 'module' contextual keywords.
330330
get("import").setModulesImport(true);
331+
get("module").setModulesDeclaration(true);
331332
}
332333

333334
/// Checks if the specified token kind represents a keyword in the
@@ -456,6 +457,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
456457
CASE( 6, 'd', 'f', define);
457458
CASE( 6, 'i', 'n', ifndef);
458459
CASE( 6, 'i', 'p', import);
460+
CASE( 6, 'm', 'd', module);
459461
CASE( 6, 'p', 'a', pragma);
460462

461463
CASE( 7, 'd', 'f', defined);

clang/lib/Lex/DependencyDirectivesScanner.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -497,21 +497,32 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
497497
const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
498498
for (;;) {
499499
const dependency_directives_scan::Token &Tok = lexToken(First, End);
500-
if (Tok.is(tok::eof))
500+
if (Tok.isOneOf(tok::eof, tok::eod))
501501
return reportError(
502502
DirectiveLoc,
503503
diag::err_dep_source_scanner_missing_semi_after_at_import);
504504
if (Tok.is(tok::semi))
505505
break;
506506
}
507+
508+
// Skip extra tokens after semi in C++20 Modules directive.
509+
bool IsCXXModules = Kind == DirectiveKind::cxx_export_import_decl ||
510+
Kind == DirectiveKind::cxx_export_module_decl ||
511+
Kind == DirectiveKind::cxx_import_decl ||
512+
Kind == DirectiveKind::cxx_module_decl;
513+
if (IsCXXModules)
514+
lexPPDirectiveBody(First, End);
507515
pushDirective(Kind);
508516
skipWhitespace(First, End);
509517
if (First == End)
510518
return false;
511-
if (!isVerticalWhitespace(*First))
512-
return reportError(
513-
DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
514-
skipNewline(First, End);
519+
if (!IsCXXModules) {
520+
if (!isVerticalWhitespace(*First))
521+
return reportError(
522+
DirectiveLoc,
523+
diag::err_dep_source_scanner_unexpected_tokens_at_import);
524+
skipNewline(First, End);
525+
}
515526
return false;
516527
}
517528

@@ -846,8 +857,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
846857
if (*First == '@')
847858
return lexAt(First, End);
848859

849-
if (*First == 'i' || *First == 'e' || *First == 'm')
850-
return lexModule(First, End);
860+
// if (!LangOpts.CPlusPlusModules && (*First == 'i' || *First == 'e' || *First == 'm'))
861+
// return lexModule(First, End);
851862

852863
if (*First == '_') {
853864
if (isNextIdentifierOrSkipLine("_Pragma", First, End))
@@ -860,7 +871,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
860871
TheLexer.setParsingPreprocessorDirective(true);
861872
auto ScEx2 = make_scope_exit(
862873
[&]() { TheLexer.setParsingPreprocessorDirective(false); });
863-
874+
if (*First == 'i' || *First == 'e' || *First == 'm')
875+
return lexModule(First, End);
864876
// Lex '#'.
865877
const dependency_directives_scan::Token &HashTok = lexToken(First, End);
866878
if (HashTok.is(tok::hashhash)) {

0 commit comments

Comments
 (0)