Skip to content

Commit 6470706

Browse files
committed
[C++20] [Modules] [NFC] Add Preprocessor methods for named modules - for ClangScanDeps (1/4)
This patch prepares the necessary preprocessor interfaces for D137527: the preprocessor needs to recognize whether we're in a module unit, what kind of module unit it is, the module declaration itself, and the module being imported. Differential Revision: https://reviews.llvm.org/D137526
1 parent ad81d01 commit 6470706

File tree

4 files changed

+555
-5
lines changed

4 files changed

+555
-5
lines changed

clang/include/clang/Lex/Preprocessor.h

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,9 @@ class Preprocessor {
313313
/// The import path for named module that we're currently processing.
314314
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
315315

316+
/// Whether the import is an `@import` or a standard c++ modules import.
317+
bool IsAtImport = false;
318+
316319
/// Whether the last token we lexed was an '@'.
317320
bool LastTokenWasAt = false;
318321

@@ -456,6 +459,144 @@ class Preprocessor {
456459

457460
TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
458461

462+
/// Track the status of the c++20 module decl.
463+
///
464+
/// module-declaration:
465+
/// 'export'[opt] 'module' module-name module-partition[opt]
466+
/// attribute-specifier-seq[opt] ';'
467+
///
468+
/// module-name:
469+
/// module-name-qualifier[opt] identifier
470+
///
471+
/// module-partition:
472+
/// ':' module-name-qualifier[opt] identifier
473+
///
474+
/// module-name-qualifier:
475+
/// identifier '.'
476+
/// module-name-qualifier identifier '.'
477+
///
478+
/// Transition state:
479+
///
480+
/// NotAModuleDecl --- export ---> FoundExport
481+
/// NotAModuleDecl --- module ---> ImplementationCandidate
482+
/// FoundExport --- module ---> InterfaceCandidate
483+
/// ImplementationCandidate --- Identifier ---> ImplementationCandidate
484+
/// ImplementationCandidate --- period ---> ImplementationCandidate
485+
/// ImplementationCandidate --- colon ---> ImplementationCandidate
486+
/// InterfaceCandidate --- Identifier ---> InterfaceCandidate
487+
/// InterfaceCandidate --- period ---> InterfaceCandidate
488+
/// InterfaceCandidate --- colon ---> InterfaceCandidate
489+
/// ImplementationCandidate --- Semi ---> NamedModuleImplementation
490+
/// InterfaceCandidate --- Semi ---> NamedModuleInterface
491+
/// NamedModuleImplementation --- Anything ---> NamedModuleImplementation
492+
/// NamedModuleInterface --- Anything ---> NamedModuleInterface
493+
///
494+
/// FIXME: We haven't handled attribute-specifier-seq here. It may not be bad
495+
/// soon since we don't support any module attributes yet.
496+
class ModuleDeclSeq {
497+
enum ModuleDeclState : int {
498+
NotAModuleDecl,
499+
FoundExport,
500+
InterfaceCandidate,
501+
ImplementationCandidate,
502+
NamedModuleInterface,
503+
NamedModuleImplementation,
504+
};
505+
506+
public:
507+
ModuleDeclSeq() : State(NotAModuleDecl) {}
508+
509+
void handleExport() {
510+
if (State == NotAModuleDecl)
511+
State = FoundExport;
512+
else if (!isNamedModule())
513+
reset();
514+
}
515+
516+
void handleModule() {
517+
if (State == FoundExport)
518+
State = InterfaceCandidate;
519+
else if (State == NotAModuleDecl)
520+
State = ImplementationCandidate;
521+
else if (!isNamedModule())
522+
reset();
523+
}
524+
525+
void handleIdentifier(IdentifierInfo *Identifier) {
526+
if (isModuleCandidate() && Identifier)
527+
Name += Identifier->getName().str();
528+
else if (!isNamedModule())
529+
reset();
530+
}
531+
532+
void handleColon() {
533+
if (isModuleCandidate())
534+
Name += ":";
535+
else if (!isNamedModule())
536+
reset();
537+
}
538+
539+
void handlePeriod() {
540+
if (isModuleCandidate())
541+
Name += ".";
542+
else if (!isNamedModule())
543+
reset();
544+
}
545+
546+
void handleSemi() {
547+
if (!Name.empty() && isModuleCandidate()) {
548+
if (State == InterfaceCandidate)
549+
State = NamedModuleInterface;
550+
else if (State == ImplementationCandidate)
551+
State = NamedModuleImplementation;
552+
else
553+
llvm_unreachable("Unimaged ModuleDeclState.");
554+
} else if (!isNamedModule())
555+
reset();
556+
}
557+
558+
void handleMisc() {
559+
if (!isNamedModule())
560+
reset();
561+
}
562+
563+
bool isModuleCandidate() const {
564+
return State == InterfaceCandidate || State == ImplementationCandidate;
565+
}
566+
567+
bool isNamedModule() const {
568+
return State == NamedModuleInterface ||
569+
State == NamedModuleImplementation;
570+
}
571+
572+
bool isNamedInterface() const { return State == NamedModuleInterface; }
573+
574+
bool isImplementationUnit() const {
575+
return State == NamedModuleImplementation && !getName().contains(':');
576+
}
577+
578+
StringRef getName() const {
579+
assert(isNamedModule() && "Can't get name from a non named module");
580+
return Name;
581+
}
582+
583+
StringRef getPrimaryName() const {
584+
assert(isNamedModule() && "Can't get name from a non named module");
585+
return getName().split(':').first;
586+
}
587+
588+
void reset() {
589+
Name.clear();
590+
State = NotAModuleDecl;
591+
}
592+
593+
private:
594+
ModuleDeclState State;
595+
std::string Name;
596+
};
597+
598+
ModuleDeclSeq ModuleDeclState;
599+
459600
/// Whether the module import expects an identifier next. Otherwise,
460601
/// it expects a '.' or ';'.
461602
bool ModuleImportExpectsIdentifier = false;
@@ -2225,6 +2366,36 @@ class Preprocessor {
22252366
/// Retrieves the module whose implementation we're currently compiling, if any.
22262367
Module *getCurrentModuleImplementation();
22272368

2369+
/// Whether we are preprocessing a named module, i.e. whether ModuleDeclState
/// has recognized a complete named module declaration.
2370+
bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2371+
2372+
/// Whether we are preprocessing a named module interface unit.
2373+
/// Note that a module implementation partition is not considered a
2374+
/// named interface unit here, although it is importable,
2375+
/// to ease the parsing.
2376+
bool isInNamedInterfaceUnit() const {
2377+
return ModuleDeclState.isNamedInterface();
2378+
}
2379+
2380+
/// Get the name of the named module we're preprocessing.
2381+
/// Requires that we're preprocessing a named module; the underlying
/// ModuleDeclState.getName() asserts this.
2382+
StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2383+
2384+
/// Whether we are preprocessing a module implementation unit.
2385+
/// Note that a module implementation partition is not considered an
2386+
/// implementation unit: a module name containing ':' is excluded.
2387+
bool isInImplementationUnit() const {
2388+
return ModuleDeclState.isImplementationUnit();
2389+
}
2390+
2391+
/// If we're importing a standard C++20 Named Modules.
2392+
bool isInImportingCXXNamedModules() const {
2393+
// NamedModuleImportPath will be non-empty only if we're importing
2394+
// Standard C++ named modules.
2395+
return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules &&
2396+
!IsAtImport;
2397+
}
2398+
22282399
/// Allocate a new MacroInfo object with the provided SourceLocation.
22292400
MacroInfo *AllocateMacroInfo(SourceLocation L);
22302401

clang/lib/Lex/Preprocessor.cpp

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
873873
CurLexerKind != CLK_CachingLexer) {
874874
ModuleImportLoc = Identifier.getLocation();
875875
NamedModuleImportPath.clear();
876+
IsAtImport = true;
876877
ModuleImportExpectsIdentifier = true;
877878
CurLexerKind = CLK_LexAfterModuleImport;
878879
}
@@ -940,6 +941,7 @@ void Preprocessor::Lex(Token &Result) {
940941
case tok::semi:
941942
TrackGMFState.handleSemi();
942943
StdCXXImportSeqState.handleSemi();
944+
ModuleDeclState.handleSemi();
943945
break;
944946
case tok::header_name:
945947
case tok::annot_header_unit:
@@ -948,6 +950,13 @@ void Preprocessor::Lex(Token &Result) {
948950
case tok::kw_export:
949951
TrackGMFState.handleExport();
950952
StdCXXImportSeqState.handleExport();
953+
ModuleDeclState.handleExport();
954+
break;
955+
case tok::colon:
956+
ModuleDeclState.handleColon();
957+
break;
958+
case tok::period:
959+
ModuleDeclState.handlePeriod();
951960
break;
952961
case tok::identifier:
953962
if (Result.getIdentifierInfo()->isModulesImport()) {
@@ -956,18 +965,25 @@ void Preprocessor::Lex(Token &Result) {
956965
if (StdCXXImportSeqState.afterImportSeq()) {
957966
ModuleImportLoc = Result.getLocation();
958967
NamedModuleImportPath.clear();
968+
IsAtImport = false;
959969
ModuleImportExpectsIdentifier = true;
960970
CurLexerKind = CLK_LexAfterModuleImport;
961971
}
962972
break;
963973
} else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
964974
TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
975+
ModuleDeclState.handleModule();
965976
break;
977+
} else {
978+
ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
979+
if (ModuleDeclState.isModuleCandidate())
980+
break;
966981
}
967982
[[fallthrough]];
968983
default:
969984
TrackGMFState.handleMisc();
970985
StdCXXImportSeqState.handleMisc();
986+
ModuleDeclState.handleMisc();
971987
break;
972988
}
973989
}
@@ -1151,6 +1167,15 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
11511167
if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
11521168
if (LexHeaderName(Result))
11531169
return true;
1170+
1171+
if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
1172+
std::string Name = ModuleDeclState.getPrimaryName().str();
1173+
Name += ":";
1174+
NamedModuleImportPath.push_back(
1175+
{getIdentifierInfo(Name), Result.getLocation()});
1176+
CurLexerKind = CLK_LexAfterModuleImport;
1177+
return true;
1178+
}
11541179
} else {
11551180
Lex(Result);
11561181
}
@@ -1164,9 +1189,10 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
11641189
/*DisableMacroExpansion*/ true, /*IsReinject*/ false);
11651190
};
11661191

1192+
bool ImportingHeader = Result.is(tok::header_name);
11671193
// Check for a header-name.
11681194
SmallVector<Token, 32> Suffix;
1169-
if (Result.is(tok::header_name)) {
1195+
if (ImportingHeader) {
11701196
// Enter the header-name token into the token stream; a Lex action cannot
11711197
// both return a token and cache tokens (doing so would corrupt the token
11721198
// cache if the call to Lex comes from CachingLex / PeekAhead).
@@ -1244,8 +1270,8 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
12441270
if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
12451271
// We expected to see an identifier here, and we did; continue handling
12461272
// identifiers.
1247-
NamedModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
1248-
Result.getLocation()));
1273+
NamedModuleImportPath.push_back(
1274+
std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
12491275
ModuleImportExpectsIdentifier = false;
12501276
CurLexerKind = CLK_LexAfterModuleImport;
12511277
return true;
@@ -1285,7 +1311,8 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
12851311
std::string FlatModuleName;
12861312
if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
12871313
for (auto &Piece : NamedModuleImportPath) {
1288-
if (!FlatModuleName.empty())
1314+
// If the FlatModuleName ends with colon, it implies it is a partition.
1315+
if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
12891316
FlatModuleName += ".";
12901317
FlatModuleName += Piece.first->getName();
12911318
}
@@ -1296,14 +1323,16 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
12961323
}
12971324

12981325
Module *Imported = nullptr;
1299-
if (getLangOpts().Modules) {
1326+
// We don't/shouldn't load the standard c++20 modules when preprocessing.
1327+
if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
13001328
Imported = TheModuleLoader.loadModule(ModuleImportLoc,
13011329
NamedModuleImportPath,
13021330
Module::Hidden,
13031331
/*IsInclusionDirective=*/false);
13041332
if (Imported)
13051333
makeModuleVisible(Imported, SemiLoc);
13061334
}
1335+
13071336
if (Callbacks)
13081337
Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
13091338

clang/unittests/Lex/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ add_clang_unittest(LexTests
77
HeaderMapTest.cpp
88
HeaderSearchTest.cpp
99
LexerTest.cpp
10+
ModuleDeclStateTest.cpp
1011
PPCallbacksTest.cpp
1112
PPConditionalDirectiveRecordTest.cpp
1213
PPDependencyDirectivesTest.cpp
@@ -17,6 +18,7 @@ clang_target_link_libraries(LexTests
1718
PRIVATE
1819
clangAST
1920
clangBasic
21+
clangFrontend
2022
clangLex
2123
clangParse
2224
clangSema

0 commit comments

Comments
 (0)