Skip to content

Commit 037ca25

Browse files
committed
[clang][modules] Avoid allocations when reading blob paths
When reading paths from bitstream blobs, `ASTReader` always performs up to three allocations: 1. Convert the `StringRef` blob to `std::string` right away. 2. Concatenate the module file prefix directory and the relative path within `ASTReader::ResolveImportedPath()`. 3. Call `std::string::assign()` on the allocated blob with the concatenation result. This patch makes is so that we avoid allocations altogether (amortized). This works by: 1. Avoiding conversion of the `StringRef` blob into `std::string`. 2. Keeping one "global" buffer to perform the concatenation with. 3. Returning `StringRef` pointing into the global buffer and being careful to not store this anywhere where it can outlive the underlying global buffer allocation. Note that in some places, we don't store paths as blobs, but rather as records that get VBR-encoded. This makes the allocation in (1) unavoidable. I plan to fix this in a follow-up PR by changing the PCM format to store those as offsets into the blob part of the record. Similarly, there are some data structures that store the deserialized paths as `std::string`. If we don't access them frequently, it might be better to store just the unresolved `StringRef` and resolve it on demand, again using some kind of shared buffer to prevent allocations. This improves `clang-scan-deps` performance on my workload by <TBD>%.
1 parent c11ea47 commit 037ca25

File tree

2 files changed

+42
-41
lines changed

2 files changed

+42
-41
lines changed

clang/include/clang/Serialization/ASTReader.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,9 +1341,22 @@ class ASTReader
13411341
serialization::InputFile getInputFile(ModuleFile &F, unsigned ID,
13421342
bool Complain = true);
13431343

1344+
/// Buffer we use as temporary storage backing resolved paths.
1345+
SmallString<256> PathBuf;
1346+
13441347
public:
1345-
void ResolveImportedPath(ModuleFile &M, std::string &Filename);
1346-
static void ResolveImportedPath(std::string &Filename, StringRef Prefix);
1348+
StringRef ResolveImportedPath(StringRef Path, ModuleFile &M) {
1349+
return ResolveImportedPath(PathBuf, Path, M);
1350+
}
1351+
StringRef ResolveImportedPath(StringRef Path, StringRef Prefix) {
1352+
return ResolveImportedPath(PathBuf, Path, Prefix);
1353+
}
1354+
static StringRef ResolveImportedPath(SmallVectorImpl<char> &Buffer,
1355+
StringRef Path, ModuleFile &M) {
1356+
return ResolveImportedPath(Buffer, Path, M.BaseDirectory);
1357+
}
1358+
static StringRef ResolveImportedPath(SmallVectorImpl<char> &Buffer,
1359+
StringRef Path, StringRef Prefix);
13471360

13481361
/// Returns the first key declaration for the given declaration. This
13491362
/// is one that is formerly-canonical (or still canonical) and whose module

clang/lib/Serialization/ASTReader.cpp

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,8 +2048,7 @@ HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
20482048
if (!Key.Imported)
20492049
return FileMgr.getOptionalFileRef(Key.Filename);
20502050

2051-
std::string Resolved = std::string(Key.Filename);
2052-
Reader.ResolveImportedPath(M, Resolved);
2051+
StringRef Resolved = Reader.ResolveImportedPath(Key.Filename, M);
20532052
return FileMgr.getOptionalFileRef(Resolved);
20542053
}
20552054

@@ -2513,11 +2512,10 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
25132512
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
25142513
uint16_t AsRequestedLength = Record[7];
25152514

2516-
std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
2517-
std::string Name = Blob.substr(AsRequestedLength).str();
2518-
2519-
ResolveImportedPath(F, NameAsRequested);
2520-
ResolveImportedPath(F, Name);
2515+
std::string NameAsRequested =
2516+
ResolveImportedPath(Blob.substr(0, AsRequestedLength), F).str();
2517+
std::string Name =
2518+
ResolveImportedPath(Blob.substr(AsRequestedLength), F).str();
25212519

25222520
if (Name.empty())
25232521
Name = NameAsRequested;
@@ -2746,20 +2744,15 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
27462744
/// If we are loading a relocatable PCH or module file, and the filename
27472745
/// is not an absolute path, add the system or module root to the beginning of
27482746
/// the file name.
2749-
void ASTReader::ResolveImportedPath(ModuleFile &M, std::string &Filename) {
2750-
// Resolve relative to the base directory, if we have one.
2751-
if (!M.BaseDirectory.empty())
2752-
return ResolveImportedPath(Filename, M.BaseDirectory);
2753-
}
2754-
2755-
void ASTReader::ResolveImportedPath(std::string &Filename, StringRef Prefix) {
2756-
if (Filename.empty() || llvm::sys::path::is_absolute(Filename) ||
2757-
Filename == "<built-in>" || Filename == "<command line>")
2758-
return;
2747+
StringRef ASTReader::ResolveImportedPath(SmallVectorImpl<char> &Buffer,
2748+
StringRef Path, StringRef Prefix) {
2749+
if (Prefix.empty() || Path.empty() || llvm::sys::path::is_absolute(Path) ||
2750+
Path == "<built-in>" || Path == "<command line>")
2751+
return Path;
27592752

2760-
SmallString<128> Buffer;
2761-
llvm::sys::path::append(Buffer, Prefix, Filename);
2762-
Filename.assign(Buffer.begin(), Buffer.end());
2753+
Buffer.clear();
2754+
llvm::sys::path::append(Buffer, Prefix, Path);
2755+
return {Buffer.data(), Buffer.size()};
27632756
}
27642757

27652758
static bool isDiagnosedResult(ASTReader::ASTReadResult ARR, unsigned Caps) {
@@ -3187,8 +3180,8 @@ ASTReader::ReadControlBlock(ModuleFile &F,
31873180
case ORIGINAL_FILE:
31883181
F.OriginalSourceFileID = FileID::get(Record[0]);
31893182
F.ActualOriginalSourceFileName = std::string(Blob);
3190-
F.OriginalSourceFileName = F.ActualOriginalSourceFileName;
3191-
ResolveImportedPath(F, F.OriginalSourceFileName);
3183+
F.OriginalSourceFileName =
3184+
ResolveImportedPath(F.ActualOriginalSourceFileName, F).str();
31923185
break;
31933186

31943187
case ORIGINAL_FILE_ID:
@@ -5477,6 +5470,7 @@ bool ASTReader::readASTFileControlBlock(
54775470
RecordData Record;
54785471
std::string ModuleDir;
54795472
bool DoneWithControlBlock = false;
5473+
SmallString<256> PathBuf;
54805474
while (!DoneWithControlBlock) {
54815475
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
54825476
if (!MaybeEntry) {
@@ -5559,8 +5553,8 @@ bool ASTReader::readASTFileControlBlock(
55595553
break;
55605554
case MODULE_MAP_FILE: {
55615555
unsigned Idx = 0;
5562-
auto Path = ReadString(Record, Idx);
5563-
ResolveImportedPath(Path, ModuleDir);
5556+
std::string PathStr = ReadString(Record, Idx);
5557+
StringRef Path = ResolveImportedPath(PathBuf, PathStr, ModuleDir);
55645558
Listener.ReadModuleMapFile(Path);
55655559
break;
55665560
}
@@ -5608,8 +5602,7 @@ bool ASTReader::readASTFileControlBlock(
56085602
break;
56095603
case INPUT_FILE:
56105604
bool Overridden = static_cast<bool>(Record[3]);
5611-
std::string Filename = std::string(Blob);
5612-
ResolveImportedPath(Filename, ModuleDir);
5605+
StringRef Filename = ResolveImportedPath(PathBuf, Blob, ModuleDir);
56135606
shouldContinue = Listener.visitInputFile(
56145607
Filename, isSystemFile, Overridden, /*IsExplicitModule*/false);
56155608
break;
@@ -5646,8 +5639,9 @@ bool ASTReader::readASTFileControlBlock(
56465639
// Skip Size, ModTime and Signature
56475640
Idx += 1 + 1 + ASTFileSignature::size;
56485641
std::string ModuleName = ReadString(Record, Idx);
5649-
std::string Filename = ReadString(Record, Idx);
5650-
ResolveImportedPath(Filename, ModuleDir);
5642+
std::string FilenameStr = ReadString(Record, Idx);
5643+
StringRef Filename =
5644+
ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
56515645
Listener.visitImport(ModuleName, Filename);
56525646
}
56535647
break;
@@ -5901,8 +5895,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
59015895
// FIXME: This doesn't work for framework modules as `Filename` is the
59025896
// name as written in the module file and does not include
59035897
// `Headers/`, so this path will never exist.
5904-
std::string Filename = std::string(Blob);
5905-
ResolveImportedPath(F, Filename);
5898+
StringRef Filename = ResolveImportedPath(Blob, F);
59065899
if (auto Umbrella = PP.getFileManager().getOptionalFileRef(Filename)) {
59075900
if (!CurrentModule->getUmbrellaHeaderAsWritten()) {
59085901
// FIXME: NameAsWritten
@@ -5931,16 +5924,14 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
59315924
break;
59325925

59335926
case SUBMODULE_TOPHEADER: {
5934-
std::string HeaderName(Blob);
5935-
ResolveImportedPath(F, HeaderName);
5927+
StringRef HeaderName = ResolveImportedPath(Blob, F);
59365928
CurrentModule->addTopHeaderFilename(HeaderName);
59375929
break;
59385930
}
59395931

59405932
case SUBMODULE_UMBRELLA_DIR: {
59415933
// See comments in SUBMODULE_UMBRELLA_HEADER
5942-
std::string Dirname = std::string(Blob);
5943-
ResolveImportedPath(F, Dirname);
5934+
StringRef Dirname = ResolveImportedPath(Blob, F);
59445935
if (auto Umbrella =
59455936
PP.getFileManager().getOptionalDirectoryRef(Dirname)) {
59465937
if (!CurrentModule->getUmbrellaDirAsWritten()) {
@@ -9598,16 +9589,13 @@ std::string ASTReader::ReadString(const RecordDataImpl &Record, unsigned &Idx) {
95989589
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
95999590
unsigned &Idx) {
96009591
std::string Filename = ReadString(Record, Idx);
9601-
ResolveImportedPath(F, Filename);
9602-
return Filename;
9592+
return ResolveImportedPath(Filename, F).str();
96039593
}
96049594

96059595
std::string ASTReader::ReadPath(StringRef BaseDirectory,
96069596
const RecordData &Record, unsigned &Idx) {
96079597
std::string Filename = ReadString(Record, Idx);
9608-
if (!BaseDirectory.empty())
9609-
ResolveImportedPath(Filename, BaseDirectory);
9610-
return Filename;
9598+
return ResolveImportedPath(Filename, BaseDirectory).str();
96119599
}
96129600

96139601
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,

0 commit comments

Comments
 (0)