Skip to content

Commit a553c62

Browse files
authored
[clang][modules] Avoid allocations when reading blob paths (#113984)
When reading a path from a bitstream blob, `ASTReader` performs up to three allocations: 1. Conversion of the `StringRef` blob into `std::string` to conform to the `ResolveImportedPath()` API that takes `std::string &`. 2. Concatenation of the module file prefix directory and the relative path into a fresh `SmallString<128>` buffer in `ResolveImportedPath()`. 3. Propagating the result out of `ResolveImportedPath()` by calling `std::string::assign()` on the out-parameter. This patch makes is so that we avoid allocations altogether (amortized) by: 1. Avoiding conversion of the `StringRef` blob into `std::string` and changing the `ResolveImportedPath()` API. 2. Using one "global" buffer to hold the concatenation. 3. Returning `StringRef` that points into the buffer and ensuring the contents are not overwritten while it lives. Note that in some places of the bitstream we don't store paths as blobs, but rather as records that get VBR-encoded. This makes the allocation in (1) unavoidable. I plan to fix this in a follow-up PR by changing the PCM format. Moreover, there are some data structures (e.g. `serialization::InputFileInfo`) that store deserialized and resolved paths as `std::string`. If we don't access them frequently, it would be more efficient to store just the unresolved `StringRef` and resolve them on demand (within some kind of shared buffer to prevent allocations). This PR alone improves `clang-scan-deps` performance on my workload by 3.6%.
1 parent ca9f5b6 commit a553c62

File tree

2 files changed

+101
-49
lines changed

2 files changed

+101
-49
lines changed

clang/include/clang/Serialization/ASTReader.h

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "llvm/ADT/iterator_range.h"
5151
#include "llvm/Bitstream/BitstreamReader.h"
5252
#include "llvm/Support/MemoryBuffer.h"
53+
#include "llvm/Support/SaveAndRestore.h"
5354
#include "llvm/Support/Timer.h"
5455
#include "llvm/Support/VersionTuple.h"
5556
#include <cassert>
@@ -1341,9 +1342,48 @@ class ASTReader
13411342
serialization::InputFile getInputFile(ModuleFile &F, unsigned ID,
13421343
bool Complain = true);
13431344

1345+
/// The buffer used as the temporary backing storage for resolved paths.
1346+
SmallString<0> PathBuf;
1347+
1348+
/// A wrapper around StringRef that temporarily borrows the underlying buffer.
1349+
class TemporarilyOwnedStringRef {
1350+
StringRef String;
1351+
llvm::SaveAndRestore<SmallString<0>> UnderlyingBuffer;
1352+
1353+
public:
1354+
TemporarilyOwnedStringRef(StringRef S, SmallString<0> &UnderlyingBuffer)
1355+
: String(S), UnderlyingBuffer(UnderlyingBuffer, {}) {}
1356+
1357+
/// Return the wrapped \c StringRef that must be outlived by \c this.
1358+
const StringRef *operator->() const & { return &String; }
1359+
const StringRef &operator*() const & { return String; }
1360+
1361+
/// Make it harder to get a \c StringRef that outlives \c this.
1362+
const StringRef *operator->() && = delete;
1363+
const StringRef &operator*() && = delete;
1364+
};
1365+
13441366
public:
1345-
void ResolveImportedPath(ModuleFile &M, std::string &Filename);
1346-
static void ResolveImportedPath(std::string &Filename, StringRef Prefix);
1367+
/// Get the buffer for resolving paths.
1368+
SmallString<0> &getPathBuf() { return PathBuf; }
1369+
1370+
/// Resolve \c Path in the context of module file \c M. The return value
1371+
/// must go out of scope before the next call to \c ResolveImportedPath.
1372+
static TemporarilyOwnedStringRef
1373+
ResolveImportedPath(SmallString<0> &Buf, StringRef Path, ModuleFile &ModF);
1374+
/// Resolve \c Path in the context of the \c Prefix directory. The return
1375+
/// value must go out of scope before the next call to \c ResolveImportedPath.
1376+
static TemporarilyOwnedStringRef
1377+
ResolveImportedPath(SmallString<0> &Buf, StringRef Path, StringRef Prefix);
1378+
1379+
/// Resolve \c Path in the context of module file \c M.
1380+
static std::string ResolveImportedPathAndAllocate(SmallString<0> &Buf,
1381+
StringRef Path,
1382+
ModuleFile &ModF);
1383+
/// Resolve \c Path in the context of the \c Prefix directory.
1384+
static std::string ResolveImportedPathAndAllocate(SmallString<0> &Buf,
1385+
StringRef Path,
1386+
StringRef Prefix);
13471387

13481388
/// Returns the first key declaration for the given declaration. This
13491389
/// is one that is formerly-canonical (or still canonical) and whose module

clang/lib/Serialization/ASTReader.cpp

Lines changed: 59 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,9 +2048,9 @@ HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
20482048
if (!Key.Imported)
20492049
return FileMgr.getOptionalFileRef(Key.Filename);
20502050

2051-
std::string Resolved = std::string(Key.Filename);
2052-
Reader.ResolveImportedPath(M, Resolved);
2053-
return FileMgr.getOptionalFileRef(Resolved);
2051+
auto Resolved =
2052+
ASTReader::ResolveImportedPath(Reader.getPathBuf(), Key.Filename, M);
2053+
return FileMgr.getOptionalFileRef(*Resolved);
20542054
}
20552055

20562056
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
@@ -2513,11 +2513,12 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
25132513
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
25142514
uint16_t AsRequestedLength = Record[7];
25152515

2516-
std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
2517-
std::string Name = Blob.substr(AsRequestedLength).str();
2516+
StringRef NameAsRequestedRef = Blob.substr(0, AsRequestedLength);
2517+
StringRef NameRef = Blob.substr(AsRequestedLength);
25182518

2519-
ResolveImportedPath(F, NameAsRequested);
2520-
ResolveImportedPath(F, Name);
2519+
std::string NameAsRequested =
2520+
ResolveImportedPathAndAllocate(PathBuf, NameAsRequestedRef, F);
2521+
std::string Name = ResolveImportedPathAndAllocate(PathBuf, NameRef, F);
25212522

25222523
if (Name.empty())
25232524
Name = NameAsRequested;
@@ -2743,23 +2744,38 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
27432744
return IF;
27442745
}
27452746

2746-
/// If we are loading a relocatable PCH or module file, and the filename
2747-
/// is not an absolute path, add the system or module root to the beginning of
2748-
/// the file name.
2749-
void ASTReader::ResolveImportedPath(ModuleFile &M, std::string &Filename) {
2750-
// Resolve relative to the base directory, if we have one.
2751-
if (!M.BaseDirectory.empty())
2752-
return ResolveImportedPath(Filename, M.BaseDirectory);
2747+
ASTReader::TemporarilyOwnedStringRef
2748+
ASTReader::ResolveImportedPath(SmallString<0> &Buf, StringRef Path,
2749+
ModuleFile &ModF) {
2750+
return ResolveImportedPath(Buf, Path, ModF.BaseDirectory);
27532751
}
27542752

2755-
void ASTReader::ResolveImportedPath(std::string &Filename, StringRef Prefix) {
2756-
if (Filename.empty() || llvm::sys::path::is_absolute(Filename) ||
2757-
Filename == "<built-in>" || Filename == "<command line>")
2758-
return;
2753+
ASTReader::TemporarilyOwnedStringRef
2754+
ASTReader::ResolveImportedPath(SmallString<0> &Buf, StringRef Path,
2755+
StringRef Prefix) {
2756+
assert(Buf.capacity() != 0 && "Overlapping ResolveImportedPath calls");
2757+
2758+
if (Prefix.empty() || Path.empty() || llvm::sys::path::is_absolute(Path) ||
2759+
Path == "<built-in>" || Path == "<command line>")
2760+
return {Path, Buf};
2761+
2762+
Buf.clear();
2763+
llvm::sys::path::append(Buf, Prefix, Path);
2764+
StringRef ResolvedPath{Buf.data(), Buf.size()};
2765+
return {ResolvedPath, Buf};
2766+
}
27592767

2760-
SmallString<128> Buffer;
2761-
llvm::sys::path::append(Buffer, Prefix, Filename);
2762-
Filename.assign(Buffer.begin(), Buffer.end());
2768+
std::string ASTReader::ResolveImportedPathAndAllocate(SmallString<0> &Buf,
2769+
StringRef P,
2770+
ModuleFile &ModF) {
2771+
return ResolveImportedPathAndAllocate(Buf, P, ModF.BaseDirectory);
2772+
}
2773+
2774+
std::string ASTReader::ResolveImportedPathAndAllocate(SmallString<0> &Buf,
2775+
StringRef P,
2776+
StringRef Prefix) {
2777+
auto ResolvedPath = ResolveImportedPath(Buf, P, Prefix);
2778+
return ResolvedPath->str();
27632779
}
27642780

27652781
static bool isDiagnosedResult(ASTReader::ASTReadResult ARR, unsigned Caps) {
@@ -3187,8 +3203,8 @@ ASTReader::ReadControlBlock(ModuleFile &F,
31873203
case ORIGINAL_FILE:
31883204
F.OriginalSourceFileID = FileID::get(Record[0]);
31893205
F.ActualOriginalSourceFileName = std::string(Blob);
3190-
F.OriginalSourceFileName = F.ActualOriginalSourceFileName;
3191-
ResolveImportedPath(F, F.OriginalSourceFileName);
3206+
F.OriginalSourceFileName = ResolveImportedPathAndAllocate(
3207+
PathBuf, F.ActualOriginalSourceFileName, F);
31923208
break;
31933209

31943210
case ORIGINAL_FILE_ID:
@@ -5477,6 +5493,8 @@ bool ASTReader::readASTFileControlBlock(
54775493
RecordData Record;
54785494
std::string ModuleDir;
54795495
bool DoneWithControlBlock = false;
5496+
SmallString<0> PathBuf;
5497+
PathBuf.reserve(256);
54805498
while (!DoneWithControlBlock) {
54815499
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
54825500
if (!MaybeEntry) {
@@ -5559,9 +5577,9 @@ bool ASTReader::readASTFileControlBlock(
55595577
break;
55605578
case MODULE_MAP_FILE: {
55615579
unsigned Idx = 0;
5562-
auto Path = ReadString(Record, Idx);
5563-
ResolveImportedPath(Path, ModuleDir);
5564-
Listener.ReadModuleMapFile(Path);
5580+
std::string PathStr = ReadString(Record, Idx);
5581+
auto Path = ResolveImportedPath(PathBuf, PathStr, ModuleDir);
5582+
Listener.ReadModuleMapFile(*Path);
55655583
break;
55665584
}
55675585
case INPUT_FILE_OFFSETS: {
@@ -5608,10 +5626,9 @@ bool ASTReader::readASTFileControlBlock(
56085626
break;
56095627
case INPUT_FILE:
56105628
bool Overridden = static_cast<bool>(Record[3]);
5611-
std::string Filename = std::string(Blob);
5612-
ResolveImportedPath(Filename, ModuleDir);
5629+
auto Filename = ResolveImportedPath(PathBuf, Blob, ModuleDir);
56135630
shouldContinue = Listener.visitInputFile(
5614-
Filename, isSystemFile, Overridden, /*IsExplicitModule*/false);
5631+
*Filename, isSystemFile, Overridden, /*IsExplicitModule=*/false);
56155632
break;
56165633
}
56175634
if (!shouldContinue)
@@ -5646,9 +5663,9 @@ bool ASTReader::readASTFileControlBlock(
56465663
// Skip Size, ModTime and Signature
56475664
Idx += 1 + 1 + ASTFileSignature::size;
56485665
std::string ModuleName = ReadString(Record, Idx);
5649-
std::string Filename = ReadString(Record, Idx);
5650-
ResolveImportedPath(Filename, ModuleDir);
5651-
Listener.visitImport(ModuleName, Filename);
5666+
std::string FilenameStr = ReadString(Record, Idx);
5667+
auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
5668+
Listener.visitImport(ModuleName, *Filename);
56525669
}
56535670
break;
56545671
}
@@ -5901,9 +5918,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
59015918
// FIXME: This doesn't work for framework modules as `Filename` is the
59025919
// name as written in the module file and does not include
59035920
// `Headers/`, so this path will never exist.
5904-
std::string Filename = std::string(Blob);
5905-
ResolveImportedPath(F, Filename);
5906-
if (auto Umbrella = PP.getFileManager().getOptionalFileRef(Filename)) {
5921+
auto Filename = ResolveImportedPath(PathBuf, Blob, F);
5922+
if (auto Umbrella = PP.getFileManager().getOptionalFileRef(*Filename)) {
59075923
if (!CurrentModule->getUmbrellaHeaderAsWritten()) {
59085924
// FIXME: NameAsWritten
59095925
ModMap.setUmbrellaHeaderAsWritten(CurrentModule, *Umbrella, Blob, "");
@@ -5931,18 +5947,16 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
59315947
break;
59325948

59335949
case SUBMODULE_TOPHEADER: {
5934-
std::string HeaderName(Blob);
5935-
ResolveImportedPath(F, HeaderName);
5936-
CurrentModule->addTopHeaderFilename(HeaderName);
5950+
auto HeaderName = ResolveImportedPath(PathBuf, Blob, F);
5951+
CurrentModule->addTopHeaderFilename(*HeaderName);
59375952
break;
59385953
}
59395954

59405955
case SUBMODULE_UMBRELLA_DIR: {
59415956
// See comments in SUBMODULE_UMBRELLA_HEADER
5942-
std::string Dirname = std::string(Blob);
5943-
ResolveImportedPath(F, Dirname);
5957+
auto Dirname = ResolveImportedPath(PathBuf, Blob, F);
59445958
if (auto Umbrella =
5945-
PP.getFileManager().getOptionalDirectoryRef(Dirname)) {
5959+
PP.getFileManager().getOptionalDirectoryRef(*Dirname)) {
59465960
if (!CurrentModule->getUmbrellaDirAsWritten()) {
59475961
// FIXME: NameAsWritten
59485962
ModMap.setUmbrellaDirAsWritten(CurrentModule, *Umbrella, Blob, "");
@@ -9597,17 +9611,13 @@ std::string ASTReader::ReadString(const RecordDataImpl &Record, unsigned &Idx) {
95979611

95989612
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
95999613
unsigned &Idx) {
9600-
std::string Filename = ReadString(Record, Idx);
9601-
ResolveImportedPath(F, Filename);
9602-
return Filename;
9614+
return ReadPath(F.BaseDirectory, Record, Idx);
96039615
}
96049616

96059617
std::string ASTReader::ReadPath(StringRef BaseDirectory,
96069618
const RecordData &Record, unsigned &Idx) {
96079619
std::string Filename = ReadString(Record, Idx);
9608-
if (!BaseDirectory.empty())
9609-
ResolveImportedPath(Filename, BaseDirectory);
9610-
return Filename;
9620+
return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
96119621
}
96129622

96139623
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
@@ -10512,6 +10522,8 @@ ASTReader::ASTReader(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
1051210522
UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
1051310523
SourceMgr.setExternalSLocEntrySource(this);
1051410524

10525+
PathBuf.reserve(256);
10526+
1051510527
for (const auto &Ext : Extensions) {
1051610528
auto BlockName = Ext->getExtensionMetadata().BlockName;
1051710529
auto Known = ModuleFileExtensions.find(BlockName);

0 commit comments

Comments
 (0)