Skip to content

Commit 08fd8e1

Browse files
committed
[clang][modules] Avoid allocations when reading blob paths (llvm#113984)
When reading a path from a bitstream blob, `ASTReader` performs up to three allocations: 1. Conversion of the `StringRef` blob into `std::string` to conform to the `ResolveImportedPath()` API that takes `std::string &`. 2. Concatenation of the module file prefix directory and the relative path into a fresh `SmallString<128>` buffer in `ResolveImportedPath()`. 3. Propagating the result out of `ResolveImportedPath()` by calling `std::string::assign()` on the out-parameter. This patch makes is so that we avoid allocations altogether (amortized) by: 1. Avoiding conversion of the `StringRef` blob into `std::string` and changing the `ResolveImportedPath()` API. 2. Using one "global" buffer to hold the concatenation. 3. Returning `StringRef` that points into the buffer and ensuring the contents are not overwritten while it lives. Note that in some places of the bitstream we don't store paths as blobs, but rather as records that get VBR-encoded. This makes the allocation in (1) unavoidable. I plan to fix this in a follow-up PR by changing the PCM format. Moreover, there are some data structures (e.g. `serialization::InputFileInfo`) that store deserialized and resolved paths as `std::string`. If we don't access them frequently, it would be more efficient to store just the unresolved `StringRef` and resolve them on demand (within some kind of shared buffer to prevent allocations). This PR alone improves `clang-scan-deps` performance on my workload by 3.6%. (cherry picked from commit a553c62)
1 parent 9db0258 commit 08fd8e1

File tree

2 files changed

+102
-50
lines changed

2 files changed

+102
-50
lines changed

clang/include/clang/Serialization/ASTReader.h

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "llvm/ADT/iterator_range.h"
5050
#include "llvm/Bitstream/BitstreamReader.h"
5151
#include "llvm/Support/MemoryBuffer.h"
52+
#include "llvm/Support/SaveAndRestore.h"
5253
#include "llvm/Support/Timer.h"
5354
#include "llvm/Support/VersionTuple.h"
5455
#include <cassert>
@@ -1326,9 +1327,48 @@ class ASTReader
13261327
serialization::InputFile getInputFile(ModuleFile &F, unsigned ID,
13271328
bool Complain = true);
13281329

1330+
/// The buffer used as the temporary backing storage for resolved paths.
1331+
SmallString<0> PathBuf;
1332+
1333+
/// A wrapper around StringRef that temporarily borrows the underlying buffer.
1334+
class TemporarilyOwnedStringRef {
1335+
StringRef String;
1336+
llvm::SaveAndRestore<SmallString<0>> UnderlyingBuffer;
1337+
1338+
public:
1339+
TemporarilyOwnedStringRef(StringRef S, SmallString<0> &UnderlyingBuffer)
1340+
: String(S), UnderlyingBuffer(UnderlyingBuffer, {}) {}
1341+
1342+
/// Return the wrapped \c StringRef that must be outlived by \c this.
1343+
const StringRef *operator->() const & { return &String; }
1344+
const StringRef &operator*() const & { return String; }
1345+
1346+
/// Make it harder to get a \c StringRef that outlives \c this.
1347+
const StringRef *operator->() && = delete;
1348+
const StringRef &operator*() && = delete;
1349+
};
1350+
13291351
public:
1330-
void ResolveImportedPath(ModuleFile &M, std::string &Filename);
1331-
static void ResolveImportedPath(std::string &Filename, StringRef Prefix);
1352+
/// Get the buffer for resolving paths.
1353+
SmallString<0> &getPathBuf() { return PathBuf; }
1354+
1355+
/// Resolve \c Path in the context of module file \c M. The return value
1356+
/// must go out of scope before the next call to \c ResolveImportedPath.
1357+
static TemporarilyOwnedStringRef
1358+
ResolveImportedPath(SmallString<0> &Buf, StringRef Path, ModuleFile &ModF);
1359+
/// Resolve \c Path in the context of the \c Prefix directory. The return
1360+
/// value must go out of scope before the next call to \c ResolveImportedPath.
1361+
static TemporarilyOwnedStringRef
1362+
ResolveImportedPath(SmallString<0> &Buf, StringRef Path, StringRef Prefix);
1363+
1364+
/// Resolve \c Path in the context of module file \c M.
1365+
static std::string ResolveImportedPathAndAllocate(SmallString<0> &Buf,
1366+
StringRef Path,
1367+
ModuleFile &ModF);
1368+
/// Resolve \c Path in the context of the \c Prefix directory.
1369+
static std::string ResolveImportedPathAndAllocate(SmallString<0> &Buf,
1370+
StringRef Path,
1371+
StringRef Prefix);
13321372

13331373
/// Returns the first key declaration for the given declaration. This
13341374
/// is one that is formerly-canonical (or still canonical) and whose module

clang/lib/Serialization/ASTReader.cpp

Lines changed: 60 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,9 +2037,9 @@ HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
20372037
if (!Key.Imported)
20382038
return FileMgr.getOptionalFileRef(Key.Filename);
20392039

2040-
std::string Resolved = std::string(Key.Filename);
2041-
Reader.ResolveImportedPath(M, Resolved);
2042-
return FileMgr.getOptionalFileRef(Resolved);
2040+
auto Resolved =
2041+
ASTReader::ResolveImportedPath(Reader.getPathBuf(), Key.Filename, M);
2042+
return FileMgr.getOptionalFileRef(*Resolved);
20432043
}
20442044

20452045
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
@@ -2502,11 +2502,12 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
25022502
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
25032503
uint16_t AsRequestedLength = Record[7];
25042504

2505-
std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
2506-
std::string Name = Blob.substr(AsRequestedLength).str();
2505+
StringRef NameAsRequestedRef = Blob.substr(0, AsRequestedLength);
2506+
StringRef NameRef = Blob.substr(AsRequestedLength);
25072507

2508-
ResolveImportedPath(F, NameAsRequested);
2509-
ResolveImportedPath(F, Name);
2508+
std::string NameAsRequested =
2509+
ResolveImportedPathAndAllocate(PathBuf, NameAsRequestedRef, F);
2510+
std::string Name = ResolveImportedPathAndAllocate(PathBuf, NameRef, F);
25102511

25112512
if (Name.empty())
25122513
Name = NameAsRequested;
@@ -2736,23 +2737,38 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
27362737
return IF;
27372738
}
27382739

2739-
/// If we are loading a relocatable PCH or module file, and the filename
2740-
/// is not an absolute path, add the system or module root to the beginning of
2741-
/// the file name.
2742-
void ASTReader::ResolveImportedPath(ModuleFile &M, std::string &Filename) {
2743-
// Resolve relative to the base directory, if we have one.
2744-
if (!M.BaseDirectory.empty())
2745-
return ResolveImportedPath(Filename, M.BaseDirectory);
2740+
ASTReader::TemporarilyOwnedStringRef
2741+
ASTReader::ResolveImportedPath(SmallString<0> &Buf, StringRef Path,
2742+
ModuleFile &ModF) {
2743+
return ResolveImportedPath(Buf, Path, ModF.BaseDirectory);
27462744
}
27472745

2748-
void ASTReader::ResolveImportedPath(std::string &Filename, StringRef Prefix) {
2749-
if (Filename.empty() || llvm::sys::path::is_absolute(Filename) ||
2750-
Filename == "<built-in>" || Filename == "<command line>")
2751-
return;
2746+
ASTReader::TemporarilyOwnedStringRef
2747+
ASTReader::ResolveImportedPath(SmallString<0> &Buf, StringRef Path,
2748+
StringRef Prefix) {
2749+
assert(Buf.capacity() != 0 && "Overlapping ResolveImportedPath calls");
2750+
2751+
if (Prefix.empty() || Path.empty() || llvm::sys::path::is_absolute(Path) ||
2752+
Path == "<built-in>" || Path == "<command line>")
2753+
return {Path, Buf};
2754+
2755+
Buf.clear();
2756+
llvm::sys::path::append(Buf, Prefix, Path);
2757+
StringRef ResolvedPath{Buf.data(), Buf.size()};
2758+
return {ResolvedPath, Buf};
2759+
}
27522760

2753-
SmallString<128> Buffer;
2754-
llvm::sys::path::append(Buffer, Prefix, Filename);
2755-
Filename.assign(Buffer.begin(), Buffer.end());
2761+
std::string ASTReader::ResolveImportedPathAndAllocate(SmallString<0> &Buf,
2762+
StringRef P,
2763+
ModuleFile &ModF) {
2764+
return ResolveImportedPathAndAllocate(Buf, P, ModF.BaseDirectory);
2765+
}
2766+
2767+
std::string ASTReader::ResolveImportedPathAndAllocate(SmallString<0> &Buf,
2768+
StringRef P,
2769+
StringRef Prefix) {
2770+
auto ResolvedPath = ResolveImportedPath(Buf, P, Prefix);
2771+
return ResolvedPath->str();
27562772
}
27572773

27582774
static bool isDiagnosedResult(ASTReader::ASTReadResult ARR, unsigned Caps) {
@@ -3183,8 +3199,8 @@ ASTReader::ReadControlBlock(ModuleFile &F,
31833199
case ORIGINAL_FILE:
31843200
F.OriginalSourceFileID = FileID::get(Record[0]);
31853201
F.ActualOriginalSourceFileName = std::string(Blob);
3186-
F.OriginalSourceFileName = F.ActualOriginalSourceFileName;
3187-
ResolveImportedPath(F, F.OriginalSourceFileName);
3202+
F.OriginalSourceFileName = ResolveImportedPathAndAllocate(
3203+
PathBuf, F.ActualOriginalSourceFileName, F);
31883204
break;
31893205

31903206
case ORIGINAL_FILE_ID:
@@ -5474,6 +5490,8 @@ bool ASTReader::readASTFileControlBlock(
54745490
RecordData Record;
54755491
std::string ModuleDir;
54765492
bool DoneWithControlBlock = false;
5493+
SmallString<0> PathBuf;
5494+
PathBuf.reserve(256);
54775495
while (!DoneWithControlBlock) {
54785496
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
54795497
if (!MaybeEntry) {
@@ -5556,9 +5574,9 @@ bool ASTReader::readASTFileControlBlock(
55565574
break;
55575575
case MODULE_MAP_FILE: {
55585576
unsigned Idx = 0;
5559-
auto Path = ReadString(Record, Idx);
5560-
ResolveImportedPath(Path, ModuleDir);
5561-
Listener.ReadModuleMapFile(Path);
5577+
std::string PathStr = ReadString(Record, Idx);
5578+
auto Path = ResolveImportedPath(PathBuf, PathStr, ModuleDir);
5579+
Listener.ReadModuleMapFile(*Path);
55625580
break;
55635581
}
55645582
case INPUT_FILE_OFFSETS: {
@@ -5605,10 +5623,9 @@ bool ASTReader::readASTFileControlBlock(
56055623
break;
56065624
case INPUT_FILE:
56075625
bool Overridden = static_cast<bool>(Record[3]);
5608-
std::string Filename = std::string(Blob);
5609-
ResolveImportedPath(Filename, ModuleDir);
5626+
auto Filename = ResolveImportedPath(PathBuf, Blob, ModuleDir);
56105627
shouldContinue = Listener.visitInputFile(
5611-
Filename, isSystemFile, Overridden, /*IsExplicitModule*/false);
5628+
*Filename, isSystemFile, Overridden, /*IsExplicitModule=*/false);
56125629
break;
56135630
}
56145631
if (!shouldContinue)
@@ -5643,12 +5660,12 @@ bool ASTReader::readASTFileControlBlock(
56435660
// Skip Size, ModTime and Signature
56445661
Idx += 1 + 1 + ASTFileSignature::size;
56455662
std::string ModuleName = ReadString(Record, Idx);
5646-
std::string Filename = ReadString(Record, Idx);
5647-
ResolveImportedPath(Filename, ModuleDir);
5663+
std::string FilenameStr = ReadString(Record, Idx);
5664+
auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
56485665
std::string CacheKey = ReadString(Record, Idx);
56495666
if (!CacheKey.empty())
5650-
Listener.readModuleCacheKey(ModuleName, Filename, CacheKey);
5651-
Listener.visitImport(ModuleName, Filename);
5667+
Listener.readModuleCacheKey(ModuleName, *Filename, CacheKey);
5668+
Listener.visitImport(ModuleName, *Filename);
56525669
}
56535670
break;
56545671
}
@@ -5916,9 +5933,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
59165933
// FIXME: This doesn't work for framework modules as `Filename` is the
59175934
// name as written in the module file and does not include
59185935
// `Headers/`, so this path will never exist.
5919-
std::string Filename = std::string(Blob);
5920-
ResolveImportedPath(F, Filename);
5921-
if (auto Umbrella = PP.getFileManager().getOptionalFileRef(Filename)) {
5936+
auto Filename = ResolveImportedPath(PathBuf, Blob, F);
5937+
if (auto Umbrella = PP.getFileManager().getOptionalFileRef(*Filename)) {
59225938
if (!CurrentModule->getUmbrellaHeaderAsWritten()) {
59235939
// FIXME: NameAsWritten
59245940
ModMap.setUmbrellaHeaderAsWritten(CurrentModule, *Umbrella, Blob, "");
@@ -5946,18 +5962,16 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
59465962
break;
59475963

59485964
case SUBMODULE_TOPHEADER: {
5949-
std::string HeaderName(Blob);
5950-
ResolveImportedPath(F, HeaderName);
5951-
CurrentModule->addTopHeaderFilename(HeaderName);
5965+
auto HeaderName = ResolveImportedPath(PathBuf, Blob, F);
5966+
CurrentModule->addTopHeaderFilename(*HeaderName);
59525967
break;
59535968
}
59545969

59555970
case SUBMODULE_UMBRELLA_DIR: {
59565971
// See comments in SUBMODULE_UMBRELLA_HEADER
5957-
std::string Dirname = std::string(Blob);
5958-
ResolveImportedPath(F, Dirname);
5972+
auto Dirname = ResolveImportedPath(PathBuf, Blob, F);
59595973
if (auto Umbrella =
5960-
PP.getFileManager().getOptionalDirectoryRef(Dirname)) {
5974+
PP.getFileManager().getOptionalDirectoryRef(*Dirname)) {
59615975
if (!CurrentModule->getUmbrellaDirAsWritten()) {
59625976
// FIXME: NameAsWritten
59635977
ModMap.setUmbrellaDirAsWritten(CurrentModule, *Umbrella, Blob, "");
@@ -9503,17 +9517,13 @@ std::string ASTReader::ReadString(const RecordDataImpl &Record, unsigned &Idx) {
95039517

95049518
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
95059519
unsigned &Idx) {
9506-
std::string Filename = ReadString(Record, Idx);
9507-
ResolveImportedPath(F, Filename);
9508-
return Filename;
9520+
return ReadPath(F.BaseDirectory, Record, Idx);
95099521
}
95109522

95119523
std::string ASTReader::ReadPath(StringRef BaseDirectory,
95129524
const RecordData &Record, unsigned &Idx) {
95139525
std::string Filename = ReadString(Record, Idx);
9514-
if (!BaseDirectory.empty())
9515-
ResolveImportedPath(Filename, BaseDirectory);
9516-
return Filename;
9526+
return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
95179527
}
95189528

95199529
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
@@ -10380,6 +10390,8 @@ ASTReader::ASTReader(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
1038010390
UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
1038110391
SourceMgr.setExternalSLocEntrySource(this);
1038210392

10393+
PathBuf.reserve(256);
10394+
1038310395
for (const auto &Ext : Extensions) {
1038410396
auto BlockName = Ext->getExtensionMetadata().BlockName;
1038510397
auto Known = ModuleFileExtensions.find(BlockName);

0 commit comments

Comments
 (0)