Skip to content

Commit 10c0eca

Browse files
committed
[clang][Lexer] Speed up HeaderSearch when there are many HeaderMaps
HeaderSearch already uses a caching system to avoid duplicate searches, but the initial cold miss can take a long time if a build system has supplied thousands of HeaderMaps. For this case, the SearchDirs vector begins with those HeaderMaps, so a cache miss requires testing if the sought filename is present in each of those maps. Instead, we can consolidate the keys of those HeaderMaps into one StringMap and then each cache miss can skip directly to the correct HeaderMap or continue its search beyond the initial sequence of HeaderMaps. In testing on TUs with ~15000 SearchDirs where the initial 99% are HeaderMaps, time spent in Clang was reduced by 15%. This patch is expected to be neutral for SearchDir vectors that do not begin with HeaderMaps. Differential Revision: https://reviews.llvm.org/D135801
1 parent a8e9742 commit 10c0eca

File tree

3 files changed

+86
-16
lines changed

3 files changed

+86
-16
lines changed

clang/include/clang/Lex/HeaderMap.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "clang/Basic/FileManager.h"
1717
#include "clang/Basic/LLVM.h"
18+
#include "clang/Lex/HeaderMapTypes.h"
1819
#include "llvm/ADT/Optional.h"
1920
#include "llvm/ADT/StringMap.h"
2021
#include "llvm/Support/Compiler.h"
@@ -39,6 +40,21 @@ class HeaderMapImpl {
3940
// Check for a valid header and extract the byte swap.
4041
static bool checkHeader(const llvm::MemoryBuffer &File, bool &NeedsByteSwap);
4142

43+
// Make a call for every Key in the map.
44+
template <typename Func> void forEachKey(Func Callback) const {
45+
const HMapHeader &Hdr = getHeader();
46+
unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
47+
48+
for (unsigned Bucket = 0; Bucket < NumBuckets; ++Bucket) {
49+
HMapBucket B = getBucket(Bucket);
50+
if (B.Key != HMAP_EmptyBucketKey) {
51+
Optional<StringRef> Key = getString(B.Key);
52+
if (Key)
53+
Callback(Key.value());
54+
}
55+
}
56+
}
57+
4258
/// If the specified relative filename is located in this HeaderMap return
4359
/// the filename it is mapped to, otherwise return an empty StringRef.
4460
StringRef lookupFilename(StringRef Filename,
@@ -78,6 +94,7 @@ class HeaderMap : private HeaderMapImpl {
7894
FileManager &FM);
7995

8096
using HeaderMapImpl::dump;
97+
using HeaderMapImpl::forEachKey;
8198
using HeaderMapImpl::getFileName;
8299
using HeaderMapImpl::lookupFilename;
83100
using HeaderMapImpl::reverseLookupFilename;

clang/include/clang/Lex/HeaderSearch.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,14 @@ class HeaderSearch {
249249
unsigned SystemDirIdx = 0;
250250
bool NoCurDirSearch = false;
251251

252+
/// Maps HeaderMap keys to SearchDir indices. When HeaderMaps are used
253+
/// heavily, SearchDirs can start with thousands of HeaderMaps, so this Index
254+
/// lets us avoid scanning them all to find a match.
255+
llvm::StringMap<unsigned, llvm::BumpPtrAllocator> SearchDirHeaderMapIndex;
256+
257+
/// The index of the first SearchDir that isn't a header map.
258+
unsigned FirstNonHeaderMapSearchDirIdx = 0;
259+
252260
/// \#include prefixes for which the 'system header' property is
253261
/// overridden.
254262
///
@@ -330,6 +338,10 @@ class HeaderSearch {
330338
/// Entity used to look up stored header file information.
331339
ExternalHeaderFileInfoSource *ExternalSource = nullptr;
332340

341+
/// Scan all of the header maps at the beginning of SearchDirs and
342+
/// map their keys to the SearchDir index of their header map.
343+
void indexInitialHeaderMaps();
344+
333345
public:
334346
HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
335347
SourceManager &SourceMgr, DiagnosticsEngine &Diags,
@@ -801,6 +813,10 @@ class HeaderSearch {
801813
}
802814

803815
ConstSearchDirIterator search_dir_begin() const { return quoted_dir_begin(); }
816+
/// Returns an iterator positioned at index \p n of SearchDirs.
///
/// \p n may be equal to SearchDirs.size(), which yields the one-past-the-end
/// position. That value must be accepted: LookupFile passes
/// FirstNonHeaderMapSearchDirIdx here, and indexInitialHeaderMaps() sets it
/// to SearchDirs.size() when every SearchDir is a header map. Any larger
/// index is a programming error.
ConstSearchDirIterator search_dir_nth(size_t n) const {
  assert(n <= SearchDirs.size() && "search dir index out of range");
  return {*this, n};
}
804820
ConstSearchDirIterator search_dir_end() const { return system_dir_end(); }
805821
ConstSearchDirRange search_dir_range() const {
806822
return {search_dir_begin(), search_dir_end()};

clang/lib/Lex/HeaderSearch.cpp

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ void HeaderSearch::SetSearchPaths(
116116
NoCurDirSearch = noCurDirSearch;
117117
SearchDirToHSEntry = std::move(searchDirToHSEntry);
118118
//LookupFileCache.clear();
119+
indexInitialHeaderMaps();
119120
}
120121

121122
void HeaderSearch::AddSearchPath(const DirectoryLookup &dir, bool isAngled) {
@@ -372,6 +373,29 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
372373
return Module;
373374
}
374375

376+
void HeaderSearch::indexInitialHeaderMaps() {
377+
llvm::StringMap<unsigned, llvm::BumpPtrAllocator> Index(SearchDirs.size());
378+
379+
// Iterate over all filename keys and associate them with the index i.
380+
unsigned i = 0;
381+
for (; i != SearchDirs.size(); ++i) {
382+
auto &Dir = SearchDirs[i];
383+
384+
// We're concerned with only the initial contiguous run of header
385+
// maps within SearchDirs, which can be 99% of SearchDirs when
386+
// SearchDirs.size() is ~10000.
387+
if (!Dir.isHeaderMap())
388+
break;
389+
390+
// Give earlier keys precedence over identical later keys.
391+
auto Callback = [&](StringRef Filename) { Index.try_emplace(Filename, i); };
392+
Dir.getHeaderMap()->forEachKey(Callback);
393+
}
394+
395+
SearchDirHeaderMapIndex = std::move(Index);
396+
FirstNonHeaderMapSearchDirIdx = i;
397+
}
398+
375399
//===----------------------------------------------------------------------===//
376400
// File lookup within a DirectoryLookup scope
377401
//===----------------------------------------------------------------------===//
@@ -977,24 +1001,37 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(
9771001

9781002
ConstSearchDirIterator NextIt = std::next(It);
9791003

980-
// If the entry has been previously looked up, the first value will be
981-
// non-zero. If the value is equal to i (the start point of our search), then
982-
// this is a matching hit.
983-
if (!SkipCache && CacheLookup.StartIt == NextIt) {
984-
// Skip querying potentially lots of directories for this lookup.
985-
if (CacheLookup.HitIt)
986-
It = CacheLookup.HitIt;
987-
if (CacheLookup.MappedName) {
988-
Filename = CacheLookup.MappedName;
989-
if (IsMapped)
990-
*IsMapped = true;
1004+
if (!SkipCache) {
1005+
if (CacheLookup.StartIt == NextIt) {
1006+
// HIT: Skip querying potentially lots of directories for this lookup.
1007+
if (CacheLookup.HitIt)
1008+
It = CacheLookup.HitIt;
1009+
if (CacheLookup.MappedName) {
1010+
Filename = CacheLookup.MappedName;
1011+
if (IsMapped)
1012+
*IsMapped = true;
1013+
}
1014+
} else {
1015+
// MISS: This is the first query, or the previous query didn't match
1016+
// our search start. We will fill in our found location below, so prime
1017+
// the start point value.
1018+
CacheLookup.reset(/*NewStartIt=*/NextIt);
1019+
1020+
if (It == search_dir_begin() && FirstNonHeaderMapSearchDirIdx > 0) {
1021+
// Handle cold misses of user includes in the presence of many header
1022+
// maps. We avoid searching perhaps thousands of header maps by
1023+
// jumping directly to the correct one or jumping beyond all of them.
1024+
auto Iter = SearchDirHeaderMapIndex.find(Filename);
1025+
if (Iter == SearchDirHeaderMapIndex.end())
1026+
// Not in index => Skip to first SearchDir after initial header maps
1027+
It = search_dir_nth(FirstNonHeaderMapSearchDirIdx);
1028+
else
1029+
// In index => Start with a specific header map
1030+
It = search_dir_nth(Iter->second);
1031+
}
9911032
}
992-
} else {
993-
// Otherwise, this is the first query, or the previous query didn't match
994-
// our search start. We will fill in our found location below, so prime the
995-
// start point value.
1033+
} else
9961034
CacheLookup.reset(/*NewStartIt=*/NextIt);
997-
}
9981035

9991036
SmallString<64> MappedName;
10001037

0 commit comments

Comments
 (0)