Skip to content

Commit 634a377

Browse files
committed
[clangd] Extract per-dir CDB cache to its own threadsafe class. NFC
This is a step towards making compile_commands.json reloadable.

The idea is:
- in addition to rare CDB loads, we're soon going to have somewhat-rare CDB reloads and fairly-common stat() of files to validate the CDB
- so stop doing all our work under a big global lock, instead using it to acquire per-directory structures with their own locks
- each directory can be refreshed from disk every N seconds, like filecache
- avoid locking these at all in the most common case: directory has no CDB

Differential Revision: https://reviews.llvm.org/D92381
1 parent 717b0da commit 634a377

File tree

2 files changed

+190
-89
lines changed

2 files changed

+190
-89
lines changed

clang-tools-extra/clangd/GlobalCompilationDatabase.cpp

Lines changed: 176 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@
1616
#include "llvm/ADT/None.h"
1717
#include "llvm/ADT/Optional.h"
1818
#include "llvm/ADT/STLExtras.h"
19+
#include "llvm/ADT/ScopeExit.h"
1920
#include "llvm/ADT/SmallString.h"
2021
#include "llvm/Support/FileSystem.h"
2122
#include "llvm/Support/FileUtilities.h"
2223
#include "llvm/Support/Path.h"
2324
#include "llvm/Support/Program.h"
25+
#include <chrono>
2426
#include <string>
2527
#include <tuple>
2628
#include <vector>
@@ -58,10 +60,117 @@ GlobalCompilationDatabase::getFallbackCommand(PathRef File) const {
5860
return Cmd;
5961
}
6062

63+
// Loads and caches the CDB from a single directory.
64+
//
65+
// This class is threadsafe, which is to say we have independent locks for each
66+
// directory we're searching for a CDB.
67+
// Loading is deferred until first access.
68+
//
69+
// The DirectoryBasedCDB keeps a map from path => DirectoryCache.
70+
// Typical usage is to:
71+
// - 1) determine all the paths that might be searched
72+
// - 2) acquire the map lock and get-or-create all the DirectoryCache entries
73+
// - 3) release the map lock and query the caches as desired
74+
//
75+
// FIXME: this should revalidate the cache sometimes
76+
// FIXME: IO should go through a VFS
77+
class DirectoryBasedGlobalCompilationDatabase::DirectoryCache {
78+
// Absolute canonical path that we're the cache for. (Not case-folded).
79+
const std::string Path;
80+
81+
// True if we've looked for a CDB here and found none.
82+
// (This makes it possible for get() to return without taking a lock)
83+
// FIXME: this should have an expiry time instead of lasting forever.
84+
std::atomic<bool> FinalizedNoCDB = {false};
85+
86+
// Guards following cache state.
87+
std::mutex Mu;
88+
// Has cache been filled from disk? FIXME: this should be an expiry time.
89+
bool CachePopulated = false;
90+
// Whether a new CDB has been loaded but not broadcast yet.
91+
bool NeedsBroadcast = false;
92+
// Last loaded CDB, meaningful if CachePopulated is set.
93+
// shared_ptr so we can overwrite this when callers are still using the CDB.
94+
std::shared_ptr<tooling::CompilationDatabase> CDB;
95+
96+
public:
97+
DirectoryCache(llvm::StringRef Path) : Path(Path) {
98+
assert(llvm::sys::path::is_absolute(Path));
99+
}
100+
101+
// Get the CDB associated with this directory.
102+
// ShouldBroadcast:
103+
// - as input, signals whether the caller is willing to broadcast a
104+
// newly-discovered CDB. (e.g. to trigger background indexing)
105+
// - as output, signals whether the caller should do so.
106+
// (If a new CDB is discovered and ShouldBroadcast is false, we mark the
107+
// CDB as needing broadcast, and broadcast it next time we can).
108+
std::shared_ptr<const tooling::CompilationDatabase>
109+
get(bool &ShouldBroadcast) {
110+
// Fast path for common case without taking lock.
111+
if (FinalizedNoCDB.load()) {
112+
ShouldBroadcast = false;
113+
return nullptr;
114+
}
115+
std::lock_guard<std::mutex> Lock(Mu);
116+
auto RequestBroadcast = llvm::make_scope_exit([&, OldCDB(CDB.get())] {
117+
// If we loaded a new CDB, it should be broadcast at some point.
118+
if (CDB != nullptr && CDB.get() != OldCDB)
119+
NeedsBroadcast = true;
120+
else if (CDB == nullptr) // nothing to broadcast anymore!
121+
NeedsBroadcast = false;
122+
// If we have something to broadcast, then do so iff allowed.
123+
if (!ShouldBroadcast)
124+
return;
125+
ShouldBroadcast = NeedsBroadcast;
126+
NeedsBroadcast = false;
127+
});
128+
129+
// For now, we never actually attempt to revalidate a populated cache.
130+
if (CachePopulated)
131+
return CDB;
132+
assert(CDB == nullptr);
133+
134+
load();
135+
CachePopulated = true;
136+
137+
if (!CDB)
138+
FinalizedNoCDB.store(true);
139+
return CDB;
140+
}
141+
142+
llvm::StringRef path() const { return Path; }
143+
144+
private:
145+
// Updates `CDB` from disk state.
146+
void load() {
147+
std::string Error; // ignored, because it's often "didn't find anything".
148+
CDB = tooling::CompilationDatabase::loadFromDirectory(Path, Error);
149+
if (!CDB) {
150+
// Fallback: check for $src/build, the conventional CMake build root.
151+
// Probe existence first to avoid each plugin doing IO if it doesn't
152+
// exist.
153+
llvm::SmallString<256> BuildDir(Path);
154+
llvm::sys::path::append(BuildDir, "build");
155+
if (llvm::sys::fs::is_directory(BuildDir)) {
156+
vlog("Found candidate build directory {0}", BuildDir);
157+
CDB = tooling::CompilationDatabase::loadFromDirectory(BuildDir, Error);
158+
}
159+
}
160+
if (CDB) {
161+
log("Loaded compilation database from {0}", Path);
162+
} else {
163+
vlog("No compilation database at {0}", Path);
164+
}
165+
}
166+
};
167+
61168
DirectoryBasedGlobalCompilationDatabase::
62169
DirectoryBasedGlobalCompilationDatabase(
63-
llvm::Optional<Path> CompileCommandsDir)
64-
: CompileCommandsDir(std::move(CompileCommandsDir)) {}
170+
llvm::Optional<Path> CompileCommandsDir) {
171+
if (CompileCommandsDir)
172+
OnlyDirCache = std::make_unique<DirectoryCache>(*CompileCommandsDir);
173+
}
65174

66175
DirectoryBasedGlobalCompilationDatabase::
67176
~DirectoryBasedGlobalCompilationDatabase() = default;
@@ -107,31 +216,21 @@ static bool pathEqual(PathRef A, PathRef B) {
107216
#endif
108217
}
109218

110-
DirectoryBasedGlobalCompilationDatabase::CachedCDB &
111-
DirectoryBasedGlobalCompilationDatabase::getCDBInDirLocked(PathRef Dir) const {
112-
// FIXME(ibiryukov): Invalidate cached compilation databases on changes
113-
auto Key = maybeCaseFoldPath(Dir);
114-
auto R = CompilationDatabases.try_emplace(Key);
115-
if (R.second) { // Cache miss, try to load CDB.
116-
CachedCDB &Entry = R.first->second;
117-
std::string Error;
118-
Entry.Path = std::string(Dir);
119-
Entry.CDB = tooling::CompilationDatabase::loadFromDirectory(Dir, Error);
120-
// Check for $src/build, the conventional CMake build root.
121-
// Probe existence first to avoid each plugin doing IO if it doesn't exist.
122-
if (!CompileCommandsDir && !Entry.CDB) {
123-
llvm::SmallString<256> BuildDir = Dir;
124-
llvm::sys::path::append(BuildDir, "build");
125-
if (llvm::sys::fs::is_directory(BuildDir)) {
126-
vlog("Found candidate build directory {0}", BuildDir);
127-
Entry.CDB =
128-
tooling::CompilationDatabase::loadFromDirectory(BuildDir, Error);
129-
}
130-
}
131-
if (Entry.CDB)
132-
log("Loaded compilation database from {0}", Dir);
133-
}
134-
return R.first->second;
219+
std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
220+
DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
221+
llvm::ArrayRef<llvm::StringRef> Dirs) const {
222+
std::vector<std::string> FoldedDirs;
223+
FoldedDirs.reserve(Dirs.size());
224+
for (const auto &Dir : Dirs)
225+
FoldedDirs.push_back(maybeCaseFoldPath(Dir));
226+
227+
std::vector<DirectoryCache *> Ret;
228+
Ret.reserve(Dirs.size());
229+
230+
std::lock_guard<std::mutex> Lock(DirCachesMutex);
231+
for (unsigned I = 0; I < Dirs.size(); ++I)
232+
Ret.push_back(&DirCaches.try_emplace(FoldedDirs[I], Dirs[I]).first->second);
233+
return Ret;
135234
}
136235

137236
llvm::Optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult>
@@ -141,39 +240,40 @@ DirectoryBasedGlobalCompilationDatabase::lookupCDB(
141240
"path must be absolute");
142241

143242
bool ShouldBroadcast = false;
144-
CDBLookupResult Result;
145-
146-
{
147-
std::lock_guard<std::mutex> Lock(Mutex);
148-
CachedCDB *Entry = nullptr;
149-
if (CompileCommandsDir) {
150-
Entry = &getCDBInDirLocked(*CompileCommandsDir);
151-
} else {
152-
// Traverse the canonical version to prevent false positives. i.e.:
153-
// src/build/../a.cc can detect a CDB in /src/build if not canonicalized.
154-
// FIXME(sammccall): this loop is hot, use a union-find-like structure.
155-
actOnAllParentDirectories(removeDots(Request.FileName),
156-
[&](PathRef Path) {
157-
Entry = &getCDBInDirLocked(Path);
158-
return Entry->CDB != nullptr;
159-
});
243+
DirectoryCache *DirCache = nullptr;
244+
std::shared_ptr<const tooling::CompilationDatabase> CDB = nullptr;
245+
if (OnlyDirCache) {
246+
DirCache = OnlyDirCache.get();
247+
ShouldBroadcast = Request.ShouldBroadcast;
248+
CDB = DirCache->get(ShouldBroadcast);
249+
} else {
250+
// Traverse the canonical version to prevent false positives. i.e.:
251+
// src/build/../a.cc can detect a CDB in /src/build if not canonicalized.
252+
std::string CanonicalPath = removeDots(Request.FileName);
253+
std::vector<llvm::StringRef> SearchDirs;
254+
actOnAllParentDirectories(CanonicalPath, [&](PathRef Path) {
255+
SearchDirs.push_back(Path);
256+
return false;
257+
});
258+
for (DirectoryCache *Candidate : getDirectoryCaches(SearchDirs)) {
259+
bool CandidateShouldBroadcast = Request.ShouldBroadcast;
260+
if ((CDB = Candidate->get(CandidateShouldBroadcast))) {
261+
DirCache = Candidate;
262+
ShouldBroadcast = CandidateShouldBroadcast;
263+
break;
264+
}
160265
}
266+
}
161267

162-
if (!Entry || !Entry->CDB)
163-
return llvm::None;
164-
165-
// Mark CDB as broadcasted to make sure discovery is performed once.
166-
if (Request.ShouldBroadcast && !Entry->SentBroadcast) {
167-
Entry->SentBroadcast = true;
168-
ShouldBroadcast = true;
169-
}
268+
if (!CDB)
269+
return llvm::None;
170270

171-
Result.CDB = Entry->CDB.get();
172-
Result.PI.SourceRoot = Entry->Path;
173-
}
271+
CDBLookupResult Result;
272+
Result.CDB = std::move(CDB);
273+
Result.PI.SourceRoot = DirCache->path().str();
174274

175-
// FIXME: Maybe make the following part async, since this can block retrieval
176-
// of compile commands.
275+
// FIXME: Maybe make the following part async, since this can block
276+
// retrieval of compile commands.
177277
if (ShouldBroadcast)
178278
broadcastCDB(Result);
179279
return Result;
@@ -186,29 +286,32 @@ void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
186286
std::vector<std::string> AllFiles = Result.CDB->getAllFiles();
187287
// We assume CDB in CompileCommandsDir owns all of its entries, since we don't
188288
// perform any search in parent paths whenever it is set.
189-
if (CompileCommandsDir) {
190-
assert(*CompileCommandsDir == Result.PI.SourceRoot &&
289+
if (OnlyDirCache) {
290+
assert(OnlyDirCache->path() == Result.PI.SourceRoot &&
191291
"Trying to broadcast a CDB outside of CompileCommandsDir!");
192292
OnCommandChanged.broadcast(std::move(AllFiles));
193293
return;
194294
}
195295

296+
// Uniquify all parent directories of all files.
196297
llvm::StringMap<bool> DirectoryHasCDB;
197-
{
198-
std::lock_guard<std::mutex> Lock(Mutex);
199-
// Uniquify all parent directories of all files.
200-
for (llvm::StringRef File : AllFiles) {
201-
actOnAllParentDirectories(File, [&](PathRef Path) {
202-
auto It = DirectoryHasCDB.try_emplace(Path);
203-
// Already seen this path, and all of its parents.
204-
if (!It.second)
205-
return true;
206-
207-
CachedCDB &Entry = getCDBInDirLocked(Path);
208-
It.first->second = Entry.CDB != nullptr;
209-
return pathEqual(Path, Result.PI.SourceRoot);
210-
});
211-
}
298+
std::vector<llvm::StringRef> FileAncestors;
299+
for (llvm::StringRef File : AllFiles) {
300+
actOnAllParentDirectories(File, [&](PathRef Path) {
301+
auto It = DirectoryHasCDB.try_emplace(Path);
302+
// Already seen this path, and all of its parents.
303+
if (!It.second)
304+
return true;
305+
306+
FileAncestors.push_back(It.first->getKey());
307+
return pathEqual(Path, Result.PI.SourceRoot);
308+
});
309+
}
310+
// Work out which ones have CDBs in them.
311+
for (DirectoryCache *Dir : getDirectoryCaches(FileAncestors)) {
312+
bool ShouldBroadcast = false;
313+
if (Dir->get(ShouldBroadcast))
314+
DirectoryHasCDB.find(Dir->path())->setValue(true);
212315
}
213316

214317
std::vector<std::string> GovernedFiles;

clang-tools-extra/clangd/GlobalCompilationDatabase.h

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -81,35 +81,33 @@ class DirectoryBasedGlobalCompilationDatabase
8181
llvm::Optional<ProjectInfo> getProjectInfo(PathRef File) const override;
8282

8383
private:
84-
/// Caches compilation databases loaded from directories.
85-
struct CachedCDB {
86-
std::string Path; // Not case-folded.
87-
std::unique_ptr<clang::tooling::CompilationDatabase> CDB = nullptr;
88-
bool SentBroadcast = false;
89-
};
90-
CachedCDB &getCDBInDirLocked(PathRef File) const;
84+
class DirectoryCache;
85+
// If there's an explicit CompileCommandsDir, cache of the CDB found there.
86+
mutable std::unique_ptr<DirectoryCache> OnlyDirCache;
87+
88+
// Keyed by possibly-case-folded directory path.
89+
// We can hand out pointers as they're stable and entries are never removed.
90+
// Empty if CompileCommandsDir is given (OnlyDirCache is used instead).
91+
mutable llvm::StringMap<DirectoryCache> DirCaches;
92+
// DirCaches access must be locked (unlike OnlyDirCache, which is threadsafe).
93+
mutable std::mutex DirCachesMutex;
94+
95+
std::vector<DirectoryCache *>
96+
getDirectoryCaches(llvm::ArrayRef<llvm::StringRef> Dirs) const;
9197

9298
struct CDBLookupRequest {
9399
PathRef FileName;
94100
// Whether this lookup should trigger discovery of the CDB found.
95101
bool ShouldBroadcast = false;
96102
};
97103
struct CDBLookupResult {
98-
tooling::CompilationDatabase *CDB = nullptr;
104+
std::shared_ptr<const tooling::CompilationDatabase> CDB;
99105
ProjectInfo PI;
100106
};
101107
llvm::Optional<CDBLookupResult> lookupCDB(CDBLookupRequest Request) const;
102108

103109
// Performs broadcast on governed files.
104110
void broadcastCDB(CDBLookupResult Res) const;
105-
106-
mutable std::mutex Mutex;
107-
// Keyed by possibly-case-folded directory path.
108-
mutable llvm::StringMap<CachedCDB> CompilationDatabases;
109-
110-
/// Used for command argument pointing to folder where compile_commands.json
111-
/// is located.
112-
llvm::Optional<Path> CompileCommandsDir;
113111
};
114112

115113
/// Extracts system include search path from drivers matching QueryDriverGlobs

0 commit comments

Comments
 (0)