Skip to content

Commit e1f4c4a

Browse files
committed
[clang-scan-deps] Implementation of dependency scanner over minimized sources
This commit implements the fast dependency scanning mode in clang-scan-deps: the preprocessing is done on files that are minimized using the dependency directives source minimizer. A shared file system cache is used to ensure that the file system requests and source minimization is performed only once. The cache assumes that the underlying filesystem won't change during the course of the scan (or if it will, it will not affect the output), and it can't be evicted. This means that the service and workers can be used for a single run of a dependency scanner, and can't be reused across multiple, incremental runs. This is something that we'll most likely support in the future though. Note that the driver still utilizes the underlying real filesystem. This commit is also still missing the fast skipped PP block skipping optimization that I mentioned at EuroLLVM talk. Additionally, the file manager is still not reused by the threads as well. Differential Revision: https://reviews.llvm.org/D63907 llvm-svn: 368086
1 parent e4bd384 commit e1f4c4a

File tree

17 files changed

+608
-21
lines changed

17 files changed

+608
-21
lines changed

clang/include/clang/Basic/FileManager.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,10 @@ class FileManager : public RefCountedBase<FileManager> {
231231

232232
/// \returns a reference to the file system object that the FS member points to.
llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; }
233233

234+
/// Replaces the file system held by this object: the passed-in pointer is
/// moved into the FS member. Only the FS member is reassigned here; no other
/// state of this object is touched by this setter.
void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
235+
this->FS = std::move(FS);
236+
}
237+
234238
/// Retrieve a file entry for a "virtual" file that acts as
235239
/// if there were a file with the given name on disk.
236240
///
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
10+
#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
11+
12+
#include "clang/Basic/LLVM.h"
13+
#include "llvm/ADT/StringMap.h"
14+
#include "llvm/ADT/StringSet.h"
15+
#include "llvm/Support/Allocator.h"
16+
#include "llvm/Support/ErrorOr.h"
17+
#include "llvm/Support/VirtualFileSystem.h"
18+
#include <mutex>
19+
20+
namespace clang {
21+
namespace tooling {
22+
namespace dependencies {
23+
24+
/// An in-memory representation of a file system entity that is of interest to
25+
/// the dependency scanning filesystem.
26+
///
27+
/// It represents one of the following:
28+
/// - an opened source file with minimized contents and a stat value.
29+
/// - an opened source file with original contents and a stat value.
30+
/// - a directory entry with its stat value.
31+
/// - an error value to represent a file system error.
32+
/// - a placeholder with an invalid stat indicating a not yet initialized entry.
33+
class CachedFileSystemEntry {
34+
public:
35+
/// Default constructor creates an entry with an invalid stat.
36+
CachedFileSystemEntry() : MaybeStat(llvm::vfs::Status()) {}
37+
38+
/// Create an entry that represents a file system error; \p Error is stored
/// in place of a stat value.
CachedFileSystemEntry(std::error_code Error) : MaybeStat(std::move(Error)) {}
39+
40+
/// Create an entry that represents an opened source file with minimized or
41+
/// original contents.
42+
///
43+
/// The filesystem opens the file even for `stat` calls to avoid the
44+
/// issues with stat + open of minimized files that might lead to a
45+
/// mismatching size of the file. If file is not minimized, the full file is
46+
/// read and copied into memory to ensure that it's not memory mapped to avoid
47+
/// running out of file descriptors.
48+
static CachedFileSystemEntry createFileEntry(StringRef Filename,
49+
llvm::vfs::FileSystem &FS,
50+
bool Minimize = true);
51+
52+
/// Create an entry that represents a directory on the filesystem.
53+
static CachedFileSystemEntry createDirectoryEntry(llvm::vfs::Status &&Stat);
54+
55+
/// \returns True if the entry is valid.
///
/// An entry that holds an error counts as valid; an entry that holds a
/// status is valid only when that status reports isStatusKnown().
56+
bool isValid() const { return !MaybeStat || MaybeStat->isStatusKnown(); }
57+
58+
/// \returns The error or the file's contents.
///
/// The stored error is returned before any assertion runs. Otherwise the
/// entry must not be a directory and must be initialized (both asserted).
59+
llvm::ErrorOr<StringRef> getContents() const {
60+
if (!MaybeStat)
61+
return MaybeStat.getError();
62+
assert(!MaybeStat->isDirectory() && "not a file");
63+
assert(isValid() && "not initialized");
64+
return StringRef(Contents);
65+
}
66+
67+
/// \returns The error or the status of the entry.
68+
llvm::ErrorOr<llvm::vfs::Status> getStatus() const {
69+
assert(isValid() && "not initialized");
70+
return MaybeStat;
71+
}
72+
73+
/// \returns the name of the file.
///
/// NOTE(review): isValid() is also true for an entry that holds an error, so
/// the assertion below does not rule out dereferencing an error-holding
/// MaybeStat. Presumably callers only ask for the name of non-error entries;
/// confirm against the call sites.
74+
StringRef getName() const {
75+
assert(isValid() && "not initialized");
76+
return MaybeStat->getName();
77+
}
78+
79+
// Entries can be moved ...
CachedFileSystemEntry(CachedFileSystemEntry &&) = default;
80+
CachedFileSystemEntry &operator=(CachedFileSystemEntry &&) = default;
81+
82+
// ... but not copied.
CachedFileSystemEntry(const CachedFileSystemEntry &) = delete;
83+
CachedFileSystemEntry &operator=(const CachedFileSystemEntry &) = delete;
84+
85+
private:
86+
// Either the stat value of the entry or the error code recorded for it.
llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
87+
// Store the contents in a small string to allow a
88+
// move from the small string for the minimized contents.
89+
// Note: small size of 1 allows us to store an empty string with an implicit
90+
// null terminator without any allocations.
91+
llvm::SmallString<1> Contents;
92+
};
93+
94+
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
95+
/// underlying real file system.
96+
///
97+
/// It is sharded based on the hash of the key to reduce the lock contention for
98+
/// the worker threads.
99+
class DependencyScanningFilesystemSharedCache {
100+
public:
101+
/// A cached entry bundled with its own mutex.
///
/// NOTE(review): ValueLock presumably guards Value while a worker
/// initializes or reads it; confirm against the users of get().
struct SharedFileSystemEntry {
102+
std::mutex ValueLock;
103+
CachedFileSystemEntry Value;
104+
};
105+
106+
/// Constructs the cache. The definition is not part of this header;
/// presumably it allocates CacheShards and sets NumShards.
DependencyScanningFilesystemSharedCache();
107+
108+
/// Returns a cache entry for the corresponding key.
109+
///
110+
/// A new cache entry is created if the key is not in the cache. This is a
111+
/// thread safe call.
112+
SharedFileSystemEntry &get(StringRef Key);
113+
114+
private:
115+
/// One shard of the cache: a string-keyed map of entries plus a mutex.
///
/// NOTE(review): CacheLock presumably serializes access to Cache; confirm
/// in the implementation of get().
struct CacheShard {
116+
std::mutex CacheLock;
117+
llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
118+
};
119+
// Owning array of shards.
std::unique_ptr<CacheShard[]> CacheShards;
120+
// Presumably the number of elements in CacheShards; set outside this header.
unsigned NumShards;
121+
};
122+
123+
/// A virtual file system optimized for the dependency discovery.
124+
///
125+
/// It is primarily designed to work with source files whose contents were
126+
/// preprocessed to remove any tokens that are unlikely to affect the dependency
127+
/// computation.
128+
///
129+
/// This is not a thread safe VFS. A single instance is meant to be used only in
130+
/// one thread. Multiple instances are allowed to service multiple threads
131+
/// running in parallel.
132+
class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
133+
public:
134+
/// Creates a worker filesystem that proxies \p FS (handed to the
/// ProxyFileSystem base) and keeps a reference to \p SharedCache.
DependencyScanningWorkerFilesystem(
135+
DependencyScanningFilesystemSharedCache &SharedCache,
136+
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
137+
: ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {}
138+
139+
// Overrides of the base class queries. Their bodies are not in this header;
// presumably they consult the local and shared caches before falling back to
// the proxied file system.
llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
140+
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
141+
openFileForRead(const Twine &Path) override;
142+
143+
/// The set of files that should not be minimized.
144+
llvm::StringSet<> IgnoredFiles;
145+
146+
private:
147+
/// Records \p Entry under \p Filename in the local cache.
///
/// Asserts that no entry for \p Filename was present before the call.
void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
148+
bool IsInserted = Cache.try_emplace(Filename, Entry).second;
149+
// Silences the unused-variable warning when assertions are compiled out.
(void)IsInserted;
150+
assert(IsInserted && "local cache is updated more than once");
151+
}
152+
153+
/// \returns the locally cached entry for \p Filename, or nullptr if the
/// local cache has no entry under that name.
const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
154+
auto It = Cache.find(Filename);
155+
return It == Cache.end() ? nullptr : It->getValue();
156+
}
157+
158+
/// The cache passed in at construction. It is held by reference, so it is
/// not owned by this object.
DependencyScanningFilesystemSharedCache &SharedCache;
159+
/// The local cache is used by the worker thread to cache file system queries
160+
/// locally instead of querying the global cache every time.
161+
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
162+
};
163+
164+
} // end namespace dependencies
165+
} // end namespace tooling
166+
} // end namespace clang
167+
168+
#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//===- DependencyScanningService.h - clang-scan-deps service ===-*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
10+
#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
11+
12+
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
13+
14+
namespace clang {
15+
namespace tooling {
16+
namespace dependencies {
17+
18+
/// The mode in which the dependency scanner will operate to find the
19+
/// dependencies.
20+
enum class ScanningMode {
21+
/// This mode is used to compute the dependencies
22+
/// by running the preprocessor over the
23+
/// unmodified source files.
24+
CanonicalPreprocessing,
25+
26+
/// This mode is used to compute the dependencies by
27+
/// running the preprocessor over the source files that
28+
/// have been minimized to contents that might affect
29+
/// the dependencies.
30+
MinimizedSourcePreprocessing
31+
};
32+
33+
/// The dependency scanning service contains the shared state that is used by
34+
/// the individual dependency scanning workers.
35+
class DependencyScanningService {
36+
public:
37+
/// Constructs the service for the given scanning mode. The definition is
/// not part of this header.
DependencyScanningService(ScanningMode Mode);
38+
39+
/// \returns the scanning mode stored in this service.
ScanningMode getMode() const { return Mode; }
40+
41+
/// \returns a mutable reference to the file system cache owned by this
/// service.
DependencyScanningFilesystemSharedCache &getSharedCache() {
42+
return SharedCache;
43+
}
44+
45+
private:
46+
/// The scanning mode; const, so it cannot change after construction.
const ScanningMode Mode;
47+
/// The global file system cache.
48+
DependencyScanningFilesystemSharedCache SharedCache;
49+
};
50+
51+
} // end namespace dependencies
52+
} // end namespace tooling
53+
} // end namespace clang
54+
55+
#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H

clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H
1111

1212
#include "clang/Basic/DiagnosticOptions.h"
13+
#include "clang/Basic/FileManager.h"
1314
#include "clang/Basic/LLVM.h"
1415
#include "clang/Frontend/PCHContainerOperations.h"
1516
#include "clang/Tooling/CompilationDatabase.h"
@@ -21,6 +22,9 @@ namespace clang {
2122
namespace tooling {
2223
namespace dependencies {
2324

25+
class DependencyScanningService;
26+
class DependencyScanningWorkerFilesystem;
27+
2428
/// An individual dependency scanning worker that is able to run on its own
2529
/// thread.
2630
///
@@ -29,7 +33,7 @@ namespace dependencies {
2933
/// using the regular processing run.
3034
class DependencyScanningWorker {
3135
public:
32-
DependencyScanningWorker();
36+
DependencyScanningWorker(DependencyScanningService &Service);
3337

3438
/// Print out the dependency information into a string using the dependency
3539
/// file format that is specified in the options (-MD is the default) and
@@ -45,10 +49,11 @@ class DependencyScanningWorker {
4549
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
4650
std::shared_ptr<PCHContainerOperations> PCHContainerOps;
4751

52+
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> RealFS;
4853
/// The file system that is used by each worker when scanning for
4954
/// dependencies. This filesystem persists across multiple compiler
5055
/// invocations.
51-
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> WorkerFS;
56+
llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
5257
};
5358

5459
} // end namespace dependencies

clang/lib/Tooling/DependencyScanning/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ set(LLVM_LINK_COMPONENTS
44
)
55

66
add_clang_library(clangDependencyScanning
7+
DependencyScanningFilesystem.cpp
8+
DependencyScanningService.cpp
79
DependencyScanningWorker.cpp
810

911
DEPENDS

0 commit comments

Comments
 (0)