Skip to content

Commit fe7878e

Browse files
committed
[Serialization] Improve module loading performance
When looking for a Swift module on disk, we were checking every module search path to see whether it contains the module we are searching for. In a setup where each module is contained in its own framework search path, this scaled quadratically with the number of modules being imported. E.g. a setup with 100 modules being imported from 100 module search paths could cause on the order of 10,000 checks of `FileSystem::exists`. While these checks are fairly fast (~10µs), they add up to ~100ms. To improve this, perform a first scan of all module search paths and list the files they contain. From this, create a lookup map that maps filenames to the search paths they can be found in. E.g. for ``` searchPath1/ Module1.framework searchPath2/ Module1.framework Module2.swiftmodule ``` we create the following lookup table ``` Module1.framework -> [searchPath1, searchPath2] Module2.swiftmodule -> [searchPath2] ```
1 parent 5a6341b commit fe7878e

24 files changed

+585
-216
lines changed

include/swift/AST/SearchPathOptions.h

Lines changed: 253 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,155 @@
1616
#include "swift/Basic/ArrayRefView.h"
1717
#include "swift/Basic/PathRemapper.h"
1818
#include "llvm/ADT/Hashing.h"
19+
#include "llvm/ADT/StringMap.h"
20+
#include "llvm/Support/VirtualFileSystem.h"
1921

2022
#include <string>
2123
#include <vector>
2224

2325
namespace swift {
2426

25-
/// Options for controlling search path behavior.
26-
class SearchPathOptions {
27+
/// The kind of a module search path. The order of this enum is important
28+
/// because import search paths should be considered before framework search
29+
/// paths etc.
30+
enum class ModuleSearchPathKind {
31+
Import,
32+
Framework,
33+
DarwinImplictFramework,
34+
RuntimeLibrary,
35+
};
36+
37+
/// A single module search path that can come from different sources, e.g.
38+
/// framework search paths, import search path etc.
39+
struct ModuleSearchPath {
40+
/// The actual path of the module search path. References a search path string
41+
/// stored inside \c SearchPathOptions, which must outlive this reference.
42+
StringRef Path;
43+
44+
/// The kind of the search path.
45+
ModuleSearchPathKind Kind;
46+
47+
bool IsSystem;
48+
49+
/// An index that describes the order this search path should be considered
50+
/// in within its \c ModuleSearchPathKind. This allows us to reconstruct the
51+
/// user-defined search path order when merging search paths containing
52+
/// different file names in \c searchPathsContainingFile.
53+
unsigned Index;
54+
55+
bool operator<(const ModuleSearchPath &Other) const {
56+
if (this->Kind == Other.Kind) {
57+
return this->Index < Other.Index;
58+
} else {
59+
return this->Kind < Other.Kind;
60+
}
61+
}
62+
};
63+
64+
class SearchPathOptions;
65+
66+
/// Maintains a mapping of filenames to search paths that contain a file with
67+
/// this name (non-recursively). E.g. if we have a directory structure as
68+
/// follows.
69+
///
70+
/// \code
71+
/// searchPath1/
72+
/// Module1.framework
73+
///
74+
/// searchPath2/
75+
/// Module1.framework
76+
/// Module2.swiftmodule
77+
/// \endcode
78+
///
79+
/// We have the following lookup table
80+
///
81+
/// \code
82+
/// Module1.framework -> [searchPath1, searchPath2]
83+
/// Module2.swiftmodule -> [searchPath2]
84+
/// \endcode
85+
///
86+
/// When searching for a module this allows an efficient search of only those
87+
/// search paths that are relevant. In a naive implementation, we would need
88+
/// to scan all search paths for every module we import.
89+
class ModuleSearchPathLookup {
90+
/// Parameters for which the \c LookupTable has been built. If one of these
91+
/// changes, the lookup table needs to be rebuilt. It is not expected that any
92+
/// of these change frequently.
93+
struct {
94+
llvm::vfs::FileSystem *FileSystem;
95+
bool IsOSDarwin;
96+
bool IsPopulated;
97+
const SearchPathOptions *Opts;
98+
} State;
99+
100+
llvm::StringMap<SmallVector<ModuleSearchPath, 4>> LookupTable;
101+
102+
/// Scan the directory at \p SearchPath for files and add those files to the
103+
/// lookup table. \p Kind specifies the search path kind and \p Index the
104+
/// index of \p SearchPath within that search path kind. Search paths with
105+
/// lower indices are considered first.
106+
/// The \p SearchPath is stored as a \c StringRef, so the string backing it
107+
/// must be alive as long as this lookup table is alive and not cleared.
108+
void addFilesInPathToLookupTable(llvm::vfs::FileSystem *FS,
109+
StringRef SearchPath,
110+
ModuleSearchPathKind Kind, bool IsSystem,
111+
unsigned Index);
112+
113+
/// Discard the current lookup table and rebuild a new one.
114+
void rebuildLookupTable(const SearchPathOptions *Opts,
115+
llvm::vfs::FileSystem *FS, bool IsOsDarwin);
116+
117+
/// Discard the current lookup table.
118+
void clearLookupTable() {
119+
LookupTable.clear();
120+
State.IsPopulated = false;
121+
State.FileSystem = nullptr;
122+
State.IsOSDarwin = false;
123+
State.Opts = nullptr;
124+
}
125+
27126
public:
28-
/// Path to the SDK which is being built against.
29-
std::string SDKPath;
127+
/// Called by \p SearchPathOptions when search paths indexed by this \c
128+
/// SearchPathLookup have changed in an unknown way. Causes the lookup table
129+
/// to be rebuilt at the next request.
130+
void searchPathsDidChange() { clearLookupTable(); }
30131

31-
/// Path(s) which should be searched for modules.
32-
///
33-
/// Do not add values to this directly. Instead, use
34-
/// \c ASTContext::addSearchPath.
35-
std::vector<std::string> ImportSearchPaths;
132+
/// Called by \p SearchPathOptions when an import or framework search path has
133+
/// been added.
134+
/// \p Index is the index of the search path within its kind and is used to
135+
/// make sure this search path is considered last (within its kind).
136+
void searchPathAdded(llvm::vfs::FileSystem *FS, StringRef SearchPath,
137+
ModuleSearchPathKind Kind, bool IsSystem,
138+
unsigned Index) {
139+
if (!State.IsPopulated) {
140+
// If the lookup table hasn't been built yet, we will scan the search path
141+
// once the lookup table is requested. Nothing to do yet.
142+
return;
143+
}
144+
if (State.FileSystem != FS) {
145+
// We would be using a different file system to augment the lookup table
146+
// than we initially used to build it. Discard everything to be safe.
147+
clearLookupTable();
148+
return;
149+
}
150+
addFilesInPathToLookupTable(FS, SearchPath, Kind, IsSystem, Index);
151+
}
36152

37-
/// Path(s) to virtual filesystem overlay YAML files.
38-
std::vector<std::string> VFSOverlayFiles;
153+
/// Returns all search paths that non-recursively contain a file whose name
154+
/// is in \p Filenames.
155+
SmallVector<const ModuleSearchPath *, 4>
156+
searchPathsContainingFile(const SearchPathOptions *Opts,
157+
llvm::ArrayRef<std::string> Filenames,
158+
llvm::vfs::FileSystem *FS, bool IsOSDarwin);
159+
};
160+
161+
/// Options for controlling search path behavior.
162+
class SearchPathOptions {
163+
/// To call \c addImportSearchPath and \c addFrameworkSearchPath from
164+
/// \c ASTContext::addSearchPath.
165+
friend class ASTContext;
39166

167+
public:
40168
struct FrameworkSearchPath {
41169
std::string Path;
42170
bool IsSystem = false;
@@ -52,12 +180,116 @@ class SearchPathOptions {
52180
return !(LHS == RHS);
53181
}
54182
};
183+
184+
private:
185+
ModuleSearchPathLookup Lookup;
186+
187+
/// Path to the SDK which is being built against.
188+
///
189+
/// Must be modified through setter to keep \c SearchPathLookup in sync.
190+
std::string SDKPath;
191+
192+
/// Path(s) which should be searched for modules.
193+
///
194+
/// Must be modified through setter to keep \c SearchPathLookup in sync.
195+
std::vector<std::string> ImportSearchPaths;
196+
55197
/// Path(s) which should be searched for frameworks.
56198
///
57-
/// Do not add values to this directly. Instead, use
58-
/// \c ASTContext::addSearchPath.
199+
/// Must be modified through setter to keep \c SearchPathLookup in sync.
59200
std::vector<FrameworkSearchPath> FrameworkSearchPaths;
60201

202+
/// Paths to search for stdlib modules. One of these will be
203+
/// compiler-relative.
204+
///
205+
/// Must be modified through setter to keep \c SearchPathLookup in sync.
206+
std::vector<std::string> RuntimeLibraryImportPaths;
207+
208+
/// When on Darwin the framework paths that are implicitly imported.
209+
/// $SDKROOT/System/Library/Frameworks/ and $SDKROOT/Library/Frameworks/.
210+
///
211+
/// On non-Darwin platforms these are populated, but ignored.
212+
///
213+
/// Computed when the SDK path is set and cached so we can reference the
214+
/// Darwin implicit framework search paths as \c StringRef from
215+
/// \c ModuleSearchPath.
216+
std::vector<std::string> DarwinImplicitFrameworkSearchPaths;
217+
218+
/// Add a single import search path. Must only be called from
219+
/// \c ASTContext::addSearchPath.
220+
void addImportSearchPath(StringRef Path, llvm::vfs::FileSystem *FS) {
221+
ImportSearchPaths.push_back(Path.str());
222+
Lookup.searchPathAdded(FS, ImportSearchPaths.back(),
223+
ModuleSearchPathKind::Import, /*isSystem=*/false,
224+
ImportSearchPaths.size() - 1);
225+
}
226+
227+
/// Add a single framework search path. Must only be called from
228+
/// \c ASTContext::addSearchPath.
229+
void addFrameworkSearchPath(FrameworkSearchPath NewPath,
230+
llvm::vfs::FileSystem *FS) {
231+
FrameworkSearchPaths.push_back(NewPath);
232+
Lookup.searchPathAdded(FS, FrameworkSearchPaths.back().Path,
233+
ModuleSearchPathKind::Framework, NewPath.IsSystem,
234+
FrameworkSearchPaths.size() - 1);
235+
}
236+
237+
public:
238+
StringRef getSDKPath() const { return SDKPath; }
239+
240+
void setSDKPath(std::string NewSDKPath) {
241+
SDKPath = NewSDKPath;
242+
243+
// Compute Darwin implicit framework search paths.
244+
SmallString<128> systemFrameworksScratch(NewSDKPath);
245+
llvm::sys::path::append(systemFrameworksScratch, "System", "Library",
246+
"Frameworks");
247+
SmallString<128> frameworksScratch(NewSDKPath);
248+
llvm::sys::path::append(frameworksScratch, "Library", "Frameworks");
249+
DarwinImplicitFrameworkSearchPaths = {systemFrameworksScratch.str().str(),
250+
frameworksScratch.str().str()};
251+
252+
Lookup.searchPathsDidChange();
253+
}
254+
255+
ArrayRef<std::string> getImportSearchPaths() const {
256+
return ImportSearchPaths;
257+
}
258+
259+
void setImportSearchPaths(std::vector<std::string> NewImportSearchPaths) {
260+
ImportSearchPaths = NewImportSearchPaths;
261+
Lookup.searchPathsDidChange();
262+
}
263+
264+
ArrayRef<FrameworkSearchPath> getFrameworkSearchPaths() const {
265+
return FrameworkSearchPaths;
266+
}
267+
268+
void setFrameworkSearchPaths(
269+
std::vector<FrameworkSearchPath> NewFrameworkSearchPaths) {
270+
FrameworkSearchPaths = NewFrameworkSearchPaths;
271+
Lookup.searchPathsDidChange();
272+
}
273+
274+
/// The extra implicit framework search paths on Apple platforms:
275+
/// $SDKROOT/System/Library/Frameworks/ and $SDKROOT/Library/Frameworks/.
276+
ArrayRef<std::string> getDarwinImplicitFrameworkSearchPaths() const {
277+
return DarwinImplicitFrameworkSearchPaths;
278+
}
279+
280+
ArrayRef<std::string> getRuntimeLibraryImportPaths() const {
281+
return RuntimeLibraryImportPaths;
282+
}
283+
284+
void setRuntimeLibraryImportPaths(
285+
std::vector<std::string> NewRuntimeLibraryImportPaths) {
286+
RuntimeLibraryImportPaths = NewRuntimeLibraryImportPaths;
287+
Lookup.searchPathsDidChange();
288+
}
289+
290+
/// Path(s) to virtual filesystem overlay YAML files.
291+
std::vector<std::string> VFSOverlayFiles;
292+
61293
/// Path(s) which should be searched for libraries.
62294
///
63295
/// This is used in immediate modes. It is safe to add paths to this directly.
@@ -70,9 +302,6 @@ class SearchPathOptions {
70302
/// preference.
71303
std::vector<std::string> RuntimeLibraryPaths;
72304

73-
/// Paths to search for stdlib modules. One of these will be compiler-relative.
74-
std::vector<std::string> RuntimeLibraryImportPaths;
75-
76305
/// Don't look for compiler-provided modules.
77306
bool SkipRuntimeLibraryImportPaths = false;
78307

@@ -107,6 +336,14 @@ class SearchPathOptions {
107336
/// original form.
108337
PathObfuscator DeserializedPathRecoverer;
109338

339+
/// Return all module search paths that (non-recursively) contain a file whose
340+
/// name is in \p Filenames.
341+
SmallVector<const ModuleSearchPath *, 4>
342+
moduleSearchPathsContainingFile(llvm::ArrayRef<std::string> Filenames,
343+
llvm::vfs::FileSystem *FS, bool IsOSDarwin) {
344+
return Lookup.searchPathsContainingFile(this, Filenames, FS, IsOSDarwin);
345+
}
346+
110347
private:
111348
static StringRef
112349
pathStringFromFrameworkSearchPath(const FrameworkSearchPath &next) {
@@ -141,7 +378,6 @@ class SearchPathOptions {
141378
DisableModulesValidateSystemDependencies);
142379
}
143380
};
144-
145381
}
146382

147383
#endif

include/swift/Basic/PathRemapper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#ifndef SWIFT_BASIC_PATHREMAPPER_H
2525
#define SWIFT_BASIC_PATHREMAPPER_H
2626

27+
#include "swift/Basic/LLVM.h"
2728
#include "llvm/ADT/SmallVector.h"
2829
#include "llvm/ADT/Twine.h"
2930

include/swift/Frontend/Frontend.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -170,20 +170,20 @@ class CompilerInvocation {
170170
}
171171

172172
void setImportSearchPaths(const std::vector<std::string> &Paths) {
173-
SearchPathOpts.ImportSearchPaths = Paths;
173+
SearchPathOpts.setImportSearchPaths(Paths);
174174
}
175175

176176
ArrayRef<std::string> getImportSearchPaths() const {
177-
return SearchPathOpts.ImportSearchPaths;
177+
return SearchPathOpts.getImportSearchPaths();
178178
}
179179

180180
void setFrameworkSearchPaths(
181181
const std::vector<SearchPathOptions::FrameworkSearchPath> &Paths) {
182-
SearchPathOpts.FrameworkSearchPaths = Paths;
182+
SearchPathOpts.setFrameworkSearchPaths(Paths);
183183
}
184184

185185
ArrayRef<SearchPathOptions::FrameworkSearchPath> getFrameworkSearchPaths() const {
186-
return SearchPathOpts.FrameworkSearchPaths;
186+
return SearchPathOpts.getFrameworkSearchPaths();
187187
}
188188

189189
void setExtraClangArgs(const std::vector<std::string> &Args) {
@@ -229,9 +229,7 @@ class CompilerInvocation {
229229

230230
void setSDKPath(const std::string &Path);
231231

232-
StringRef getSDKPath() const {
233-
return SearchPathOpts.SDKPath;
234-
}
232+
StringRef getSDKPath() const { return SearchPathOpts.getSDKPath(); }
235233

236234
LangOptions &getLangOptions() {
237235
return LangOpts;

include/swift/Serialization/ModuleDependencyScanner.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,14 @@ namespace swift {
110110
}
111111
}
112112

113-
std::error_code findModuleFilesInDirectory(
114-
ImportPath::Element ModuleID,
115-
const SerializedModuleBaseName &BaseName,
116-
SmallVectorImpl<char> *ModuleInterfacePath,
117-
std::unique_ptr<llvm::MemoryBuffer> *ModuleBuffer,
118-
std::unique_ptr<llvm::MemoryBuffer> *ModuleDocBuffer,
119-
std::unique_ptr<llvm::MemoryBuffer> *ModuleSourceInfoBuffer,
120-
bool skipBuildingInterface, bool IsFramework) override;
113+
virtual bool
114+
findModule(ImportPath::Element moduleID,
115+
SmallVectorImpl<char> *moduleInterfacePath,
116+
std::unique_ptr<llvm::MemoryBuffer> *moduleBuffer,
117+
std::unique_ptr<llvm::MemoryBuffer> *moduleDocBuffer,
118+
std::unique_ptr<llvm::MemoryBuffer> *moduleSourceInfoBuffer,
119+
bool skipBuildingInterface, bool &isFramework,
120+
bool &isSystemModule) override;
121121

122122
static bool classof(const ModuleDependencyScanner *MDS) {
123123
return MDS->getKind() == MDS_placeholder;

0 commit comments

Comments
 (0)