Skip to content

Commit 5614438

Browse files
committed
[clangd] Optimize Dex::generateProximityURIs().
Production profiles show that generateProximityURIs is roughly 3.8% of buildPreamble. Of this, the majority (3% of buildPreamble) is parsing and reserializing URIs. We can do this with ugly string manipulation instead. Differential Revision: https://reviews.llvm.org/D135226
1 parent 882a05a commit 5614438

File tree

2 files changed

+45
-26
lines changed

2 files changed

+45
-26
lines changed

clang-tools-extra/clangd/index/dex/Dex.cpp

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,8 @@ std::unique_ptr<Iterator> Dex::createFileProximityIterator(
163163
llvm::StringMap<SourceParams> Sources;
164164
for (const auto &Path : ProximityPaths) {
165165
Sources[Path] = SourceParams();
166-
auto PathURI = URI::create(Path);
167-
const auto PathProximityURIs = generateProximityURIs(PathURI.toString());
166+
auto PathURI = URI::create(Path).toString();
167+
const auto PathProximityURIs = generateProximityURIs(PathURI.c_str());
168168
for (const auto &ProximityURI : PathProximityURIs)
169169
ParentURIs.insert(ProximityURI);
170170
}
@@ -353,30 +353,49 @@ size_t Dex::estimateMemoryUsage() const {
353353
return Bytes + BackingDataSize;
354354
}
355355

356-
std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
357-
std::vector<std::string> Result;
358-
auto ParsedURI = URI::parse(URIPath);
359-
assert(ParsedURI &&
360-
"Non-empty argument of generateProximityURIs() should be a valid "
361-
"URI.");
362-
llvm::StringRef Body = ParsedURI->body();
363-
// FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
364-
// size of resulting vector. Some projects might want to have higher limit if
365-
// the file hierarchy is deeper. For the generic case, it would be useful to
366-
// calculate Limit in the index build stage by calculating the maximum depth
367-
// of the project source tree at runtime.
368-
size_t Limit = 5;
369-
// Insert original URI before the loop: this would save a redundant iteration
370-
// with a URI parse.
371-
Result.emplace_back(ParsedURI->toString());
372-
while (!Body.empty() && --Limit > 0) {
373-
// FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
374-
// could be optimized.
375-
Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
376-
if (!Body.empty())
377-
Result.emplace_back(
378-
URI(ParsedURI->scheme(), ParsedURI->authority(), Body).toString());
356+
// Given foo://bar/one/two
357+
// Returns ~~~~~~~~ (or empty for bad URI)
358+
llvm::StringRef findPathInURI(llvm::StringRef S) {
359+
S = S.split(':').second; // Eat scheme.
360+
if (S.consume_front("//")) // Eat authority.
361+
S = S.drop_until([](char C) { return C == '/'; });
362+
return S;
363+
}
364+
365+
// FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
366+
// size of resulting vector. Some projects might want to have higher limit if
367+
// the file hierarchy is deeper. For the generic case, it would be useful to
368+
// calculate Limit in the index build stage by calculating the maximum depth
369+
// of the project source tree at runtime.
370+
constexpr unsigned ProximityURILimit = 5;
371+
372+
llvm::SmallVector<llvm::StringRef, ProximityURILimit>
373+
generateProximityURIs(llvm::StringRef URI) {
374+
// This function is hot when indexing, so don't parse/reserialize URIPath,
375+
// just emit substrings of it instead.
376+
//
377+
// foo://bar/one/two
378+
// ~~~~~~~~
379+
// Path
380+
llvm::StringRef Path = findPathInURI(URI);
381+
if (Path.empty())
382+
return {}; // Bad URI.
383+
assert(Path.begin() >= URI.begin() && Path.begin() < URI.end() &&
384+
Path.end() == URI.end());
385+
386+
// The original is a proximity URI.
387+
llvm::SmallVector<llvm::StringRef, ProximityURILimit> Result = {URI};
388+
// Form proximity URIs by chopping before each slash in the path part.
389+
for (auto Slash = Path.rfind('/'); Slash > 0 && Slash != StringRef::npos;
390+
Slash = Path.rfind('/')) {
391+
Path = Path.substr(0, Slash);
392+
Result.push_back(URI.substr(0, Path.end() - URI.data()));
393+
if (Result.size() == ProximityURILimit)
394+
return Result;
379395
}
396+
// The root foo://bar/ is a proximity URI.
397+
if (Path.startswith("/"))
398+
Result.push_back(URI.substr(0, Path.begin() + 1 - URI.data()));
380399
return Result;
381400
}
382401

clang-tools-extra/clangd/index/dex/Dex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ class Dex : public SymbolIndex {
132132
/// Should be used within the index build process.
133133
///
134134
/// This function is exposed for testing only.
135-
std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
135+
llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef);
136136

137137
} // namespace dex
138138
} // namespace clangd

0 commit comments

Comments
 (0)