Skip to content

Commit 4d946f4

Browse files
authored
[SyntaxModel] Improve the performance of searching URLs in comments (#5214)
[SyntaxModel] When searching for URLs in doc comments, reduce the number of protocol name comparisons by looking ahead more characters, NFC. rdar://28298506 Searching for URLs in doc comments can be expensive. We used to treat every colon as an indicator of a potential URL and compare the preceding text against every known protocol name. However, this is not efficient enough. As Ben suggested, we now further divide the protocols into categories so that most protocols can use "://" as the indicator of their presence. I am not sure whether this is enough to close the radar, but I believe it is a valuable performance improvement anyway.
1 parent b51cf74 commit 4d946f4

File tree

1 file changed

+39
-17
lines changed

1 file changed

+39
-17
lines changed

lib/IDE/SyntaxModel.cpp

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ class ModelASTWalker : public ASTWalker {
297297
unsigned BufferID;
298298
std::vector<StructureElement> SubStructureStack;
299299
SourceLoc LastLoc;
300-
static const std::regex &getURLRegex(unsigned Index);
300+
static const std::regex &getURLRegex(StringRef Protocol);
301301
static const std::regex &getDocCommentRegex(unsigned Index);
302302

303303
Optional<SyntaxNode> parseFieldNode(StringRef Text, StringRef OrigText,
@@ -366,13 +366,27 @@ class ModelASTWalker : public ASTWalker {
366366
}
367367
};
368368

369-
const std::regex &ModelASTWalker::getURLRegex(unsigned Index) {
369+
const std::regex &ModelASTWalker::getURLRegex(StringRef Pro) {
370370
static const std::regex Regexes[3] = {
371371
std::regex{ RegexStrURL, std::regex::ECMAScript | std::regex::nosubs },
372372
std::regex{ RegexStrMailURL, std::regex::ECMAScript | std::regex::nosubs },
373373
std::regex{ RegexStrRadarURL, std::regex::ECMAScript | std::regex::nosubs }
374374
};
375-
return Regexes[Index];
375+
376+
static const auto MailToPosition = std::find(URLProtocols.begin(),
377+
URLProtocols.end(),
378+
"mailto");
379+
static const auto RadarPosition = std::find(URLProtocols.begin(),
380+
URLProtocols.end(),
381+
"radar");
382+
auto Found = std::find(URLProtocols.begin(), URLProtocols.end(), Pro);
383+
assert(Found != URLProtocols.end() && "bad protocol name");
384+
if (Found < MailToPosition)
385+
return Regexes[0];
386+
else if (Found < RadarPosition)
387+
return Regexes[1];
388+
else
389+
return Regexes[2];
376390
}
377391

378392
const std::regex &ModelASTWalker::getDocCommentRegex(unsigned Index) {
@@ -1369,24 +1383,32 @@ bool ModelASTWalker::findUrlStartingLoc(StringRef Text,
13691383
static const auto MailToPosition = std::find(URLProtocols.begin(),
13701384
URLProtocols.end(),
13711385
"mailto");
1372-
static const auto RadarPosition = std::find(URLProtocols.begin(),
1373-
URLProtocols.end(),
1374-
"radar");
13751386
auto Index = Text.find(":");
13761387
if (Index == StringRef::npos)
13771388
return false;
13781389

1379-
for (auto It = URLProtocols.begin(); It != URLProtocols.end(); ++ It) {
1380-
if (Index >= It->size() &&
1381-
Text.substr(Index - It->size(), It->size()) == *It) {
1382-
Start = Index - It->size();
1383-
if (It < MailToPosition)
1384-
Regex = getURLRegex(0);
1385-
else if (It < RadarPosition)
1386-
Regex = getURLRegex(1);
1387-
else
1388-
Regex = getURLRegex(2);
1389-
return true;
1390+
auto Lookback = [Text](unsigned Index, StringRef Name) {
1391+
return Index >= Name.size() &&
1392+
Text.substr(Index - Name.size(), Name.size()) == Name;
1393+
};
1394+
1395+
auto HasSlash = Text.substr(Index).startswith("://");
1396+
1397+
if (HasSlash) {
1398+
for (auto It = URLProtocols.begin(); It < URLProtocols.end(); ++ It) {
1399+
if (Lookback(Index, *It)) {
1400+
Regex = getURLRegex(*It);
1401+
Start = Index - It->size();
1402+
return true;
1403+
}
1404+
}
1405+
} else {
1406+
for (auto It = MailToPosition; It < URLProtocols.end(); ++ It) {
1407+
if (Lookback(Index, *It)) {
1408+
Regex = getURLRegex(*It);
1409+
Start = Index - It->size();
1410+
return true;
1411+
}
13901412
}
13911413
}
13921414
#endif

0 commit comments

Comments
 (0)