Skip to content

Commit 569687e

Browse files
authored
Merge pull request #5350 from benlangmuir/slow-regex
[sourcekitd] Replace a slow std::regex with custom parsing
2 parents c1c664b + f074578 commit 569687e

File tree

2 files changed

+106
-46
lines changed

2 files changed

+106
-46
lines changed

lib/IDE/SyntaxModel.cpp

Lines changed: 76 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "swift/Parse/Token.h"
2525
#include "swift/Config.h"
2626
#include "swift/Subsystems.h"
27+
#include "clang/Basic/CharInfo.h"
2728
#include "llvm/ADT/StringSwitch.h"
2829
#include "llvm/Support/MemoryBuffer.h"
2930
#include <vector>
@@ -274,17 +275,6 @@ static const char *const RegexStrURL =
274275
"tn3270|urn|vemmi|wais|xcdoc|z39\\.50r|z39\\.50s)://"
275276
"([a-zA-Z0-9\\-_.]+/)?[a-zA-Z0-9;/?:@\\&=+$,\\-_.!~*'()%#]+";
276277

277-
#define MARKUP_SIMPLE_FIELD(Id, Keyword, XMLKind) \
278-
#Keyword "|"
279-
static const char *const RegexStrDocCommentField =
280-
"^[ ]*- ("
281-
#include "swift/Markup/SimpleFields.def"
282-
"returns):";
283-
284-
static const char *const RegexStrParameter = "^[ ]?- (parameter) [^:]*:";
285-
286-
static const char *const RegexStrDocCommentParametersHeading = "^[ ]?- (Parameters):";
287-
288278
static const char *const RegexStrMailURL =
289279
"(mailto|im):[a-zA-Z0-9\\-_]+@[a-zA-Z0-9\\-_\\.!%]+";
290280

@@ -298,7 +288,6 @@ class ModelASTWalker : public ASTWalker {
298288
std::vector<StructureElement> SubStructureStack;
299289
SourceLoc LastLoc;
300290
static const std::regex &getURLRegex(StringRef Protocol);
301-
static const std::regex &getDocCommentRegex(unsigned Index);
302291

303292
Optional<SyntaxNode> parseFieldNode(StringRef Text, StringRef OrigText,
304293
SourceLoc OrigLoc);
@@ -389,24 +378,6 @@ const std::regex &ModelASTWalker::getURLRegex(StringRef Pro) {
389378
return Regexes[2];
390379
}
391380

392-
const std::regex &ModelASTWalker::getDocCommentRegex(unsigned Index) {
393-
static const std::regex Regexes[3] = {
394-
std::regex {
395-
RegexStrParameter,
396-
std::regex::egrep | std::regex::icase | std::regex::optimize
397-
},
398-
std::regex {
399-
RegexStrDocCommentParametersHeading,
400-
std::regex::egrep | std::regex::icase | std::regex::optimize
401-
},
402-
std::regex {
403-
RegexStrDocCommentField,
404-
std::regex::egrep | std::regex::icase | std::regex::optimize
405-
}
406-
};
407-
return Regexes[Index];
408-
}
409-
410381
SyntaxStructureKind syntaxStructureKindFromNominalTypeDecl(NominalTypeDecl *N) {
411382
if (isa<ClassDecl>(N))
412383
return SyntaxStructureKind::Class;
@@ -1469,27 +1440,86 @@ bool ModelASTWalker::searchForURL(CharSourceRange Range) {
14691440
return true;
14701441
}
14711442

1443+
namespace {
/// Hand-written recognizer for the field keyword that may open a single
/// doc-comment line (for example `- parameter x:` or `- returns:`).
///
/// The parser keeps a cursor into the text it was constructed with and never
/// copies it, so any StringRef it hands back points into that same buffer.
class DocFieldParser {
  const char *ptr; // Cursor: next character to examine.
  const char *end; // One past the last character of the line.

  /// Step over the current character if it is exactly \p c.
  /// \returns true when a character was consumed.
  bool advanceIf(char c) {
    if (ptr != end && *ptr == c) {
      ++ptr;
      return true;
    }
    return false;
  }

  /// Step over the current character if \p predicate accepts it.
  /// \returns true when a character was consumed.
  bool advanceIf(llvm::function_ref<bool(char)> predicate) {
    if (ptr != end && predicate(*ptr)) {
      ++ptr;
      return true;
    }
    return false;
  }

public:
  /// \param text the line to inspect; apart from trailing whitespace it must
  /// not contain a newline (checked by assertion).
  DocFieldParser(StringRef text) : ptr(text.begin()), end(text.end()) {
    assert(text.rtrim().find('\n') == StringRef::npos &&
           "expected single line");
  }

  /// Try to recognize a field keyword at the start of the line.
  ///
  /// The keyword comparison ignores case. Accepted shapes, written as the
  /// regular expressions this parser stands in for:
  ///   ^[ ]?- (parameter) [^:]*:
  ///   ^[ ]?- (Parameters):
  ///   ^[ ]*- (<keywords from swift/Markup/SimpleFields.def>|returns):
  ///
  /// \returns the keyword exactly as spelled in the input (a slice of the
  /// original text), or None when the line matches none of the shapes.
  Optional<StringRef> parseFieldName() {
    // Count the indentation; some shapes tolerate at most one leading space.
    unsigned leadingSpaces = 0;
    for (; advanceIf(' '); ++leadingSpaces) {
    }

    // Every shape continues with the list marker "- ".
    if (!(advanceIf('-') && advanceIf(' ')))
      return None;

    // The keyword is a non-empty run of identifier characters.
    auto isIdentChar = [](char c) { return clang::isIdentifierBody(c); };
    const char *nameBegin = ptr;
    if (!advanceIf(isIdentChar))
      return None;
    while (advanceIf(isIdentChar)) {
    }
    StringRef name(nameBegin, ptr - nameBegin);

    if (name.equals_lower("parameter")) {
      // "- parameter <anything without ':'>:" with at most one leading space.
      if (leadingSpaces > 1)
        return None;
      if (!advanceIf(' '))
        return None;
      while (advanceIf([](char c) { return c != ':'; })) {
      }
      if (advanceIf(':'))
        return name;
      return None;
    }

    // All remaining shapes need the colon directly after the keyword.
    if (!advanceIf(':'))
      return None;

    // The "Parameters:" heading shares the one-leading-space limit.
    if (leadingSpaces > 1 && name.equals_lower("parameters"))
      return None;

    auto loweredName = name.lower();
    bool isKnownField = llvm::StringSwitch<bool>(loweredName)
#define MARKUP_SIMPLE_FIELD(Id, Keyword, XMLKind) .Case(#Keyword, true)
#include "swift/Markup/SimpleFields.def"
                            .Case("parameters", true)
                            .Case("returns", true)
                            .Default(false);
    if (isKnownField)
      return name;
    return None;
  }
};
} // end anonymous namespace
1512+
14721513
Optional<SyntaxNode> ModelASTWalker::parseFieldNode(StringRef Text,
14731514
StringRef OrigText,
14741515
SourceLoc OrigLoc) {
14751516
Optional<SyntaxNode> Node;
1476-
#ifdef SWIFT_HAVE_WORKING_STD_REGEX
1477-
std::match_results<StringRef::iterator> Matches;
1478-
for (unsigned i = 0; i != 3; ++i) {
1479-
auto &Rx = getDocCommentRegex(i);
1480-
bool HadMatch = std::regex_search(Text.begin(), Text.end(), Matches, Rx);
1481-
if (HadMatch)
1482-
break;
1517+
DocFieldParser parser(Text);
1518+
if (auto ident = parser.parseFieldName()) {
1519+
auto loc = OrigLoc.getAdvancedLoc(ident->data() - OrigText.data());
1520+
CharSourceRange range(loc, ident->size());
1521+
Node = Optional<SyntaxNode>({SyntaxNodeKind::DocCommentField, range});
14831522
}
1484-
if (Matches.empty())
1485-
return None;
1486-
1487-
auto &Match = Matches[1];
1488-
StringRef MatchStr(Match.first, Match.second - Match.first);
1489-
auto Loc = OrigLoc.getAdvancedLoc(MatchStr.data() - OrigText.data());
1490-
CharSourceRange Range(Loc, MatchStr.size());
1491-
Node = Optional<SyntaxNode>({ SyntaxNodeKind::DocCommentField, Range });
1492-
#endif
14931523
return Node;
14941524
}
14951525

test/IDE/coloring.swift

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,21 @@ func <#test1#> () {}
369369
///
370370
/// - parameter x: A number
371371
/// - parameter y: Another number
372+
/// - PaRamEteR z-hyphen-q: Another number
373+
/// - parameter : A strange number...
374+
/// - parameternope1: Another number
375+
/// - parameter nope2
376+
/// - parameter: nope3
377+
/// -parameter nope4: Another number
378+
/// * parameter nope5: Another number
379+
/// - parameter nope6: Another number
380+
/// - Parameters: nope7
381+
/// - seealso: yes
382+
/// - seealso: yes
383+
/// - seealso:
384+
/// -seealso: nope
385+
/// - seealso : nope
386+
/// - seealso nope
372387
/// - returns: `x + y`
373388
func foo(x: Int, y: Int) -> Int { return x + y }
374389
// CHECK: <doc-comment-line>/// Brief.
@@ -377,6 +392,21 @@ func foo(x: Int, y: Int) -> Int { return x + y }
377392
// CHECK: </doc-comment-line><doc-comment-line>///
378393
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>parameter</doc-comment-field> x: A number
379394
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>parameter</doc-comment-field> y: Another number
395+
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>PaRamEteR</doc-comment-field> z-hyphen-q: Another number
396+
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>parameter</doc-comment-field> : A strange number...
397+
// CHECK: </doc-comment-line><doc-comment-line>/// - parameternope1: Another number
398+
// CHECK: </doc-comment-line><doc-comment-line>/// - parameter nope2
399+
// CHECK: </doc-comment-line><doc-comment-line>/// - parameter: nope3
400+
// CHECK: </doc-comment-line><doc-comment-line>/// -parameter nope4: Another number
401+
// CHECK: </doc-comment-line><doc-comment-line>/// * parameter nope5: Another number
402+
// CHECK: </doc-comment-line><doc-comment-line>/// - parameter nope6: Another number
403+
// CHECK: </doc-comment-line><doc-comment-line>/// - Parameters: nope7
404+
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>seealso</doc-comment-field>: yes
405+
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>seealso</doc-comment-field>: yes
406+
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>seealso</doc-comment-field>:
407+
// CHECK: </doc-comment-line><doc-comment-line>/// -seealso: nope
408+
// CHECK: </doc-comment-line><doc-comment-line>/// - seealso : nope
409+
// CHECK: </doc-comment-line><doc-comment-line>/// - seealso nope
380410
// CHECK: </doc-comment-line><doc-comment-line>/// - <doc-comment-field>returns</doc-comment-field>: `x + y`
381411
// CHECK: </doc-comment-line><kw>func</kw> foo(x: <type>Int</type>, y: <type>Int</type>) -> <type>Int</type> { <kw>return</kw> x + y }
382412

0 commit comments

Comments
 (0)