Skip to content

Commit 261b471

Browse files
authored
[FileCheck] Don't use regex to find prefixes (#72237)
FileCheck currently compiles a regular expression of the form `Prefix1|Prefix2|...` and uses it to find the next prefix in the input. If we had a fast regex implementation, this would be a useful thing to do, as the regex implementation would be able to match multiple prefixes more efficiently than a naive approach. However, with our actual regex implementation, finding the prefixes basically becomes O(InputLen * RegexLen * LargeConstantFactor), which is a lot worse than a simple string search. Replace the regex with StringRef::find(), and keeping track of the next position of each prefix. There are various ways this could be improved on, but it's already significantly faster that the previous approach. For me, this improves check-llvm time from 138.5s to 132.5s, so by around 4-5%. For vector-interleaved-load-i16-stride-7.ll in particular, test time drops from 5s to 2.5s.
1 parent 9a9933f commit 261b471

File tree

5 files changed

+66
-67
lines changed

5 files changed

+66
-67
lines changed

llvm/include/llvm/FileCheck/FileCheck.h

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -187,24 +187,14 @@ class FileCheck {
187187
explicit FileCheck(FileCheckRequest Req);
188188
~FileCheck();
189189

190-
// Combines the check prefixes into a single regex so that we can efficiently
191-
// scan for any of the set.
192-
//
193-
// The semantics are that the longest-match wins which matches our regex
194-
// library.
195-
Regex buildCheckPrefixRegex();
196-
197190
/// Reads the check file from \p Buffer and records the expected strings it
198191
/// contains. Errors are reported against \p SM.
199192
///
200-
/// Only expected strings whose prefix is one of those listed in \p PrefixRE
201-
/// are recorded. \returns true in case of an error, false otherwise.
202-
///
203193
/// If \p ImpPatBufferIDRange, then the range (inclusive start, exclusive end)
204194
/// of IDs for source buffers added to \p SM for implicit patterns are
205195
/// recorded in it. The range is empty if there are none.
206196
bool
207-
readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
197+
readCheckFile(SourceMgr &SM, StringRef Buffer,
208198
std::pair<unsigned, unsigned> *ImpPatBufferIDRange = nullptr);
209199

210200
bool ValidateCheckPrefixes();

llvm/lib/FileCheck/FileCheck.cpp

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1634,6 +1634,60 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
16341634
return Loc;
16351635
}
16361636

1637+
static const char *DefaultCheckPrefixes[] = {"CHECK"};
1638+
static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
1639+
1640+
static void addDefaultPrefixes(FileCheckRequest &Req) {
1641+
if (Req.CheckPrefixes.empty()) {
1642+
for (const char *Prefix : DefaultCheckPrefixes)
1643+
Req.CheckPrefixes.push_back(Prefix);
1644+
Req.IsDefaultCheckPrefix = true;
1645+
}
1646+
if (Req.CommentPrefixes.empty())
1647+
for (const char *Prefix : DefaultCommentPrefixes)
1648+
Req.CommentPrefixes.push_back(Prefix);
1649+
}
1650+
1651+
struct PrefixMatcher {
1652+
/// Prefixes and their first occurrence past the current position.
1653+
SmallVector<std::pair<StringRef, size_t>> Prefixes;
1654+
StringRef Input;
1655+
1656+
PrefixMatcher(ArrayRef<StringRef> CheckPrefixes,
1657+
ArrayRef<StringRef> CommentPrefixes, StringRef Input)
1658+
: Input(Input) {
1659+
for (StringRef Prefix : CheckPrefixes)
1660+
Prefixes.push_back({Prefix, Input.find(Prefix)});
1661+
for (StringRef Prefix : CommentPrefixes)
1662+
Prefixes.push_back({Prefix, Input.find(Prefix)});
1663+
1664+
// Sort by descending length.
1665+
llvm::sort(Prefixes,
1666+
[](auto A, auto B) { return A.first.size() > B.first.size(); });
1667+
}
1668+
1669+
/// Find the next match of a prefix in Buffer.
1670+
/// Returns empty StringRef if not found.
1671+
StringRef match(StringRef Buffer) {
1672+
assert(Buffer.data() >= Input.data() &&
1673+
Buffer.data() + Buffer.size() == Input.data() + Input.size() &&
1674+
"Buffer must be suffix of Input");
1675+
1676+
size_t From = Buffer.data() - Input.data();
1677+
StringRef Match;
1678+
for (auto &[Prefix, Pos] : Prefixes) {
1679+
// If the last occurrence was before From, find the next one after From.
1680+
if (Pos < From)
1681+
Pos = Input.find(Prefix, From);
1682+
// Find the first prefix with the lowest position.
1683+
if (Pos != StringRef::npos &&
1684+
(Match.empty() || size_t(Match.data() - Input.data()) > Pos))
1685+
Match = StringRef(Input.substr(Pos, Prefix.size()));
1686+
}
1687+
return Match;
1688+
}
1689+
};
1690+
16371691
/// Searches the buffer for the first prefix in the prefix regular expression.
16381692
///
16391693
/// This searches the buffer using the provided regular expression, however it
@@ -1658,20 +1712,16 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
16581712
/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
16591713
/// is unspecified.
16601714
static std::pair<StringRef, StringRef>
1661-
FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE,
1715+
FindFirstMatchingPrefix(const FileCheckRequest &Req, PrefixMatcher &Matcher,
16621716
StringRef &Buffer, unsigned &LineNumber,
16631717
Check::FileCheckType &CheckTy) {
1664-
SmallVector<StringRef, 2> Matches;
1665-
16661718
while (!Buffer.empty()) {
1667-
// Find the first (longest) match using the RE.
1668-
if (!PrefixRE.match(Buffer, &Matches))
1719+
// Find the first (longest) prefix match.
1720+
StringRef Prefix = Matcher.match(Buffer);
1721+
if (Prefix.empty())
16691722
// No match at all, bail.
16701723
return {StringRef(), StringRef()};
16711724

1672-
StringRef Prefix = Matches[0];
1673-
Matches.clear();
1674-
16751725
assert(Prefix.data() >= Buffer.data() &&
16761726
Prefix.data() < Buffer.data() + Buffer.size() &&
16771727
"Prefix doesn't start inside of buffer!");
@@ -1720,7 +1770,7 @@ FileCheck::FileCheck(FileCheckRequest Req)
17201770
FileCheck::~FileCheck() = default;
17211771

17221772
bool FileCheck::readCheckFile(
1723-
SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
1773+
SourceMgr &SM, StringRef Buffer,
17241774
std::pair<unsigned, unsigned> *ImpPatBufferIDRange) {
17251775
if (ImpPatBufferIDRange)
17261776
ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0;
@@ -1769,6 +1819,8 @@ bool FileCheck::readCheckFile(
17691819
// found.
17701820
unsigned LineNumber = 1;
17711821

1822+
addDefaultPrefixes(Req);
1823+
PrefixMatcher Matcher(Req.CheckPrefixes, Req.CommentPrefixes, Buffer);
17721824
std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(),
17731825
Req.CheckPrefixes.end());
17741826
const size_t DistinctPrefixes = PrefixesNotFound.size();
@@ -1779,7 +1831,7 @@ bool FileCheck::readCheckFile(
17791831
StringRef UsedPrefix;
17801832
StringRef AfterSuffix;
17811833
std::tie(UsedPrefix, AfterSuffix) =
1782-
FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy);
1834+
FindFirstMatchingPrefix(Req, Matcher, Buffer, LineNumber, CheckTy);
17831835
if (UsedPrefix.empty())
17841836
break;
17851837
if (CheckTy != Check::CheckComment)
@@ -2431,9 +2483,6 @@ static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
24312483
return true;
24322484
}
24332485

2434-
static const char *DefaultCheckPrefixes[] = {"CHECK"};
2435-
static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
2436-
24372486
bool FileCheck::ValidateCheckPrefixes() {
24382487
StringSet<> UniquePrefixes;
24392488
// Add default prefixes to catch user-supplied duplicates of them below.
@@ -2454,33 +2503,6 @@ bool FileCheck::ValidateCheckPrefixes() {
24542503
return true;
24552504
}
24562505

2457-
Regex FileCheck::buildCheckPrefixRegex() {
2458-
if (Req.CheckPrefixes.empty()) {
2459-
for (const char *Prefix : DefaultCheckPrefixes)
2460-
Req.CheckPrefixes.push_back(Prefix);
2461-
Req.IsDefaultCheckPrefix = true;
2462-
}
2463-
if (Req.CommentPrefixes.empty()) {
2464-
for (const char *Prefix : DefaultCommentPrefixes)
2465-
Req.CommentPrefixes.push_back(Prefix);
2466-
}
2467-
2468-
// We already validated the contents of CheckPrefixes and CommentPrefixes so
2469-
// just concatenate them as alternatives.
2470-
SmallString<32> PrefixRegexStr;
2471-
for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
2472-
if (I != 0)
2473-
PrefixRegexStr.push_back('|');
2474-
PrefixRegexStr.append(Req.CheckPrefixes[I]);
2475-
}
2476-
for (StringRef Prefix : Req.CommentPrefixes) {
2477-
PrefixRegexStr.push_back('|');
2478-
PrefixRegexStr.append(Prefix);
2479-
}
2480-
2481-
return Regex(PrefixRegexStr);
2482-
}
2483-
24842506
Error FileCheckPatternContext::defineCmdlineVariables(
24852507
ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
24862508
assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&

llvm/unittests/CodeGen/GlobalISel/GISelMITest.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,7 @@ static inline bool CheckMachineFunction(const MachineFunction &MF,
189189
SourceMgr SM;
190190
SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"),
191191
SMLoc());
192-
Regex PrefixRE = FC.buildCheckPrefixRegex();
193-
if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
192+
if (FC.readCheckFile(SM, CheckFileText))
194193
return false;
195194

196195
auto OutBuffer = OutputBuf->getBuffer();

llvm/unittests/MIR/MachineMetadata.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,7 @@ static bool checkOutput(std::string CheckString, std::string Output) {
193193
SourceMgr SM;
194194
SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"),
195195
SMLoc());
196-
Regex PrefixRE = FC.buildCheckPrefixRegex();
197-
if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
196+
if (FC.readCheckFile(SM, CheckFileText))
198197
return false;
199198

200199
auto OutBuffer = OutputBuffer->getBuffer();

llvm/utils/FileCheck/FileCheck.cpp

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -810,17 +810,6 @@ int main(int argc, char **argv) {
810810
if (!FC.ValidateCheckPrefixes())
811811
return 2;
812812

813-
Regex PrefixRE = FC.buildCheckPrefixRegex();
814-
std::string REError;
815-
if (!PrefixRE.isValid(REError)) {
816-
errs() << "Unable to combine check-prefix strings into a prefix regular "
817-
"expression! This is likely a bug in FileCheck's verification of "
818-
"the check-prefix strings. Regular expression parsing failed "
819-
"with the following error: "
820-
<< REError << "\n";
821-
return 2;
822-
}
823-
824813
SourceMgr SM;
825814

826815
// Read the expected strings from the check file.
@@ -842,7 +831,7 @@ int main(int argc, char **argv) {
842831
SMLoc());
843832

844833
std::pair<unsigned, unsigned> ImpPatBufferIDRange;
845-
if (FC.readCheckFile(SM, CheckFileText, PrefixRE, &ImpPatBufferIDRange))
834+
if (FC.readCheckFile(SM, CheckFileText, &ImpPatBufferIDRange))
846835
return 2;
847836

848837
// Open the file to check and add it to SourceMgr.

0 commit comments

Comments
 (0)