Skip to content

[FileCheck] Don't use regex to find prefixes #72237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions llvm/include/llvm/FileCheck/FileCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,24 +187,14 @@ class FileCheck {
explicit FileCheck(FileCheckRequest Req);
~FileCheck();

// Combines the check prefixes into a single regex so that we can efficiently
// scan for any of the set.
//
// The semantics are that the longest-match wins which matches our regex
// library.
Regex buildCheckPrefixRegex();

/// Reads the check file from \p Buffer and records the expected strings it
/// contains. Errors are reported against \p SM.
///
/// Only expected strings whose prefix is one of those listed in \p PrefixRE
/// are recorded. \returns true in case of an error, false otherwise.
///
/// If \p ImpPatBufferIDRange, then the range (inclusive start, exclusive end)
/// of IDs for source buffers added to \p SM for implicit patterns are
/// recorded in it. The range is empty if there are none.
bool
readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
readCheckFile(SourceMgr &SM, StringRef Buffer,
std::pair<unsigned, unsigned> *ImpPatBufferIDRange = nullptr);

bool ValidateCheckPrefixes();
Expand Down
102 changes: 62 additions & 40 deletions llvm/lib/FileCheck/FileCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1634,6 +1634,60 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
return Loc;
}

static const char *DefaultCheckPrefixes[] = {"CHECK"};
static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};

static void addDefaultPrefixes(FileCheckRequest &Req) {
if (Req.CheckPrefixes.empty()) {
for (const char *Prefix : DefaultCheckPrefixes)
Req.CheckPrefixes.push_back(Prefix);
Req.IsDefaultCheckPrefix = true;
}
if (Req.CommentPrefixes.empty())
for (const char *Prefix : DefaultCommentPrefixes)
Req.CommentPrefixes.push_back(Prefix);
}

struct PrefixMatcher {
/// Prefixes and their first occurrence past the current position.
SmallVector<std::pair<StringRef, size_t>> Prefixes;
StringRef Input;

PrefixMatcher(ArrayRef<StringRef> CheckPrefixes,
ArrayRef<StringRef> CommentPrefixes, StringRef Input)
: Input(Input) {
for (StringRef Prefix : CheckPrefixes)
Prefixes.push_back({Prefix, Input.find(Prefix)});
for (StringRef Prefix : CommentPrefixes)
Prefixes.push_back({Prefix, Input.find(Prefix)});

// Sort by descending length.
llvm::sort(Prefixes,
[](auto A, auto B) { return A.first.size() > B.first.size(); });
}

/// Find the next match of a prefix in Buffer.
/// Returns empty StringRef if not found.
StringRef match(StringRef Buffer) {
assert(Buffer.data() >= Input.data() &&
Buffer.data() + Buffer.size() == Input.data() + Input.size() &&
"Buffer must be suffix of Input");

size_t From = Buffer.data() - Input.data();
StringRef Match;
for (auto &[Prefix, Pos] : Prefixes) {
// If the last occurrence was before From, find the next one after From.
if (Pos < From)
Pos = Input.find(Prefix, From);
// Find the first prefix with the lowest position.
if (Pos != StringRef::npos &&
(Match.empty() || size_t(Match.data() - Input.data()) > Pos))
Match = StringRef(Input.substr(Pos, Prefix.size()));
}
return Match;
}
};

/// Searches the buffer for the first prefix in the prefix regular expression.
///
/// This searches the buffer using the provided regular expression, however it
Expand All @@ -1658,20 +1712,16 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
/// is unspecified.
static std::pair<StringRef, StringRef>
FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE,
FindFirstMatchingPrefix(const FileCheckRequest &Req, PrefixMatcher &Matcher,
StringRef &Buffer, unsigned &LineNumber,
Check::FileCheckType &CheckTy) {
SmallVector<StringRef, 2> Matches;

while (!Buffer.empty()) {
// Find the first (longest) match using the RE.
if (!PrefixRE.match(Buffer, &Matches))
// Find the first (longest) prefix match.
StringRef Prefix = Matcher.match(Buffer);
if (Prefix.empty())
// No match at all, bail.
return {StringRef(), StringRef()};

StringRef Prefix = Matches[0];
Matches.clear();

assert(Prefix.data() >= Buffer.data() &&
Prefix.data() < Buffer.data() + Buffer.size() &&
"Prefix doesn't start inside of buffer!");
Expand Down Expand Up @@ -1720,7 +1770,7 @@ FileCheck::FileCheck(FileCheckRequest Req)
FileCheck::~FileCheck() = default;

bool FileCheck::readCheckFile(
SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
SourceMgr &SM, StringRef Buffer,
std::pair<unsigned, unsigned> *ImpPatBufferIDRange) {
if (ImpPatBufferIDRange)
ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0;
Expand Down Expand Up @@ -1769,6 +1819,8 @@ bool FileCheck::readCheckFile(
// found.
unsigned LineNumber = 1;

addDefaultPrefixes(Req);
PrefixMatcher Matcher(Req.CheckPrefixes, Req.CommentPrefixes, Buffer);
std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(),
Req.CheckPrefixes.end());
const size_t DistinctPrefixes = PrefixesNotFound.size();
Expand All @@ -1779,7 +1831,7 @@ bool FileCheck::readCheckFile(
StringRef UsedPrefix;
StringRef AfterSuffix;
std::tie(UsedPrefix, AfterSuffix) =
FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy);
FindFirstMatchingPrefix(Req, Matcher, Buffer, LineNumber, CheckTy);
if (UsedPrefix.empty())
break;
if (CheckTy != Check::CheckComment)
Expand Down Expand Up @@ -2431,9 +2483,6 @@ static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
return true;
}

static const char *DefaultCheckPrefixes[] = {"CHECK"};
static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};

bool FileCheck::ValidateCheckPrefixes() {
StringSet<> UniquePrefixes;
// Add default prefixes to catch user-supplied duplicates of them below.
Expand All @@ -2454,33 +2503,6 @@ bool FileCheck::ValidateCheckPrefixes() {
return true;
}

Regex FileCheck::buildCheckPrefixRegex() {
if (Req.CheckPrefixes.empty()) {
for (const char *Prefix : DefaultCheckPrefixes)
Req.CheckPrefixes.push_back(Prefix);
Req.IsDefaultCheckPrefix = true;
}
if (Req.CommentPrefixes.empty()) {
for (const char *Prefix : DefaultCommentPrefixes)
Req.CommentPrefixes.push_back(Prefix);
}

// We already validated the contents of CheckPrefixes and CommentPrefixes so
// just concatenate them as alternatives.
SmallString<32> PrefixRegexStr;
for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
if (I != 0)
PrefixRegexStr.push_back('|');
PrefixRegexStr.append(Req.CheckPrefixes[I]);
}
for (StringRef Prefix : Req.CommentPrefixes) {
PrefixRegexStr.push_back('|');
PrefixRegexStr.append(Prefix);
}

return Regex(PrefixRegexStr);
}

Error FileCheckPatternContext::defineCmdlineVariables(
ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
Expand Down
3 changes: 1 addition & 2 deletions llvm/unittests/CodeGen/GlobalISel/GISelMITest.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ static inline bool CheckMachineFunction(const MachineFunction &MF,
SourceMgr SM;
SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"),
SMLoc());
Regex PrefixRE = FC.buildCheckPrefixRegex();
if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
if (FC.readCheckFile(SM, CheckFileText))
return false;

auto OutBuffer = OutputBuf->getBuffer();
Expand Down
3 changes: 1 addition & 2 deletions llvm/unittests/MIR/MachineMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,7 @@ static bool checkOutput(std::string CheckString, std::string Output) {
SourceMgr SM;
SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"),
SMLoc());
Regex PrefixRE = FC.buildCheckPrefixRegex();
if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
if (FC.readCheckFile(SM, CheckFileText))
return false;

auto OutBuffer = OutputBuffer->getBuffer();
Expand Down
13 changes: 1 addition & 12 deletions llvm/utils/FileCheck/FileCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -810,17 +810,6 @@ int main(int argc, char **argv) {
if (!FC.ValidateCheckPrefixes())
return 2;

Regex PrefixRE = FC.buildCheckPrefixRegex();
std::string REError;
if (!PrefixRE.isValid(REError)) {
errs() << "Unable to combine check-prefix strings into a prefix regular "
"expression! This is likely a bug in FileCheck's verification of "
"the check-prefix strings. Regular expression parsing failed "
"with the following error: "
<< REError << "\n";
return 2;
}

SourceMgr SM;

// Read the expected strings from the check file.
Expand All @@ -842,7 +831,7 @@ int main(int argc, char **argv) {
SMLoc());

std::pair<unsigned, unsigned> ImpPatBufferIDRange;
if (FC.readCheckFile(SM, CheckFileText, PrefixRE, &ImpPatBufferIDRange))
if (FC.readCheckFile(SM, CheckFileText, &ImpPatBufferIDRange))
return 2;

// Open the file to check and add it to SourceMgr.
Expand Down