Skip to content

[TableGen] Add !match operator to do regex matching #130759

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions llvm/docs/TableGen/ProgRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,11 @@ TableGen provides "bang operators" that have a wide variety of uses:
: !getdagname !getdagop !gt !head !if
: !initialized !interleave !isa !le !listconcat
: !listflatten !listremove !listsplat !logtwo !lt
: !mul !ne !not !or !range
: !repr !setdagarg !setdagname !setdagop !shl
: !size !sra !srl !strconcat !sub
: !subst !substr !tail !tolower !toupper
: !xor
: !match !mul !ne !not !or
: !range !repr !setdagarg !setdagname !setdagop
: !shl !size !sra !srl !strconcat
: !sub !subst !substr !tail !tolower
: !toupper !xor

The ``!cond`` operator has a slightly different
syntax compared to other bang operators, so it is defined separately:
Expand Down Expand Up @@ -1878,6 +1878,10 @@ and non-0 as true.
This operator produces 1 if *a* is less than *b*; 0 otherwise.
The arguments must be ``bit``, ``bits``, ``int``, or ``string`` values.

``!match(``\ *str*\ ``,`` *regex*\ ``)``
This operator produces 1 if *str* matches the regular expression *regex*;
0 otherwise. The format of *regex* is ERE (POSIX Extended Regular Expressions).

``!mul(``\ *a*\ ``,`` *b*\ ``, ...)``
This operator multiplies *a*, *b*, etc., and produces the product.

Expand Down
33 changes: 33 additions & 0 deletions llvm/include/llvm/TableGen/Record.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ class Init {
IK_FoldOpInit,
IK_IsAOpInit,
IK_ExistsOpInit,
IK_MatchOpInit,
IK_AnonymousNameInit,
IK_StringInit,
IK_VarInit,
Expand Down Expand Up @@ -1191,6 +1192,38 @@ class ExistsOpInit final : public TypedInit, public FoldingSetNode {
std::string getAsString() const override;
};

/// !match(str, regex) - This operator produces 1 if the `str` matches the
/// regular expression `regex`.
class MatchOpInit final : public TypedInit, public FoldingSetNode {
private:
const Init *Str;
const Init *Regex;

MatchOpInit(const Init *Str, const Init *Regex)
: TypedInit(IK_MatchOpInit, BitRecTy::get(Str->getRecordKeeper())),
Str(Str), Regex(Regex) {}

public:
MatchOpInit(const MatchOpInit &) = delete;
MatchOpInit &operator=(const MatchOpInit &) = delete;

static bool classof(const Init *I) { return I->getKind() == IK_MatchOpInit; }

static const MatchOpInit *get(const Init *Str, const Init *Regex);

void Profile(FoldingSetNodeID &ID) const;

const Init *Fold() const;

bool isComplete() const override { return false; }

const Init *resolveReferences(Resolver &R) const override;

const Init *getBit(unsigned Bit) const override;

std::string getAsString() const override;
};

/// 'Opcode' - Represent a reference to an entire variable object.
class VarInit final : public TypedInit {
const Init *VarName;
Expand Down
57 changes: 57 additions & 0 deletions llvm/lib/TableGen/Record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
Expand Down Expand Up @@ -83,6 +84,7 @@ struct RecordKeeperImpl {
FoldingSet<FoldOpInit> TheFoldOpInitPool;
FoldingSet<IsAOpInit> TheIsAOpInitPool;
FoldingSet<ExistsOpInit> TheExistsOpInitPool;
FoldingSet<MatchOpInit> TheMatchOpInitPool;
DenseMap<std::pair<const RecTy *, const Init *>, VarInit *> TheVarInitPool;
DenseMap<std::pair<const TypedInit *, unsigned>, VarBitInit *>
TheVarBitInitPool;
Expand Down Expand Up @@ -2199,6 +2201,61 @@ std::string ExistsOpInit::getAsString() const {
.str();
}

/// Add the identity of a !match node to \p ID: the string operand pointer
/// followed by the regex operand pointer, in that order.
static void ProfileMatchOpInit(FoldingSetNodeID &ID, const Init *Str,
                               const Init *Regex) {
  for (const Init *Operand : {Str, Regex})
    ID.AddPointer(Operand);
}

/// Return the MatchOpInit for the operand pair (\p Str, \p Regex).
///
/// Nodes are kept in the record keeper's TheMatchOpInitPool: an existing node
/// with the same operand pointers is returned as-is, otherwise a new node is
/// allocated from the record keeper's allocator and inserted into the pool.
const MatchOpInit *MatchOpInit::get(const Init *Str, const Init *Regex) {
  detail::RecordKeeperImpl &Impl = Regex->getRecordKeeper().getImpl();

  FoldingSetNodeID Key;
  ProfileMatchOpInit(Key, Str, Regex);

  void *InsertPos = nullptr;
  const MatchOpInit *Existing =
      Impl.TheMatchOpInitPool.FindNodeOrInsertPos(Key, InsertPos);
  if (Existing)
    return Existing;

  // Not found: create the node and register it at the position computed by
  // the lookup above.
  MatchOpInit *Fresh = new (Impl.Allocator) MatchOpInit(Str, Regex);
  Impl.TheMatchOpInitPool.InsertNode(Fresh, InsertPos);
  return Fresh;
}

/// FoldingSet hook: profile this node with the same helper that get() uses
/// for lookup, so both agree on the node's identity.
void MatchOpInit::Profile(FoldingSetNodeID &ID) const {
  ProfileMatchOpInit(ID, this->Str, this->Regex);
}

/// Try to evaluate the !match operator to a constant bit.
///
/// If either operand is not a StringInit the operator cannot be evaluated
/// yet, and this node is returned unchanged. Otherwise the regex operand is
/// compiled with llvm::Regex. An invalid pattern is reported as a fatal error
/// that includes the reason given by the regex compiler; a valid pattern
/// yields a BitInit holding the result of matching it against the string
/// operand.
const Init *MatchOpInit::Fold() const {
  const auto *StrInit = dyn_cast<StringInit>(Str);
  const auto *RegexInit = dyn_cast<StringInit>(Regex);
  if (!StrInit || !RegexInit)
    return this;

  StringRef RegexStr = RegexInit->getValue();
  llvm::Regex Matcher(RegexStr);

  // Keep the "invalid regex '<pattern>'" prefix stable and append the
  // compiler's explanation so the user can see what is wrong with it.
  std::string RegexError;
  if (!Matcher.isValid(RegexError))
    PrintFatalError(Twine("invalid regex '") + RegexStr + "': " + RegexError);

  return BitInit::get(Str->getRecordKeeper(),
                      Matcher.match(StrInit->getValue()));
}

/// Resolve references in both operands through \p R.
///
/// When neither operand changes, this node is returned. Otherwise a node for
/// the resolved operands is obtained and folded.
const Init *MatchOpInit::resolveReferences(Resolver &R) const {
  const Init *ResolvedStr = Str->resolveReferences(R);
  const Init *ResolvedRegex = Regex->resolveReferences(R);

  const bool Unchanged = ResolvedStr == Str && ResolvedRegex == Regex;
  if (Unchanged)
    return this;

  return get(ResolvedStr, ResolvedRegex)->Fold();
}

/// Return the VarBitInit obtained for bit \p Bit of this (unfolded) value.
const Init *MatchOpInit::getBit(unsigned Bit) const {
  const Init *BitRef = VarBitInit::get(this, Bit);
  return BitRef;
}

/// Render the operator as `!match(<str>, <regex>)`, using each operand's own
/// string form.
std::string MatchOpInit::getAsString() const {
  std::string Text = "!match(";
  Text += Str->getAsString();
  Text += ", ";
  Text += Regex->getAsString();
  Text += ")";
  return Text;
}

const RecTy *TypedInit::getFieldType(const StringInit *FieldName) const {
if (const auto *RecordType = dyn_cast<RecordRecTy>(getType())) {
for (const Record *Rec : RecordType->getClasses()) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TableGen/TGLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("tolower", tgtok::XToLower)
.Case("toupper", tgtok::XToUpper)
.Case("repr", tgtok::XRepr)
.Case("match", tgtok::XMatch)
.Default(tgtok::Error);

return Kind != tgtok::Error ? Kind
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TableGen/TGLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ enum TokKind {
XInterleave,
XSubstr,
XFind,
XMatch,
XCast,
XSubst,
XForEach,
Expand Down
43 changes: 43 additions & 0 deletions llvm/lib/TableGen/TGParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,49 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
return (ExistsOpInit::get(Type, Expr))->Fold(CurRec);
}

case tgtok::XMatch: {
  // Value ::= !match '(' Str ',' Regex ')'
  //
  // Both operands must be statically typed as string. On any failure a
  // diagnostic is emitted and nullptr is returned; on success the operator is
  // folded immediately.
  Lex.Lex(); // eat the operation.

  if (!consume(tgtok::l_paren)) {
    TokError("expected '(' after !match");
    return nullptr;
  }

  // First operand: the string to be matched. Remember where it starts so a
  // type error points at the operand rather than at the following token.
  SMLoc StrLoc = Lex.getLoc();
  const Init *Str = ParseValue(CurRec);
  if (!Str)
    return nullptr;

  const auto *StrType = dyn_cast<TypedInit>(Str);
  if (!StrType || !isa<StringRecTy>(StrType->getType())) {
    Error(StrLoc, "expected string type argument in !match operator");
    return nullptr;
  }

  // eat the comma. Report its absence instead of failing silently.
  if (!consume(tgtok::comma)) {
    TokError("expected ',' in !match");
    return nullptr;
  }

  // Second operand: the regular expression.
  SMLoc RegexLoc = Lex.getLoc();
  const Init *Regex = ParseValue(CurRec);
  if (!Regex)
    return nullptr;

  const auto *RegexType = dyn_cast<TypedInit>(Regex);
  if (!RegexType || !isa<StringRecTy>(RegexType->getType())) {
    Error(RegexLoc, "expected string type argument in !match operator");
    return nullptr;
  }

  if (!consume(tgtok::r_paren)) {
    TokError("expected ')' in !match");
    return nullptr;
  }

  return MatchOpInit::get(Str, Regex)->Fold();
}

case tgtok::XConcat:
case tgtok::XADD:
case tgtok::XSUB:
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/TableGen/match.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Tests for the !match bang operator. The first invocation below checks the
// folded results of valid uses; the remaining invocations each enable one
// ERRORn define and expect llvm-tblgen to fail with the matching diagnostic.
// RUN: llvm-tblgen %s | FileCheck %s
// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
// XFAIL: vg_leak

// Valid uses: both operands are string literals, so each !match is expected
// to appear as a constant bit in the printed record.
def test {
// The pattern matches the string, so 1 is expected.
bit test0 = !match("123 abc ABC", "[0-9 a-z A-Z]+");
// "abc" contains no digit, so 0 is expected.
bit test1 = !match("abc", "[0-9]+");
}

// CHECK-LABEL: def test {
// CHECK-NEXT: bit test0 = 1;
// CHECK-NEXT: bit test1 = 0;
// CHECK-NEXT: }

// A non-string first operand is rejected.
#ifdef ERROR1
defvar error1 = !match(123, ".*");
// ERROR1: error: expected string type argument in !match operator
#endif

// A non-string second operand is rejected with the same message.
#ifdef ERROR2
defvar error2 = !match("abc", 123);
// ERROR2: error: expected string type argument in !match operator
#endif

// A pattern that does not compile as a regular expression is rejected.
#ifdef ERROR3
defvar error3 = !match("abc", "([)]");
// ERROR3: error: invalid regex '([)]'
#endif
Loading