Skip to content

[TableGen] Add !match operator to do regex matching #130759

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions llvm/docs/TableGen/ProgRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,11 @@ TableGen provides "bang operators" that have a wide variety of uses:
: !getdagname !getdagop !gt !head !if
: !initialized !interleave !isa !le !listconcat
: !listflatten !listremove !listsplat !logtwo !lt
: !mul !ne !not !or !range
: !repr !setdagarg !setdagname !setdagop !shl
: !size !sra !srl !strconcat !sub
: !subst !substr !tail !tolower !toupper
: !xor
: !match !mul !ne !not !or
: !range !repr !setdagarg !setdagname !setdagop
: !shl !size !sra !srl !strconcat
: !sub !subst !substr !tail !tolower
: !toupper !xor

The ``!cond`` operator has a slightly different
syntax compared to other bang operators, so it is defined separately:
Expand Down Expand Up @@ -1878,6 +1878,10 @@ and non-0 as true.
This operator produces 1 if *a* is less than *b*; 0 otherwise.
The arguments must be ``bit``, ``bits``, ``int``, or ``string`` values.

``!match(``\ *str*\ ``,`` *regex*\ ``)``
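This operator produces 1 if *str* matches the regular expression *regex*;
0 otherwise. The format of *regex* is ERE (Extended POSIX Regular Expressions).
The match is not implicitly anchored: *regex* may match any substring of *str*
unless it is anchored explicitly with ``^`` and ``$``.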

``!mul(``\ *a*\ ``,`` *b*\ ``, ...)``
This operator multiplies *a*, *b*, etc., and produces the product.

Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/TableGen/Record.h
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,7 @@ class BinOpInit final : public OpInit, public FoldingSetNode {
STRCONCAT,
INTERLEAVE,
CONCAT,
MATCH,
EQ,
NE,
LE,
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/TableGen/Record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
Expand Down Expand Up @@ -1318,6 +1319,23 @@ const Init *BinOpInit::Fold(const Record *CurRec) const {
}
break;
}
case MATCH: {
const auto *StrInit = dyn_cast<StringInit>(LHS);
if (!StrInit)
return this;

const auto *RegexInit = dyn_cast<StringInit>(RHS);
if (!RegexInit)
return this;

StringRef RegexStr = RegexInit->getValue();
llvm::Regex Matcher(RegexStr);
if (!Matcher.isValid())
PrintFatalError(Twine("invalid regex '") + RegexStr + Twine("'"));

return BitInit::get(LHS->getRecordKeeper(),
Matcher.match(StrInit->getValue()));
}
case LISTCONCAT: {
const auto *LHSs = dyn_cast<ListInit>(LHS);
const auto *RHSs = dyn_cast<ListInit>(RHS);
Expand Down Expand Up @@ -1586,6 +1604,9 @@ std::string BinOpInit::getAsString() const {
case RANGEC:
return LHS->getAsString() + "..." + RHS->getAsString();
case CONCAT: Result = "!con"; break;
case MATCH:
Result = "!match";
break;
case ADD: Result = "!add"; break;
case SUB: Result = "!sub"; break;
case MUL: Result = "!mul"; break;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TableGen/TGLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("tolower", tgtok::XToLower)
.Case("toupper", tgtok::XToUpper)
.Case("repr", tgtok::XRepr)
.Case("match", tgtok::XMatch)
.Default(tgtok::Error);

return Kind != tgtok::Error ? Kind
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TableGen/TGLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ enum TokKind {
XInterleave,
XSubstr,
XFind,
XMatch,
XCast,
XSubst,
XForEach,
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/TableGen/TGParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1456,6 +1456,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
}

case tgtok::XConcat:
case tgtok::XMatch:
case tgtok::XADD:
case tgtok::XSUB:
case tgtok::XMUL:
Expand Down Expand Up @@ -1488,6 +1489,9 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
switch (OpTok) {
default: llvm_unreachable("Unhandled code!");
case tgtok::XConcat: Code = BinOpInit::CONCAT; break;
case tgtok::XMatch:
Code = BinOpInit::MATCH;
break;
case tgtok::XADD: Code = BinOpInit::ADD; break;
case tgtok::XSUB: Code = BinOpInit::SUB; break;
case tgtok::XMUL: Code = BinOpInit::MUL; break;
Expand Down Expand Up @@ -1525,6 +1529,10 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
switch (OpTok) {
default:
llvm_unreachable("Unhandled code!");
case tgtok::XMatch:
Type = BitRecTy::get(Records);
ArgType = StringRecTy::get(Records);
break;
case tgtok::XConcat:
case tgtok::XSetDagOp:
Type = DagRecTy::get(Records);
Expand Down
36 changes: 36 additions & 0 deletions llvm/test/TableGen/match.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// RUN: llvm-tblgen %s | FileCheck %s
// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
// RUN: not llvm-tblgen -DERROR4 %s 2>&1 | FileCheck --check-prefix=ERROR4 %s
// XFAIL: vg_leak

// Positive and negative cases; each !match must fold to a constant bit in the
// printed record.
def test {
// Every character of the subject (digits, spaces, lower- and upper-case
// letters) is in the bracket class, so the result is expected to be 1.
bit test0 = !match("123 abc ABC", "[0-9 a-z A-Z]+");
// The subject contains no digit, so the result is expected to be 0.
bit test1 = !match("abc", "[0-9]+");
}

// CHECK-LABEL: def test {
// CHECK-NEXT: bit test0 = 1;
// CHECK-NEXT: bit test1 = 0;
// CHECK-NEXT: }

#ifdef ERROR1
defvar error1 = !match(123, ".*");
// ERROR1: error: expected value of type 'string', got 'int'
#endif

#ifdef ERROR2
defvar error2 = !match("abc", 123);
// ERROR2: error: expected value of type 'string', got 'int'
#endif

#ifdef ERROR3
defvar error3 = !match("abc", "abc", "abc");
// ERROR3: error: expected two operands to operator
#endif

#ifdef ERROR4
defvar error4 = !match("abc", "([)]");
// ERROR4: error: invalid regex '([)]'
#endif