Skip to content

Commit 376e3b6

Browse files
authored
[TableGen] Add !match operator to do regex matching (#130759)
The grammar is `!match(str, regex)`. This operator produces 1 if `str` matches the regular expression `regex`, and 0 otherwise. The format of `regex` is ERE (POSIX Extended Regular Expressions).
1 parent f291ec6 commit 376e3b6

File tree

7 files changed

+77
-5
lines changed

7 files changed

+77
-5
lines changed

llvm/docs/TableGen/ProgRef.rst

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,11 +225,11 @@ TableGen provides "bang operators" that have a wide variety of uses:
225225
: !getdagname !getdagop !gt !head !if
226226
: !initialized !interleave !isa !le !listconcat
227227
: !listflatten !listremove !listsplat !logtwo !lt
228-
: !mul !ne !not !or !range
229-
: !repr !setdagarg !setdagname !setdagop !shl
230-
: !size !sra !srl !strconcat !sub
231-
: !subst !substr !tail !tolower !toupper
232-
: !xor
228+
: !match !mul !ne !not !or
229+
: !range !repr !setdagarg !setdagname !setdagop
230+
: !shl !size !sra !srl !strconcat
231+
: !sub !subst !substr !tail !tolower
232+
: !toupper !xor
233233

234234
The ``!cond`` operator has a slightly different
235235
syntax compared to other bang operators, so it is defined separately:
@@ -1878,6 +1878,10 @@ and non-0 as true.
18781878
This operator produces 1 if *a* is less than *b*; 0 otherwise.
18791879
The arguments must be ``bit``, ``bits``, ``int``, or ``string`` values.
18801880

1881+
``!match(``\ *str*\ ``,`` *regex*\ ``)``
1882+
This operator produces 1 if *str* matches the regular expression
1883+
*regex*; 0 otherwise. The format of *regex* is ERE (POSIX Extended Regular Expressions).
1884+
18811885
``!mul(``\ *a*\ ``,`` *b*\ ``, ...)``
18821886
This operator multiplies *a*, *b*, etc., and produces the product.
18831887

llvm/include/llvm/TableGen/Record.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,7 @@ class BinOpInit final : public OpInit, public FoldingSetNode {
910910
STRCONCAT,
911911
INTERLEAVE,
912912
CONCAT,
913+
MATCH,
913914
EQ,
914915
NE,
915916
LE,

llvm/lib/TableGen/Record.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/Support/Compiler.h"
2626
#include "llvm/Support/ErrorHandling.h"
2727
#include "llvm/Support/MathExtras.h"
28+
#include "llvm/Support/Regex.h"
2829
#include "llvm/Support/SMLoc.h"
2930
#include "llvm/Support/raw_ostream.h"
3031
#include "llvm/TableGen/Error.h"
@@ -1318,6 +1319,23 @@ const Init *BinOpInit::Fold(const Record *CurRec) const {
13181319
}
13191320
break;
13201321
}
1322+
case MATCH: {
1323+
const auto *StrInit = dyn_cast<StringInit>(LHS);
1324+
if (!StrInit)
1325+
return this;
1326+
1327+
const auto *RegexInit = dyn_cast<StringInit>(RHS);
1328+
if (!RegexInit)
1329+
return this;
1330+
1331+
StringRef RegexStr = RegexInit->getValue();
1332+
llvm::Regex Matcher(RegexStr);
1333+
if (!Matcher.isValid())
1334+
PrintFatalError(Twine("invalid regex '") + RegexStr + Twine("'"));
1335+
1336+
return BitInit::get(LHS->getRecordKeeper(),
1337+
Matcher.match(StrInit->getValue()));
1338+
}
13211339
case LISTCONCAT: {
13221340
const auto *LHSs = dyn_cast<ListInit>(LHS);
13231341
const auto *RHSs = dyn_cast<ListInit>(RHS);
@@ -1586,6 +1604,9 @@ std::string BinOpInit::getAsString() const {
15861604
case RANGEC:
15871605
return LHS->getAsString() + "..." + RHS->getAsString();
15881606
case CONCAT: Result = "!con"; break;
1607+
case MATCH:
1608+
Result = "!match";
1609+
break;
15891610
case ADD: Result = "!add"; break;
15901611
case SUB: Result = "!sub"; break;
15911612
case MUL: Result = "!mul"; break;

llvm/lib/TableGen/TGLexer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
644644
.Case("tolower", tgtok::XToLower)
645645
.Case("toupper", tgtok::XToUpper)
646646
.Case("repr", tgtok::XRepr)
647+
.Case("match", tgtok::XMatch)
647648
.Default(tgtok::Error);
648649

649650
return Kind != tgtok::Error ? Kind

llvm/lib/TableGen/TGLexer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ enum TokKind {
126126
XInterleave,
127127
XSubstr,
128128
XFind,
129+
XMatch,
129130
XCast,
130131
XSubst,
131132
XForEach,

llvm/lib/TableGen/TGParser.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
14561456
}
14571457

14581458
case tgtok::XConcat:
1459+
case tgtok::XMatch:
14591460
case tgtok::XADD:
14601461
case tgtok::XSUB:
14611462
case tgtok::XMUL:
@@ -1488,6 +1489,9 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
14881489
switch (OpTok) {
14891490
default: llvm_unreachable("Unhandled code!");
14901491
case tgtok::XConcat: Code = BinOpInit::CONCAT; break;
1492+
case tgtok::XMatch:
1493+
Code = BinOpInit::MATCH;
1494+
break;
14911495
case tgtok::XADD: Code = BinOpInit::ADD; break;
14921496
case tgtok::XSUB: Code = BinOpInit::SUB; break;
14931497
case tgtok::XMUL: Code = BinOpInit::MUL; break;
@@ -1525,6 +1529,10 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
15251529
switch (OpTok) {
15261530
default:
15271531
llvm_unreachable("Unhandled code!");
1532+
case tgtok::XMatch:
1533+
Type = BitRecTy::get(Records);
1534+
ArgType = StringRecTy::get(Records);
1535+
break;
15281536
case tgtok::XConcat:
15291537
case tgtok::XSetDagOp:
15301538
Type = DagRecTy::get(Records);

llvm/test/TableGen/match.td

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: llvm-tblgen %s | FileCheck %s
2+
// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
3+
// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
4+
// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
5+
// RUN: not llvm-tblgen -DERROR4 %s 2>&1 | FileCheck --check-prefix=ERROR4 %s
6+
// XFAIL: vg_leak
7+
8+
def test {
9+
bit test0 = !match("123 abc ABC", "[0-9 a-z A-Z]+");
10+
bit test1 = !match("abc", "[0-9]+");
11+
}
12+
13+
// CHECK-LABEL: def test {
14+
// CHECK-NEXT: bit test0 = 1;
15+
// CHECK-NEXT: bit test1 = 0;
16+
// CHECK-NEXT: }
17+
18+
#ifdef ERROR1
19+
defvar error1 = !match(123, ".*");
20+
// ERROR1: error: expected value of type 'string', got 'int'
21+
#endif
22+
23+
#ifdef ERROR2
24+
defvar error2 = !match("abc", 123);
25+
// ERROR2: error: expected value of type 'string', got 'int'
26+
#endif
27+
28+
#ifdef ERROR3
29+
defvar error3 = !match("abc", "abc", "abc");
30+
// ERROR3: error: expected two operands to operator
31+
#endif
32+
33+
#ifdef ERROR4
34+
defvar error4 = !match("abc", "([)]");
35+
// ERROR4: error: invalid regex '([)]'
36+
#endif

0 commit comments

Comments
 (0)