Skip to content

Commit e122a71

Browse files
author
Paul C. Anagnostopoulos
committed
[TableGen] Add the !substr() bang operator
Update the documentation and add a test. Fix the build failure by changing SIZE_MAX to std::numeric_limits<int64_t>::max(). Differential Revision: https://reviews.llvm.org/D93419
1 parent 9fb074e commit e122a71

File tree

8 files changed

+215
-9
lines changed

8 files changed

+215
-9
lines changed

llvm/docs/TableGen/ProgRef.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ TableGen provides "bang operators" that have a wide variety of uses:
216216
: !interleave !isa !le !listconcat !listsplat
217217
: !lt !mul !ne !not !or
218218
: !setdagop !shl !size !sra !srl
219-
: !strconcat !sub !subst !tail !xor
219+
: !strconcat !sub !subst !substr !tail
220+
: !xor
220221

221222
The ``!cond`` operator has a slightly different
222223
syntax compared to other bang operators, so it is defined separately:
@@ -1723,6 +1724,13 @@ and non-0 as true.
17231724
record if the *target* record name equals the *value* record name; otherwise it
17241725
produces the *value*.
17251726

1727+
``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)``
1728+
This operator extracts a substring of the given *string*. The starting
1729+
position of the substring is specified by *start*, which can range
1730+
between 0 and the length of the string, inclusive. The length of the substring
1731+
is specified by *length*; if not specified, the rest of the string is
1732+
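extracted. A *length* that extends past the end of the string is truncated to the rest of the string. The *start* and *length* arguments must be integers, and *length* must not be negative.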
1733+
17261734
``!tail(``\ *a*\ ``)``
17271735
This operator produces a new list with all the elements
17281736
of the list *a* except for the zeroth one. (See also ``!head``.)

llvm/include/llvm/TableGen/Record.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ class BinOpInit : public OpInit, public FoldingSetNode {
829829
/// !op (X, Y, Z) - Combine two inits.
830830
class TernOpInit : public OpInit, public FoldingSetNode {
831831
public:
832-
enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG };
832+
enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR };
833833

834834
private:
835835
Init *LHS, *MHS, *RHS;

llvm/lib/TableGen/Record.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,27 @@ Init *TernOpInit::Fold(Record *CurRec) const {
13251325
}
13261326
break;
13271327
}
1328+
1329+
case SUBSTR: {
1330+
StringInit *LHSs = dyn_cast<StringInit>(LHS);
1331+
IntInit *MHSi = dyn_cast<IntInit>(MHS);
1332+
IntInit *RHSi = dyn_cast<IntInit>(RHS);
1333+
if (LHSs && MHSi && RHSi) {
1334+
int64_t StringSize = LHSs->getValue().size();
1335+
int64_t Start = MHSi->getValue();
1336+
int64_t Length = RHSi->getValue();
1337+
if (Start < 0 || Start > StringSize)
1338+
PrintError(CurRec->getLoc(),
1339+
Twine("!substr start position is out of range 0...") +
1340+
std::to_string(StringSize) + ": " +
1341+
std::to_string(Start));
1342+
if (Length < 0)
1343+
PrintError(CurRec->getLoc(), "!substr length must be nonnegative");
1344+
return StringInit::get(LHSs->getValue().substr(Start, Length),
1345+
LHSs->getFormat());
1346+
}
1347+
break;
1348+
}
13281349
}
13291350

13301351
return const_cast<TernOpInit *>(this);
@@ -1364,11 +1385,12 @@ std::string TernOpInit::getAsString() const {
13641385
std::string Result;
13651386
bool UnquotedLHS = false;
13661387
switch (getOpcode()) {
1367-
case SUBST: Result = "!subst"; break;
1368-
case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
1388+
case DAG: Result = "!dag"; break;
13691389
case FILTER: Result = "!filter"; UnquotedLHS = true; break;
1390+
case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
13701391
case IF: Result = "!if"; break;
1371-
case DAG: Result = "!dag"; break;
1392+
case SUBST: Result = "!subst"; break;
1393+
case SUBSTR: Result = "!substr"; break;
13721394
}
13731395
return (Result + "(" +
13741396
(UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) +

llvm/lib/TableGen/TGLexer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
589589
.Case("listsplat", tgtok::XListSplat)
590590
.Case("strconcat", tgtok::XStrConcat)
591591
.Case("interleave", tgtok::XInterleave)
592+
.Case("substr", tgtok::XSubstr)
592593
.Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
593594
.Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
594595
.Default(tgtok::Error);

llvm/lib/TableGen/TGLexer.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ namespace tgtok {
5353

5454
// Bang operators.
5555
XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL,
56-
XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach,
57-
XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA,
58-
XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
56+
XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast,
57+
XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
58+
XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
5959

6060
// Boolean literals.
6161
TrueVal, FalseVal,

llvm/lib/TableGen/TGParser.cpp

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <algorithm>
2626
#include <cassert>
2727
#include <cstdint>
28+
#include <limits>
2829

2930
using namespace llvm;
3031

@@ -1496,6 +1497,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
14961497
return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
14971498
}
14981499

1500+
case tgtok::XSubstr:
1501+
return ParseOperationSubstr(CurRec, ItemType);
1502+
14991503
case tgtok::XCond:
15001504
return ParseOperationCond(CurRec, ItemType);
15011505

@@ -1655,6 +1659,94 @@ RecTy *TGParser::ParseOperatorType() {
16551659
return Type;
16561660
}
16571661

1662+
/// Parse the !substr operation. Return null on error.
1663+
///
1664+
/// Substr ::= !substr(string, start-int [, length-int]) => string
1665+
Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
1666+
TernOpInit::TernaryOp Code = TernOpInit::SUBSTR;
1667+
RecTy *Type = StringRecTy::get();
1668+
1669+
Lex.Lex(); // eat the operation
1670+
1671+
if (!consume(tgtok::l_paren)) {
1672+
TokError("expected '(' after !substr operator");
1673+
return nullptr;
1674+
}
1675+
1676+
Init *LHS = ParseValue(CurRec);
1677+
if (!LHS)
1678+
return nullptr;
1679+
1680+
if (!consume(tgtok::comma)) {
1681+
TokError("expected ',' in !substr operator");
1682+
return nullptr;
1683+
}
1684+
1685+
SMLoc MHSLoc = Lex.getLoc();
1686+
Init *MHS = ParseValue(CurRec);
1687+
if (!MHS)
1688+
return nullptr;
1689+
1690+
SMLoc RHSLoc = Lex.getLoc();
1691+
Init *RHS;
1692+
if (consume(tgtok::comma)) {
1693+
RHSLoc = Lex.getLoc();
1694+
RHS = ParseValue(CurRec);
1695+
if (!RHS)
1696+
return nullptr;
1697+
} else {
1698+
RHS = IntInit::get(std::numeric_limits<int64_t>::max());
1699+
}
1700+
1701+
if (!consume(tgtok::r_paren)) {
1702+
TokError("expected ')' in !substr operator");
1703+
return nullptr;
1704+
}
1705+
1706+
if (ItemType && !Type->typeIsConvertibleTo(ItemType)) {
1707+
Error(RHSLoc, Twine("expected value of type '") +
1708+
ItemType->getAsString() + "', got '" +
1709+
Type->getAsString() + "'");
1710+
}
1711+
1712+
TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
1713+
if (!LHSt && !isa<UnsetInit>(LHS)) {
1714+
TokError("could not determine type of the string in !substr");
1715+
return nullptr;
1716+
}
1717+
if (LHSt && !isa<StringRecTy>(LHSt->getType())) {
1718+
TokError(Twine("expected string, got type '") +
1719+
LHSt->getType()->getAsString() + "'");
1720+
return nullptr;
1721+
}
1722+
1723+
TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
1724+
if (!MHSt && !isa<UnsetInit>(MHS)) {
1725+
TokError("could not determine type of the start position in !substr");
1726+
return nullptr;
1727+
}
1728+
if (MHSt && !isa<IntRecTy>(MHSt->getType())) {
1729+
Error(MHSLoc, Twine("expected int, got type '") +
1730+
MHSt->getType()->getAsString() + "'");
1731+
return nullptr;
1732+
}
1733+
1734+
if (RHS) {
1735+
TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
1736+
if (!RHSt && !isa<UnsetInit>(RHS)) {
1737+
TokError("could not determine type of the length in !substr");
1738+
return nullptr;
1739+
}
1740+
if (RHSt && !isa<IntRecTy>(RHSt->getType())) {
1741+
TokError(Twine("expected int, got type '") +
1742+
RHSt->getType()->getAsString() + "'");
1743+
return nullptr;
1744+
}
1745+
}
1746+
1747+
return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
1748+
}
1749+
16581750
/// Parse the !foreach and !filter operations. Return null on error.
16591751
///
16601752
/// ForEach ::= !foreach(ID, list-or-dag, expr) => list<expr type>
@@ -2206,7 +2298,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
22062298
case tgtok::XFoldl:
22072299
case tgtok::XForEach:
22082300
case tgtok::XFilter:
2209-
case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
2301+
case tgtok::XSubst:
2302+
case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
22102303
return ParseOperation(CurRec, ItemType);
22112304
}
22122305
}

llvm/lib/TableGen/TGParser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ class TGParser {
254254
TypedInit *FirstItem = nullptr);
255255
RecTy *ParseType();
256256
Init *ParseOperation(Record *CurRec, RecTy *ItemType);
257+
Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType);
257258
Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType);
258259
Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
259260
RecTy *ParseOperatorType();

llvm/test/TableGen/substr.td

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// RUN: llvm-tblgen %s | FileCheck %s
2+
// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
3+
4+
defvar claim = "This is the end of the world!";
5+
6+
// CHECK: def Rec1
7+
// CHECK: fullNoLength = "This is the end of the world!";
8+
// CHECK: fullLength = "This is the end of the world!";
9+
// CHECK: thisIsTheEnd = "This is the end";
10+
// CHECK: DoorsSong = "the end";
11+
// CHECK: finalNoLength = "end of the world!";
12+
// CHECK: finalLength = "end of the world!";
13+
14+
def Rec1 {
15+
string fullNoLength = !substr(claim, 0);
16+
string fullLength = !substr(claim, 0, 999);
17+
string thisIsTheEnd = !substr(claim, 0, 15);
18+
string DoorsSong = !substr(claim, 8, 7);
19+
string finalNoLength = !substr(claim, 12);
20+
string finalLength = !substr(claim, 12, !sub(!size(claim), 12));
21+
}
22+
23+
// CHECK: def Rec2 {
24+
// CHECK: lastName = "Flintstone";
25+
26+
def Rec2 {
27+
string firstName = "Fred";
28+
string name = firstName # " " # "Flintstone";
29+
string lastName = !substr(name, !add(!size(firstName), 1));
30+
}
31+
32+
// CHECK: def Rec3 {
33+
// CHECK: test1 = "";
34+
// CHECK: test2 = "";
35+
// CHECK: test3 = "";
36+
// CHECK: test4 = "h";
37+
// CHECK: test5 = "hello";
38+
// CHECK: test6 = "";
39+
40+
def Rec3 {
41+
string test1 = !substr("", 0, 0);
42+
string test2 = !substr("", 0, 9);
43+
string test3 = !substr("hello", 0, 0);
44+
string test4 = !substr("hello", 0, 1);
45+
string test5 = !substr("hello", 0, 99);
46+
string test6 = !substr("hello", 5, 99);
47+
}
48+
49+
// CHECK: def Rec4
50+
// CHECK: message = "This is the end of the world!";
51+
// CHECK: messagePrefix = "This is th...";
52+
// CHECK: warning = "Bad message: 'This is th...'";
53+
54+
class C<string msg> {
55+
string message = msg;
56+
string messagePrefix = !substr(message, 0, 10) # "...";
57+
}
58+
59+
def Rec4 : C<claim> {
60+
string warning = "Bad message: '" # messagePrefix # "'";
61+
}
62+
63+
#ifdef ERROR1
64+
65+
// ERROR1: expected string, got type 'int'
66+
// ERROR1: expected int, got type 'bits<3>'
67+
// ERROR1: expected int, got type 'string'
68+
// ERROR1: !substr start position is out of range 0...29: 30
69+
// ERROR1: !substr length must be nonnegative
70+
71+
def Rec8 {
72+
string claim1 = !substr(42, 0, 3);
73+
string claim2 = !substr(claim, 0b101);
74+
string claim3 = !substr(claim, 0, "oops");
75+
}
76+
77+
def Rec9 {
78+
string claim1 = !substr(claim, !add(!size(claim), 1));
79+
string claim2 = !substr(claim, 0, -13);
80+
}
81+
#endif

0 commit comments

Comments
 (0)