Skip to content

Commit 0058263

Browse files
authored
[clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literals. (#78996)
Adds support for tokens that have a unary-operator-like form: - bang operators: `!name` - cond operator: `!cond` - numeric literals: `+1`, `-1`. The cond operator is one of the bang operators, but it is distinguished from the others because it has a very specific syntax.
1 parent 0129ff1 commit 0058263

File tree

3 files changed

+60
-11
lines changed

3 files changed

+60
-11
lines changed

clang/lib/Format/FormatToken.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ namespace format {
148148
TYPE(StructLBrace) \
149149
TYPE(StructRBrace) \
150150
TYPE(StructuredBindingLSquare) \
151+
TYPE(TableGenBangOperator) \
152+
TYPE(TableGenCondOperator) \
151153
TYPE(TableGenMultiLineString) \
152154
TYPE(TemplateCloser) \
153155
TYPE(TemplateOpener) \

clang/lib/Format/FormatTokenLexer.cpp

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -276,13 +276,44 @@ void FormatTokenLexer::tryMergePreviousTokens() {
276276
return;
277277
}
278278
}
279-
// TableGen's Multi line string starts with [{
280-
if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
281-
TT_TableGenMultiLineString)) {
282-
// Set again with finalizing. This must never be annotated as other types.
283-
Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
284-
Tokens.back()->Tok.setKind(tok::string_literal);
285-
return;
279+
if (Style.isTableGen()) {
280+
// TableGen's Multi line string starts with [{
281+
if (tryMergeTokens({tok::l_square, tok::l_brace},
282+
TT_TableGenMultiLineString)) {
283+
// Set again with finalizing. This must never be annotated as other types.
284+
Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
285+
Tokens.back()->Tok.setKind(tok::string_literal);
286+
return;
287+
}
288+
// TableGen's bang operator is the form !<name>.
289+
// !cond is a special case with specific syntax.
290+
if (tryMergeTokens({tok::exclaim, tok::identifier},
291+
TT_TableGenBangOperator)) {
292+
Tokens.back()->Tok.setKind(tok::identifier);
293+
Tokens.back()->Tok.setIdentifierInfo(nullptr);
294+
if (Tokens.back()->TokenText == "!cond")
295+
Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
296+
else
297+
Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
298+
return;
299+
}
300+
if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
301+
// Here, "! if" becomes "!if". That is, ! captures if even when the space
302+
// exists. That is only one possibility in TableGen's syntax.
303+
Tokens.back()->Tok.setKind(tok::identifier);
304+
Tokens.back()->Tok.setIdentifierInfo(nullptr);
305+
Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
306+
return;
307+
}
308+
// +, - with numbers are literals. Not unary operators.
309+
if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
310+
Tokens.back()->Tok.setKind(tok::numeric_constant);
311+
return;
312+
}
313+
if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
314+
Tokens.back()->Tok.setKind(tok::numeric_constant);
315+
return;
316+
}
286317
}
287318
}
288319

clang/unittests/Format/TokenAnnotatorTest.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2215,16 +2215,24 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
22152215
EXPECT_TRUE(Tokens[0]->IsMultiline);
22162216
EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1);
22172217

2218+
// Numeric literals.
2219+
Tokens = Annotate("1234");
2220+
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
2221+
Tokens = Annotate("-1");
2222+
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
2223+
Tokens = Annotate("+1234");
2224+
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
2225+
Tokens = Annotate("0b0110");
2226+
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
2227+
Tokens = Annotate("0x1abC");
2228+
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
2229+
22182230
// Identifier tokens. In TableGen, identifiers can begin with a number.
22192231
// In ambiguous cases, the lexer tries to lex it as a number.
22202232
// Even if the try fails, it does not fall back to identifier lexing and
22212233
// regard as an error.
22222234
// The ambiguity is not documented. The result of those tests are based on the
22232235
// implementation of llvm::TGLexer::LexToken.
2224-
Tokens = Annotate("1234");
2225-
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
2226-
Tokens = Annotate("0x1abC");
2227-
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
22282236
// This is invalid syntax of number, but not an identifier.
22292237
Tokens = Annotate("0x1234x");
22302238
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
@@ -2249,6 +2257,14 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
22492257
EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_ElseLBrace);
22502258
Tokens = Annotate("defset Foo Def2 = {}");
22512259
EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace);
2260+
2261+
// Bang Operators.
2262+
Tokens = Annotate("!foreach");
2263+
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
2264+
Tokens = Annotate("!if");
2265+
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
2266+
Tokens = Annotate("!cond");
2267+
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
22522268
}
22532269

22542270
TEST_F(TokenAnnotatorTest, UnderstandConstructors) {

0 commit comments

Comments
 (0)