-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang-format] TableGen multi line string support. #78032
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang-format] TableGen multi line string support. #78032
Conversation
@llvm/pr-subscribers-clang-format Author: Hirofumi Nakamura (hnakamura5) Changes: Support the handling of TableGen's multiline string (code) literal. This part was split out of #76059. Full diff: https://github.com/llvm/llvm-project/pull/78032.diff 6 Files Affected:
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 102504182c4505..e6eaaa9ab45706 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1591,6 +1591,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.StartOfStringLiteral = State.Column + 1;
if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column + 1;
+ } else if (Current.is(TT_TableGenMultiLineString) &&
+ State.StartOfStringLiteral == 0) {
+ State.StartOfStringLiteral = State.Column + 1;
} else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column;
} else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index d5ef627f1348d3..dede89f2600150 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -148,6 +148,7 @@ namespace format {
TYPE(StructLBrace) \
TYPE(StructRBrace) \
TYPE(StructuredBindingLSquare) \
+ TYPE(TableGenMultiLineString) \
TYPE(TemplateCloser) \
TYPE(TemplateOpener) \
TYPE(TemplateString) \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index a1fd6dd6effe6c..1060009bdcf131 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -93,6 +93,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
// string literals are correctly identified.
handleCSharpVerbatimAndInterpolatedStrings();
}
+ if (Style.isTableGen())
+ handleTableGenMultilineString();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->isNot(tok::eof));
@@ -272,6 +274,14 @@ void FormatTokenLexer::tryMergePreviousTokens() {
return;
}
}
+ if (Style.isTableGen()) {
+ if (tryMergeTokens({tok::l_square, tok::l_brace},
+ TT_TableGenMultiLineString)) {
+ // Multi line string starts with [{
+ Tokens.back()->Tok.setKind(tok::string_literal);
+ return;
+ }
+ }
}
bool FormatTokenLexer::tryMergeNSStringLiteral() {
@@ -763,6 +773,53 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
}
+void FormatTokenLexer::handleTableGenMultilineString() {
+ FormatToken *MultiLineString = Tokens.back();
+ if (MultiLineString->isNot(TT_TableGenMultiLineString))
+ return;
+
+ bool PrevIsRBrace = false;
+ const char *FirstBreak = nullptr;
+ const char *LastBreak = nullptr;
+ const char *Begin = MultiLineString->TokenText.begin();
+ // Scan from the start of the token to the end of the lexer buffer, looking for `}]`, the closer of the multiline string. If no closer is found the function falls out of the loop without resetting the lexer.
+ for (const char *Current = Begin, *End = Lex->getBuffer().end();
+ Current != End; ++Current) {
+ if (PrevIsRBrace && *Current == ']') {
+ // `}]` terminates the multiline string; with no newline seen, the first "line" ends at this `]`.
+ if (!FirstBreak)
+ FirstBreak = Current;
+ MultiLineString->TokenText = StringRef(Begin, Current - Begin + 1);
+ // ColumnWidth covers only the first line: Begin through FirstBreak, inclusive.
+ MultiLineString->ColumnWidth = encoding::columnWidthWithTabs(
+ StringRef(Begin, FirstBreak - Begin + 1),
+ MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
+ if (LastBreak) {
+ // Only when a newline was seen: LastLineColumnWidth covers the text after the last newline through the closing `]`.
+ MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ StringRef(LastBreak + 1, Current - LastBreak),
+ MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
+ }
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Current + 1)));
+ return;
+ }
+ PrevIsRBrace = false;
+ if (*Current == '\n') {
+ MultiLineString->IsMultiline = true;
+ // FirstBreak keeps the first newline; LastBreak tracks the most recent one (they coincide when there is exactly one newline).
+ if (!FirstBreak)
+ FirstBreak = Current;
+ LastBreak = Current;
+ continue;
+ }
+ if (*Current == '}') {
+ // Remember the '}': if the very next character is ']', the pair is the closer.
+ PrevIsRBrace = true;
+ continue;
+ }
+ }
+}
+
void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index bb6a8ab69c1be1..1dec6bbc41514c 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -95,6 +95,9 @@ class FormatTokenLexer {
void handleCSharpVerbatimAndInterpolatedStrings();
+ // Handles TableGen multiline strings. It has the form [{ ... }].
+ void handleTableGenMultilineString();
+
void tryParsePythonComment();
bool tryMerge_TMacro();
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 24ce18a64348c1..661118970336a2 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1710,7 +1710,7 @@ class AnnotatingParser {
TT_UnionLBrace, TT_RequiresClause,
TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
- TT_BracedListLBrace)) {
+ TT_BracedListLBrace, TT_TableGenMultiLineString)) {
CurrentToken->setType(TT_Unknown);
}
CurrentToken->Role.reset();
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 92f57a77cdaf01..5ca6a76f840bdf 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2193,6 +2193,11 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
ASSERT_TRUE(Keywords.isTableGenDefinition(*Tokens[0]));
ASSERT_TRUE(Tokens[0]->is(Keywords.kw_def));
ASSERT_TRUE(Tokens[1]->is(TT_StartOfName));
+
+ // Code, the multiline string token.
+ Tokens = Annotate("[{ code is multiline string }]");
+ ASSERT_EQ(Tokens.size(), 2u) << Tokens;
+ EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString);
}
TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
|
if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace}, | ||
TT_TableGenMultiLineString)) { | ||
// This must never be annotated as other types. | ||
Tokens.back()->setTypeIsFinalized(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd just call setFinalizedType(...) instead of adding setTypeIsFinalized.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is to make clear that the intention is only to finalize the merged token, not to set its type to something else.
How about that?
I know the two approaches do not differ much. If you consider setTypeIsFinalized() an unsuitable API, I will change it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wouldn't add that function; it is too specific. Normally you set the type.
Most likely tryMergeTokens should just be refactored to call setFinalizedType: types may change only in the TokenAnnotator, and any type set before that shouldn't change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to use setFinalizedType.
I could not come up with any other use for setTypeIsFinalized, so it may indeed be too specific.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can just mark conversations as resolved. You still can comment on them. But if it's clear no need to keep it open.
if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace}, | ||
TT_TableGenMultiLineString)) { | ||
// This must never be annotated as other types. | ||
Tokens.back()->setTypeIsFinalized(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wouldn't add that function; it is too specific. Normally you set the type.
Most likely tryMergeTokens should just be refactored to call setFinalizedType: types may change only in the TokenAnnotator, and any type set before that shouldn't change.
c0c412e
to
fce68fa
Compare
Thank you very much! |
Support the handling of TableGen's multiline string (code) literal, which has the form [{ this is the string, possibly spanning multiple lines... }]
Support the handling of TableGen's multiline string (code) literal.
It has the form
[{ this is the string, possibly spanning multiple lines... }]
This part was split out of #76059.