Skip to content

Commit 66bfdd7

Browse files
committed
[clang-format] Handle C++ keywords in other languages better (llvm#132941)
There is some code to make sure that C++ keywords that are identifiers in the other languages are not treated as keywords. Right now, the token kind is set to identifier, and the identifier info is cleared. The latter is probably done so that the code for identifying C++ structures does not recognize those structures by mistake when formatting a language that does not have them. However, we did not find any case in which all of the following hold: the language can contain the relevant sequence of tokens, the code tries to parse the structure as if it were C++ by using the identifier info instead of the token kind, and it does so without checking the language setting.

However, there are places where the code checks whether the identifier info field is null or not. They are places where an identifier and a keyword are treated the same way, for example, the name of a function in JavaScript. This patch removes the lines that clear the identifier info. This way, a C++ keyword gets treated in the same way as an identifier in those places.

(Note: line breaks and indentation inside the examples below were collapsed in this copy of the commit message. The "New" layouts are restored from the tests added in this commit; the original layout of the "Old" snippets could not be recovered and is shown as captured.)

JavaScript

New

```JavaScript
async function
union(
    myparamnameiswaytooloooong) {
}
```

Old

```JavaScript
async function union( myparamnameiswaytooloooong) { }
```

Java

New

```Java
enum union { ABC, CDE }
```

Old

```Java
enum union { ABC, CDE }
```
1 parent b0338c3 commit 66bfdd7

File tree

3 files changed

+29
-19
lines changed

3 files changed

+29
-19
lines changed

clang/lib/Format/FormatTokenLexer.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,15 +1306,12 @@ FormatToken *FormatTokenLexer::getNextToken() {
13061306
FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
13071307
tok::kw_operator)) {
13081308
FormatTok->Tok.setKind(tok::identifier);
1309-
FormatTok->Tok.setIdentifierInfo(nullptr);
13101309
} else if (Style.isJavaScript() &&
13111310
FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
13121311
tok::kw_operator)) {
13131312
FormatTok->Tok.setKind(tok::identifier);
1314-
FormatTok->Tok.setIdentifierInfo(nullptr);
13151313
} else if (Style.isTableGen() && !Keywords.isTableGenKeyword(*FormatTok)) {
13161314
FormatTok->Tok.setKind(tok::identifier);
1317-
FormatTok->Tok.setIdentifierInfo(nullptr);
13181315
}
13191316
} else if (FormatTok->is(tok::greatergreater)) {
13201317
FormatTok->Tok.setKind(tok::greater);

clang/unittests/Format/FormatTestJS.cpp

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -828,20 +828,25 @@ TEST_F(FormatTestJS, AsyncFunctions) {
828828
"} ");
829829
// clang-format must not insert breaks between async and function, otherwise
830830
// automatic semicolon insertion may trigger (in particular in a class body).
831+
auto Style = getGoogleJSStyleWithColumns(10);
831832
verifyFormat("async function\n"
832833
"hello(\n"
833834
" myparamnameiswaytooloooong) {\n"
834835
"}",
835-
"async function hello(myparamnameiswaytooloooong) {}",
836-
getGoogleJSStyleWithColumns(10));
836+
"async function hello(myparamnameiswaytooloooong) {}", Style);
837+
verifyFormat("async function\n"
838+
"union(\n"
839+
" myparamnameiswaytooloooong) {\n"
840+
"}",
841+
Style);
837842
verifyFormat("class C {\n"
838843
" async hello(\n"
839844
" myparamnameiswaytooloooong) {\n"
840845
" }\n"
841846
"}",
842847
"class C {\n"
843848
" async hello(myparamnameiswaytooloooong) {} }",
844-
getGoogleJSStyleWithColumns(10));
849+
Style);
845850
verifyFormat("async function* f() {\n"
846851
" yield fetch(x);\n"
847852
"}");
@@ -1338,15 +1343,16 @@ TEST_F(FormatTestJS, WrapRespectsAutomaticSemicolonInsertion) {
13381343
// The following statements must not wrap, as otherwise the program meaning
13391344
// would change due to automatic semicolon insertion.
13401345
// See http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1.
1341-
verifyFormat("return aaaaa;", getGoogleJSStyleWithColumns(10));
1342-
verifyFormat("yield aaaaa;", getGoogleJSStyleWithColumns(10));
1343-
verifyFormat("return /* hello! */ aaaaa;", getGoogleJSStyleWithColumns(10));
1344-
verifyFormat("continue aaaaa;", getGoogleJSStyleWithColumns(10));
1345-
verifyFormat("continue /* hello! */ aaaaa;", getGoogleJSStyleWithColumns(10));
1346-
verifyFormat("break aaaaa;", getGoogleJSStyleWithColumns(10));
1347-
verifyFormat("throw aaaaa;", getGoogleJSStyleWithColumns(10));
1348-
verifyFormat("aaaaaaaaa++;", getGoogleJSStyleWithColumns(10));
1349-
verifyFormat("aaaaaaaaa--;", getGoogleJSStyleWithColumns(10));
1346+
auto Style = getGoogleJSStyleWithColumns(10);
1347+
verifyFormat("return aaaaa;", Style);
1348+
verifyFormat("yield aaaaa;", Style);
1349+
verifyFormat("return /* hello! */ aaaaa;", Style);
1350+
verifyFormat("continue aaaaa;", Style);
1351+
verifyFormat("continue /* hello! */ aaaaa;", Style);
1352+
verifyFormat("break aaaaa;", Style);
1353+
verifyFormat("throw aaaaa;", Style);
1354+
verifyFormat("aaaaaaaaa++;", Style);
1355+
verifyFormat("aaaaaaaaa--;", Style);
13501356
verifyFormat("return [\n"
13511357
" aaa\n"
13521358
"];",
@@ -1366,12 +1372,13 @@ TEST_F(FormatTestJS, WrapRespectsAutomaticSemicolonInsertion) {
13661372
// Ideally the foo() bit should be indented relative to the async function().
13671373
verifyFormat("async function\n"
13681374
"foo() {}",
1369-
getGoogleJSStyleWithColumns(10));
1370-
verifyFormat("await theReckoning;", getGoogleJSStyleWithColumns(10));
1371-
verifyFormat("some['a']['b']", getGoogleJSStyleWithColumns(10));
1375+
Style);
1376+
verifyFormat("await theReckoning;", Style);
1377+
verifyFormat("some['a']['b']", Style);
1378+
verifyFormat("union['a']['b']", Style);
13721379
verifyFormat("x = (a['a']\n"
13731380
" ['b']);",
1374-
getGoogleJSStyleWithColumns(10));
1381+
Style);
13751382
verifyFormat("function f() {\n"
13761383
" return foo.bar(\n"
13771384
" (param): param is {\n"
@@ -2500,6 +2507,10 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) {
25002507
TEST_F(FormatTestJS, CppKeywords) {
25012508
// Make sure we don't mess stuff up because of C++ keywords.
25022509
verifyFormat("return operator && (aa);");
2510+
verifyFormat("enum operator {\n"
2511+
" A = 1,\n"
2512+
" B\n"
2513+
"}");
25032514
// .. or QT ones.
25042515
verifyFormat("const slots: Slot[];");
25052516
// use the "!" assertion operator to validate that clang-format understands

clang/unittests/Format/FormatTestJava.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ TEST_F(FormatTestJava, AnonymousClasses) {
158158

159159
TEST_F(FormatTestJava, EnumDeclarations) {
160160
verifyFormat("enum SomeThing { ABC, CDE }");
161+
// A C++ keyword should not mess things up.
162+
verifyFormat("enum union { ABC, CDE }");
161163
verifyFormat("enum SomeThing {\n"
162164
" ABC,\n"
163165
" CDE,\n"

0 commit comments

Comments
 (0)