Skip to content

Commit dd47b84

Browse files
authored
[clang-format] Handle Trailing Whitespace After Line Continuation (P2223R2) (#145243)
Fixes #145226. Implement [P2223R2](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p2223r2.pdf) in clang-format to correctly handle cases where a backslash '\\' is followed by trailing whitespace before the newline. Previously, `clang-format` failed to properly detect and handle such cases, leading to misformatted code. With this, `clang-format` matches the behavior already implemented in Clang's lexer and `DependencyDirectivesScanner.cpp`, which allow trailing whitespace after a line continuation in any C++ standard.
1 parent 9a7720a commit dd47b84

File tree

2 files changed

+36
-9
lines changed

2 files changed

+36
-9
lines changed

clang/lib/Format/FormatTokenLexer.cpp

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "FormatTokenLexer.h"
1616
#include "FormatToken.h"
17+
#include "clang/Basic/CharInfo.h"
1718
#include "clang/Basic/SourceLocation.h"
1819
#include "clang/Basic/SourceManager.h"
1920
#include "clang/Format/Format.h"
@@ -1203,16 +1204,22 @@ static size_t countLeadingWhitespace(StringRef Text) {
12031204
const unsigned char *const End = Text.bytes_end();
12041205
const unsigned char *Cur = Begin;
12051206
while (Cur < End) {
1206-
if (isspace(Cur[0])) {
1207+
if (isWhitespace(Cur[0])) {
12071208
++Cur;
1208-
} else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) {
1209-
// A '\' followed by a newline always escapes the newline, regardless
1210-
// of whether there is another '\' before it.
1209+
} else if (Cur[0] == '\\') {
1210+
// A backslash followed by optional horizontal whitespace (P2223R2) and
1211+
// then a newline always escapes the newline.
12111212
// The source has a null byte at the end. So the end of the entire input
12121213
// isn't reached yet. Also the lexer doesn't break apart an escaped
12131214
// newline.
1214-
assert(End - Cur >= 2);
1215-
Cur += 2;
1215+
const auto *Lookahead = Cur + 1;
1216+
while (isHorizontalWhitespace(*Lookahead))
1217+
++Lookahead;
1218+
// No line splice found; the backslash is a token.
1219+
if (!isVerticalWhitespace(*Lookahead))
1220+
break;
1221+
// Splice found, consume it.
1222+
Cur = Lookahead + 1;
12161223
} else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
12171224
(Cur[3] == '\n' || Cur[3] == '\r')) {
12181225
// Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the
@@ -1295,13 +1302,18 @@ FormatToken *FormatTokenLexer::getNextToken() {
12951302
case '/':
12961303
// The text was entirely whitespace when this loop was entered. Thus
12971304
// this has to be an escape sequence.
1298-
assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" ||
1299-
Text.substr(i, 4) == "\?\?/\r" ||
1305+
assert(Text.substr(i, 4) == "\?\?/\r" ||
13001306
Text.substr(i, 4) == "\?\?/\n" ||
13011307
(i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
13021308
Text.substr(i - 1, 4) == "\?\?/\n")) ||
13031309
(i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
1304-
Text.substr(i - 2, 4) == "\?\?/\n")));
1310+
Text.substr(i - 2, 4) == "\?\?/\n")) ||
1311+
(Text[i] == '\\' && [&]() -> bool {
1312+
size_t j = i + 1;
1313+
while (j < Text.size() && isHorizontalWhitespace(Text[j]))
1314+
++j;
1315+
return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r');
1316+
}()));
13051317
InEscape = true;
13061318
break;
13071319
default:

clang/unittests/Format/FormatTest.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25768,6 +25768,21 @@ TEST_F(FormatTest, OperatorPassedAsAFunctionPtr) {
2576825768
verifyFormat("foo(operator, , -42);", Style);
2576925769
}
2577025770

25771+
TEST_F(FormatTest, LineSpliceWithTrailingWhitespace) {
25772+
auto Style = getLLVMStyle();
25773+
Style.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign;
25774+
Style.UseTab = FormatStyle::UT_Never;
25775+
25776+
verifyFormat("int i;", " \\ \n"
25777+
" int i;");
25778+
verifyFormat("#define FOO(args) \\\n"
25779+
" struct a {};",
25780+
"#define FOO( args ) \\ \n"
25781+
"struct a{\\\t\t\t\n"
25782+
" };",
25783+
Style);
25784+
}
25785+
2577125786
TEST_F(FormatTest, WhitespaceSensitiveMacros) {
2577225787
FormatStyle Style = getLLVMStyle();
2577325788
Style.WhitespaceSensitiveMacros.push_back("FOO");

0 commit comments

Comments
 (0)