|
14 | 14 |
|
15 | 15 | #include "FormatTokenLexer.h"
|
16 | 16 | #include "FormatToken.h"
|
| 17 | +#include "clang/Basic/CharInfo.h" |
17 | 18 | #include "clang/Basic/SourceLocation.h"
|
18 | 19 | #include "clang/Basic/SourceManager.h"
|
19 | 20 | #include "clang/Format/Format.h"
|
@@ -1203,16 +1204,22 @@ static size_t countLeadingWhitespace(StringRef Text) {
|
1203 | 1204 | const unsigned char *const End = Text.bytes_end();
|
1204 | 1205 | const unsigned char *Cur = Begin;
|
1205 | 1206 | while (Cur < End) {
|
1206 |
| - if (isspace(Cur[0])) { |
| 1207 | + if (isWhitespace(Cur[0])) { |
1207 | 1208 | ++Cur;
|
1208 |
| - } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) { |
1209 |
| - // A '\' followed by a newline always escapes the newline, regardless |
1210 |
| - // of whether there is another '\' before it. |
| 1209 | + } else if (Cur[0] == '\\') { |
| 1210 | + // A backslash followed by optional horizontal whitespaces (P2223R2) and |
| 1211 | + // then a newline always escapes the newline. |
1211 | 1212 | // The source has a null byte at the end. So the end of the entire input
|
1212 | 1213 | // isn't reached yet. Also the lexer doesn't break apart an escaped
|
1213 | 1214 | // newline.
|
1214 |
| - assert(End - Cur >= 2); |
1215 |
| - Cur += 2; |
| 1215 | + const auto *Lookahead = Cur + 1; |
| 1216 | + while (isHorizontalWhitespace(*Lookahead)) |
| 1217 | + ++Lookahead; |
| 1218 | + // No line splice found; the backslash is a token. |
| 1219 | + if (!isVerticalWhitespace(*Lookahead)) |
| 1220 | + break; |
| 1221 | + // Splice found, consume it. |
| 1222 | + Cur = Lookahead + 1; |
1216 | 1223 | } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
|
1217 | 1224 | (Cur[3] == '\n' || Cur[3] == '\r')) {
|
1218 | 1225 | // Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the
|
@@ -1295,13 +1302,18 @@ FormatToken *FormatTokenLexer::getNextToken() {
|
1295 | 1302 | case '/':
|
1296 | 1303 | // The text was entirely whitespace when this loop was entered. Thus
|
1297 | 1304 | // this has to be an escape sequence.
|
1298 |
| - assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" || |
1299 |
| - Text.substr(i, 4) == "\?\?/\r" || |
| 1305 | + assert(Text.substr(i, 4) == "\?\?/\r" || |
1300 | 1306 | Text.substr(i, 4) == "\?\?/\n" ||
|
1301 | 1307 | (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
|
1302 | 1308 | Text.substr(i - 1, 4) == "\?\?/\n")) ||
|
1303 | 1309 | (i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
|
1304 |
| - Text.substr(i - 2, 4) == "\?\?/\n"))); |
| 1310 | + Text.substr(i - 2, 4) == "\?\?/\n")) || |
| 1311 | + (Text[i] == '\\' && [&]() -> bool { |
| 1312 | + size_t j = i + 1; |
| 1313 | + while (j < Text.size() && isHorizontalWhitespace(Text[j])) |
| 1314 | + ++j; |
| 1315 | + return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r'); |
| 1316 | + }())); |
1305 | 1317 | InEscape = true;
|
1306 | 1318 | break;
|
1307 | 1319 | default:
|
|
0 commit comments