Skip to content

Commit 9269c5c

Browse files
authored
Merge pull request #34414 from maustinstar/sr-10011
[SR-10011] [Lexer] Raw Strings escape character sequence resembling multiline delimiter
2 parents 290923c + 75eed47 commit 9269c5c

File tree

3 files changed

+98
-19
lines changed

3 files changed

+98
-19
lines changed

lib/Parse/Lexer.cpp

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,19 +1241,6 @@ static bool diagnoseZeroWidthMatchAndAdvance(char Target, const char *&CurPtr,
12411241
return *CurPtr == Target && CurPtr++;
12421242
}
12431243

1244-
/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
1245-
static bool advanceIfMultilineDelimiter(const char *&CurPtr,
1246-
DiagnosticEngine *Diags) {
1247-
const char *TmpPtr = CurPtr;
1248-
if (*(TmpPtr - 1) == '"' &&
1249-
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags) &&
1250-
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags)) {
1251-
CurPtr = TmpPtr;
1252-
return true;
1253-
}
1254-
return false;
1255-
}
1256-
12571244
/// advanceIfCustomDelimiter - Extracts/detects any custom delimiter on
12581245
/// opening a string literal, advances CurPtr if a delimiter is found and
12591246
/// returns a non-zero delimiter length. CurPtr[-1] must be '#' when called.
@@ -1300,6 +1287,37 @@ static bool delimiterMatches(unsigned CustomDelimiterLen, const char *&BytesPtr,
13001287
return true;
13011288
}
13021289

1290+
/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
1291+
static bool advanceIfMultilineDelimiter(unsigned CustomDelimiterLen,
1292+
const char *&CurPtr,
1293+
DiagnosticEngine *Diags,
1294+
bool IsOpening = false) {
1295+
1296+
// Test for single-line string literals that resemble multiline delimiter.
1297+
const char *TmpPtr = CurPtr + 1;
1298+
if (IsOpening && CustomDelimiterLen) {
1299+
while (*TmpPtr != '\r' && *TmpPtr != '\n') {
1300+
if (*TmpPtr == '"') {
1301+
if (delimiterMatches(CustomDelimiterLen, ++TmpPtr, nullptr)) {
1302+
return false;
1303+
}
1304+
continue;
1305+
}
1306+
++TmpPtr;
1307+
}
1308+
}
1309+
1310+
TmpPtr = CurPtr;
1311+
if (*(TmpPtr - 1) == '"' &&
1312+
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags) &&
1313+
diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags)) {
1314+
CurPtr = TmpPtr;
1315+
return true;
1316+
}
1317+
1318+
return false;
1319+
}
1320+
13031321
/// lexCharacter - Read a character and return its UTF32 code. If this is the
13041322
/// end of enclosing string/character sequence (i.e. the character is equal to
13051323
/// 'StopQuote'), this returns ~0U and advances 'CurPtr' pointing to the end of
@@ -1342,7 +1360,8 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
13421360

13431361
DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr;
13441362
auto TmpPtr = CurPtr;
1345-
if (IsMultilineString && !advanceIfMultilineDelimiter(TmpPtr, D))
1363+
if (IsMultilineString &&
1364+
!advanceIfMultilineDelimiter(CustomDelimiterLen, TmpPtr, D))
13461365
return '"';
13471366
if (CustomDelimiterLen &&
13481367
!delimiterMatches(CustomDelimiterLen, TmpPtr, D, /*IsClosing=*/true))
@@ -1478,7 +1497,9 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
14781497
if (!inStringLiteral()) {
14791498
// Open string literal.
14801499
OpenDelimiters.push_back(CurPtr[-1]);
1481-
AllowNewline.push_back(advanceIfMultilineDelimiter(CurPtr, nullptr));
1500+
AllowNewline.push_back(advanceIfMultilineDelimiter(CustomDelimiterLen,
1501+
CurPtr, nullptr,
1502+
true));
14821503
CustomDelimiter.push_back(CustomDelimiterLen);
14831504
continue;
14841505
}
@@ -1490,7 +1511,8 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
14901511
continue;
14911512

14921513
// Multi-line string can only be closed by '"""'.
1493-
if (AllowNewline.back() && !advanceIfMultilineDelimiter(CurPtr, nullptr))
1514+
if (AllowNewline.back() &&
1515+
!advanceIfMultilineDelimiter(CustomDelimiterLen, CurPtr, nullptr))
14941516
continue;
14951517

14961518
// Check whether we have equivalent number of '#'s.
@@ -1827,7 +1849,8 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
18271849
// diagnostics about changing them to double quotes.
18281850
assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start");
18291851

1830-
bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags);
1852+
bool IsMultilineString = advanceIfMultilineDelimiter(CustomDelimiterLen,
1853+
CurPtr, Diags, true);
18311854
if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r')
18321855
diagnose(CurPtr, diag::lex_illegal_multiline_string_start)
18331856
.fixItInsert(Lexer::getSourceLoc(CurPtr), "\n");

test/Parse/raw_string.swift

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,49 @@ _ = ##"""
6868
"""##
6969
// CHECK: "a raw string with \"\"\" in it"
7070

71+
// ===---------- False Multiline Delimiters --------===
72+
73+
/// Source code contains zero-width character in this format: `#"[U+200B]"[U+200B]"#`
74+
/// The check contains zero-width character in this format: `"[U+200B]\"[U+200B]"`
75+
/// If this check fails after you implement `diagnoseZeroWidthMatchAndAdvance`,
76+
/// then you may need to tweak how to test for single-line string literals that
77+
/// resemble a multiline delimiter in `advanceIfMultilineDelimiter` so that it
78+
/// passes again.
79+
/// See https://bugs.swift.org/browse/SR-8678
80+
_ = #"​"​"#
81+
// CHECK: "​\"​"
82+
83+
_ = #""""#
84+
// CHECK: "\"\""
85+
86+
_ = #"""""#
87+
// CHECK: "\"\"\""
88+
89+
_ = #""""""#
90+
// CHECK: "\"\"\"\""
91+
92+
_ = #"""#
93+
// CHECK: "\""
94+
95+
_ = ##""" foo # "# "##
96+
// CHECK: "\"\" foo # \"# "
97+
98+
_ = ###""" "# "## "###
99+
// CHECK: "\"\" \"# \"## "
100+
101+
_ = ###"""##"###
102+
// CHECK: "\"\"##"
103+
104+
_ = "interpolating \(#"""false delimiter"#)"
105+
// CHECK: "interpolating "
106+
// CHECK: "\"\"false delimiter"
107+
108+
_ = """
109+
interpolating \(#"""false delimiters"""#)
110+
"""
111+
// CHECK: "interpolating "
112+
// CHECK: "\"\"false delimiters\"\""
113+
71114
let foo = "Interpolation"
72115
_ = #"\b\b \#(foo)\#(foo) Kappa"#
73116
// CHECK: "\\b\\b "

test/Parse/raw_string_errors.swift

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ let _ = #"\##("invalid")"#
99
// expected-error@-1{{too many '#' characters in delimited escape}}
1010
// expected-error@-2{{invalid escape sequence in literal}}
1111

12+
let _ = ###"""invalid"######
13+
// expected-error@-1{{too many '#' characters in closing delimiter}}{{26-29=}}
14+
// expected-error@-2{{consecutive statements on a line must be separated by ';'}}
15+
// expected-error@-3{{expected expression}}
16+
1217
let _ = ####"invalid"###
1318
// expected-error@-1{{unterminated string literal}}
1419
@@ -17,8 +22,16 @@ let _ = ###"invalid"######
1722
// expected-error@-2{{consecutive statements on a line must be separated by ';'}}
1823
// expected-error@-3{{expected expression}}
1924

20-
let _ = ##"""##
25+
let _ = ##"""aa
2126
foobar
22-
##"""##
27+
aa"""##
2328
// expected-error@-3{{multi-line string literal content must begin on a new line}}{{14-14=\n}}
2429
// expected-error@-2{{multi-line string literal closing delimiter must begin on a new line}}{{5-5=\n}}
30+
31+
let _ = #""" foo "bar" #baz
32+
"""#
33+
// expected-error@-2{{multi-line string literal content must begin on a new line}}{{13-13=\n}}
34+
35+
let _ = ###""" "# "##
36+
"""###
37+
// expected-error@-2{{multi-line string literal content must begin on a new line}}{{15-15=\n}}

0 commit comments

Comments
 (0)