Skip to content

Commit ebffdd1

Browse files
johnno1962 authored and DougGregor committed
[Parse] An implementation for SE-0182
(cherry picked from commit c0fcc1a)
1 parent c176490 commit ebffdd1

File tree

3 files changed

+91
-2
lines changed

3 files changed

+91
-2
lines changed

lib/Parse/Lexer.cpp

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,27 @@ unsigned Lexer::lexUnicodeEscape(const char *&CurPtr, Lexer *Diags) {
11221122
return CharValue;
11231123
}
11241124

1125+
/// maybeConsumeNewlineEscape - Check for a valid elided newline escape and,
/// if one is present, move the pointer passed in to the character after the
/// end of the line.
///
/// Scanning begins at \p CurPtr + \p Offset and skips any run of spaces and
/// horizontal tabs. The escape is valid only if the next character after that
/// run is a line ending: "\n", "\r", or "\r\n" (consumed as a single ending).
///
/// \param CurPtr  In/out cursor. On success it is set to the first character
///                after the line ending; on failure it is left unmodified.
/// \param Offset  Signed displacement added to \p CurPtr to obtain the scan
///                start (callers that have already stepped past the first
///                character pass a negative value).
/// \returns true if only spaces/tabs precede a line ending; false if any other
///          character, including a NUL byte, is encountered first.
static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
  const char *TmpPtr = CurPtr + Offset;
  while (true) {
    switch (*TmpPtr++) {
    case ' ':
    case '\t':
      // Horizontal whitespace may trail the backslash; keep scanning.
      continue;
    case '\r':
      // Treat "\r\n" as one line ending rather than two.
      if (*TmpPtr == '\n')
        ++TmpPtr;
      CurPtr = TmpPtr;
      return true;
    case '\n':
      CurPtr = TmpPtr;
      return true;
    default:
      // Anything else (a NUL byte included) means this is not a newline
      // escape; CurPtr is deliberately left where the caller had it.
      return false;
    }
  }
}
11251146

11261147
/// lexCharacter - Read a character and return its UTF32 code. If this is the
11271148
/// end of enclosing string/character sequence (i.e. the character is equal to
@@ -1187,6 +1208,10 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
11871208
unsigned CharValue = 0;
11881209
// Escape processing. We already ate the "\".
11891210
switch (*CurPtr) {
1211+
case ' ': case '\t': case '\n': case '\r':
1212+
if (MultilineString && maybeConsumeNewlineEscape(CurPtr, 0))
1213+
return '\n';
1214+
LLVM_FALLTHROUGH;
11901215
default: // Invalid escape.
11911216
if (EmitDiagnostics)
11921217
diagnose(CurPtr, diag::lex_invalid_escape);
@@ -1313,7 +1338,11 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
13131338
// Entering a recursive interpolated expression
13141339
OpenDelimiters.push_back('(');
13151340
continue;
1316-
case '\n': case '\r': case 0:
1341+
case '\n': case '\r':
1342+
if (AllowNewline.back())
1343+
continue;
1344+
LLVM_FALLTHROUGH;
1345+
case 0:
13171346
// Don't jump over newline/EOF due to preceding backslash!
13181347
return CurPtr-1;
13191348
default:
@@ -1816,12 +1845,14 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
18161845
// we know that there is a terminating " character. Use BytesPtr to avoid a
18171846
// range check subscripting on the StringRef.
18181847
const char *BytesPtr = Bytes.begin();
1848+
bool IsEscapedNewline = false;
18191849
while (BytesPtr < Bytes.end()) {
18201850
char CurChar = *BytesPtr++;
18211851

18221852
// Multiline string line ending normalization and indent stripping.
18231853
if (CurChar == '\r' || CurChar == '\n') {
1824-
bool stripNewline = IsFirstSegment && BytesPtr - 1 == Bytes.begin();
1854+
bool stripNewline = IsEscapedNewline ||
1855+
(IsFirstSegment && BytesPtr - 1 == Bytes.begin());
18251856
if (CurChar == '\r' && *BytesPtr == '\n')
18261857
BytesPtr++;
18271858
if (*BytesPtr != '\r' && *BytesPtr != '\n')
@@ -1830,6 +1861,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
18301861
stripNewline = true;
18311862
if (!stripNewline)
18321863
TempString.push_back('\n');
1864+
IsEscapedNewline = false;
18331865
continue;
18341866
}
18351867

@@ -1854,6 +1886,12 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
18541886
case '\'': TempString.push_back('\''); continue;
18551887
case '\\': TempString.push_back('\\'); continue;
18561888

1889+
case ' ': case '\t': case '\n': case '\r':
1890+
if (maybeConsumeNewlineEscape(BytesPtr, -1)) {
1891+
IsEscapedNewline = true;
1892+
BytesPtr--;
1893+
}
1894+
continue;
18571895

18581896
// String interpolation.
18591897
case '(':

test/Parse/multiline_errors.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,9 @@ _ = "hello\(
120120
""")!"
121121
// expected-error@-4 {{unterminated string literal}}
122122
// expected-error@-2 {{unterminated string literal}}
123+
124+
_ = """
125+
line one \ non-whitepace
126+
line two
127+
"""
128+
// expected-error@-3 {{invalid escape sequence in literal}}

test/Parse/multiline_string.swift

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,51 @@ _ = """
108108
"""
109109
// CHECK: "Twelve\nNu"
110110

111+
_ = """
112+
newline \
113+
elided
114+
"""
115+
// CHECK: "newline elided"
116+
117+
// contains trailing whitespace
118+
_ = """
119+
trailing \
120+
\("""
121+
substring1 \
122+
\("""
123+
substring2 \
124+
substring3
125+
""")\
126+
""") \
127+
whitepsace
128+
"""
129+
// CHECK: "trailing "
130+
// CHECK: "substring1 "
131+
// CHECK: "substring2 substring3"
132+
// CHECK: " whitepsace"
133+
134+
// contains trailing whitespace
135+
_ = """
136+
foo\
137+
138+
bar
139+
"""
140+
// CHECK: "foo\nbar"
141+
142+
// contains trailing whitespace
143+
_ = """
144+
foo\
145+
146+
bar
147+
"""
148+
// CHECK: "foo\nbar"
149+
150+
_ = """
151+
foo \
152+
bar
153+
"""
154+
// CHECK: "foo bar"
155+
111156
_ = """
112157
113158
ABC

0 commit comments

Comments
 (0)