Skip to content

Commit c0fcc1a

Browse files
committed
[Parse] An implementation for SE-0182
1 parent 3f308b7 commit c0fcc1a

File tree

3 files changed

+91
-2
lines changed

3 files changed

+91
-2
lines changed

lib/Parse/Lexer.cpp

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,27 @@ unsigned Lexer::lexUnicodeEscape(const char *&CurPtr, Lexer *Diags) {
11891189
return CharValue;
11901190
}
11911191

1192+
/// maybeConsumeNewlineEscape - Try to consume an elided (escaped) newline.
///
/// Scanning begins at \p CurPtr + \p Offset and skips any run of spaces and
/// tabs. If that run is immediately followed by a line ending ("\n", "\r" or
/// "\r\n"), \p CurPtr is updated to point at the first character after the
/// line ending and true is returned. For any other character (including NUL)
/// \p CurPtr is left unchanged and false is returned.
static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
  const char *Cursor = CurPtr + Offset;

  // Skip horizontal whitespace that precedes the line ending.
  while (*Cursor == ' ' || *Cursor == '\t')
    ++Cursor;

  // Anything other than a line ending here means this is not a newline escape.
  const char Terminator = *Cursor;
  if (Terminator != '\n' && Terminator != '\r')
    return false;
  ++Cursor;

  // Treat "\r\n" as a single line ending.
  if (Terminator == '\r' && *Cursor == '\n')
    ++Cursor;

  CurPtr = Cursor;
  return true;
}
11921213

11931214
/// lexCharacter - Read a character and return its UTF32 code. If this is the
11941215
/// end of enclosing string/character sequence (i.e. the character is equal to
@@ -1254,6 +1275,10 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
12541275
unsigned CharValue = 0;
12551276
// Escape processing. We already ate the "\".
12561277
switch (*CurPtr) {
1278+
case ' ': case '\t': case '\n': case '\r':
1279+
if (MultilineString && maybeConsumeNewlineEscape(CurPtr, 0))
1280+
return '\n';
1281+
LLVM_FALLTHROUGH;
12571282
default: // Invalid escape.
12581283
if (EmitDiagnostics)
12591284
diagnose(CurPtr, diag::lex_invalid_escape);
@@ -1380,7 +1405,11 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
13801405
// Entering a recursive interpolated expression
13811406
OpenDelimiters.push_back('(');
13821407
continue;
1383-
case '\n': case '\r': case 0:
1408+
case '\n': case '\r':
1409+
if (AllowNewline.back())
1410+
continue;
1411+
LLVM_FALLTHROUGH;
1412+
case 0:
13841413
// Don't jump over newline/EOF due to preceding backslash!
13851414
return CurPtr-1;
13861415
default:
@@ -1883,12 +1912,14 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
18831912
// we know that there is a terminating " character. Use BytesPtr to avoid a
18841913
// range check subscripting on the StringRef.
18851914
const char *BytesPtr = Bytes.begin();
1915+
bool IsEscapedNewline = false;
18861916
while (BytesPtr < Bytes.end()) {
18871917
char CurChar = *BytesPtr++;
18881918

18891919
// Multiline string line ending normalization and indent stripping.
18901920
if (CurChar == '\r' || CurChar == '\n') {
1891-
bool stripNewline = IsFirstSegment && BytesPtr - 1 == Bytes.begin();
1921+
bool stripNewline = IsEscapedNewline ||
1922+
(IsFirstSegment && BytesPtr - 1 == Bytes.begin());
18921923
if (CurChar == '\r' && *BytesPtr == '\n')
18931924
BytesPtr++;
18941925
if (*BytesPtr != '\r' && *BytesPtr != '\n')
@@ -1897,6 +1928,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
18971928
stripNewline = true;
18981929
if (!stripNewline)
18991930
TempString.push_back('\n');
1931+
IsEscapedNewline = false;
19001932
continue;
19011933
}
19021934

@@ -1921,6 +1953,12 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
19211953
case '\'': TempString.push_back('\''); continue;
19221954
case '\\': TempString.push_back('\\'); continue;
19231955

1956+
case ' ': case '\t': case '\n': case '\r':
1957+
if (maybeConsumeNewlineEscape(BytesPtr, -1)) {
1958+
IsEscapedNewline = true;
1959+
BytesPtr--;
1960+
}
1961+
continue;
19241962

19251963
// String interpolation.
19261964
case '(':

test/Parse/multiline_errors.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,9 @@ _ = "hello\(
120120
""")!"
121121
// expected-error@-4 {{unterminated string literal}}
122122
// expected-error@-2 {{unterminated string literal}}
123+
124+
_ = """
125+
line one \ non-whitepace
126+
line two
127+
"""
128+
// expected-error@-3 {{invalid escape sequence in literal}}

test/Parse/multiline_string.swift

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,51 @@ _ = """
108108
"""
109109
// CHECK: "Twelve\nNu"
110110

111+
_ = """
112+
newline \
113+
elided
114+
"""
115+
// CHECK: "newline elided"
116+
117+
// contains trailing whitespace
118+
_ = """
119+
trailing \
120+
\("""
121+
substring1 \
122+
\("""
123+
substring2 \
124+
substring3
125+
""")\
126+
""") \
127+
whitepsace
128+
"""
129+
// CHECK: "trailing "
130+
// CHECK: "substring1 "
131+
// CHECK: "substring2 substring3"
132+
// CHECK: " whitepsace"
133+
134+
// contains trailing whitespace
135+
_ = """
136+
foo\
137+
138+
bar
139+
"""
140+
// CHECK: "foo\nbar"
141+
142+
// contains trailing whitespace
143+
_ = """
144+
foo\
145+
146+
bar
147+
"""
148+
// CHECK: "foo\nbar"
149+
150+
_ = """
151+
foo \
152+
bar
153+
"""
154+
// CHECK: "foo bar"
155+
111156
_ = """
112157
113158
ABC

0 commit comments

Comments
 (0)