Skip to content

Commit 4da8cbe

Browse files
johnno1962 and beccadax
authored and committed
Implement SE-0200 (extended escaping in string literals)
Supports string literals like #"foo"\n"bar"#.
1 parent 5e2b705 commit 4da8cbe

File tree

9 files changed

+357
-79
lines changed

9 files changed

+357
-79
lines changed

include/swift/AST/DiagnosticsParse.def

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,10 @@ ERROR(lex_invalid_u_escape,none,
138138
"\\u{...} escape sequence expects between 1 and 8 hex digits", ())
139139
ERROR(lex_invalid_u_escape_rbrace,none,
140140
"expected '}' in \\u{...} escape sequence", ())
141+
ERROR(lex_invalid_escape_delimiter,none,
142+
"too many '#' characters in delimited escape", ())
143+
ERROR(lex_invalid_closing_delimiter,none,
144+
"too many '#' characters in closing delimiter", ())
141145

142146
ERROR(lex_invalid_unicode_scalar,none,
143147
"invalid unicode scalar", ())

include/swift/Parse/Lexer.h

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -364,19 +364,21 @@ class Lexer {
364364
enum : char { Literal, Expr } Kind;
365365
// Loc+Length for the segment inside the string literal, without quotes.
366366
SourceLoc Loc;
367-
unsigned Length, IndentToStrip;
367+
unsigned Length, IndentToStrip, CustomDelimiterLen;
368368
bool IsFirstSegment, IsLastSegment;
369369

370370
static StringSegment getLiteral(SourceLoc Loc, unsigned Length,
371371
bool IsFirstSegment, bool IsLastSegment,
372-
unsigned IndentToStrip) {
372+
unsigned IndentToStrip,
373+
unsigned CustomDelimiterLen) {
373374
StringSegment Result;
374375
Result.Kind = Literal;
375376
Result.Loc = Loc;
376377
Result.Length = Length;
377378
Result.IsFirstSegment = IsFirstSegment;
378379
Result.IsLastSegment = IsLastSegment;
379380
Result.IndentToStrip = IndentToStrip;
381+
Result.CustomDelimiterLen = CustomDelimiterLen;
380382
return Result;
381383
}
382384

@@ -388,6 +390,7 @@ class Lexer {
388390
Result.IsFirstSegment = false;
389391
Result.IsLastSegment = false;
390392
Result.IndentToStrip = 0;
393+
Result.CustomDelimiterLen = 0;
391394
return Result;
392395
}
393396

@@ -404,13 +407,14 @@ class Lexer {
404407
SmallVectorImpl<char> &Buffer,
405408
bool IsFirstSegment = false,
406409
bool IsLastSegment = false,
407-
unsigned IndentToStrip = 0);
410+
unsigned IndentToStrip = 0,
411+
unsigned CustomDelimiterLen = 0);
408412
StringRef getEncodedStringSegment(StringSegment Segment,
409413
SmallVectorImpl<char> &Buffer) const {
410414
return getEncodedStringSegment(
411415
StringRef(getBufferPtrForSourceLoc(Segment.Loc), Segment.Length),
412416
Buffer, Segment.IsFirstSegment, Segment.IsLastSegment,
413-
Segment.IndentToStrip);
417+
Segment.IndentToStrip, Segment.CustomDelimiterLen);
414418
}
415419

416420
/// \brief Given a string literal token, separate it into string/expr segments
@@ -474,7 +478,8 @@ class Lexer {
474478
return diagnose(Loc, Diagnostic(DiagID, std::forward<ArgTypes>(Args)...));
475479
}
476480

477-
void formToken(tok Kind, const char *TokStart, bool MultilineString = false);
481+
void formToken(tok Kind, const char *TokStart, bool IsMultilineString = false,
482+
unsigned CustomDelimiterLen = 0);
478483
void formEscapedIdentifierToken(const char *TokStart);
479484

480485
/// Advance to the end of the line.
@@ -498,10 +503,10 @@ class Lexer {
498503
void lexTrivia(syntax::Trivia &T, bool IsForTrailingTrivia);
499504
static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);
500505

501-
unsigned lexCharacter(const char *&CurPtr,
502-
char StopQuote, bool EmitDiagnostics,
503-
bool MultilineString = false);
504-
void lexStringLiteral();
506+
unsigned lexCharacter(const char *&CurPtr, char StopQuote,
507+
bool EmitDiagnostics, bool IsMultilineString = false,
508+
unsigned CustomDelimiterLen = 0);
509+
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
505510
void lexEscapedIdentifier();
506511

507512
void tryLexEditorPlaceholder();

include/swift/Parse/Token.h

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,10 @@ class Token {
4545
/// Modifiers for string literals
4646
unsigned MultilineString : 1;
4747

48-
// Padding bits == 32 - sizeof(Kind) * 8 - 3;
48+
/// Length of custom delimiter of "raw" string literals
49+
unsigned CustomDelimiterLen : 8;
50+
51+
// Padding bits == 32 - 11;
4952

5053
/// \brief The length of the comment that precedes the token.
5154
unsigned CommentLength;
@@ -62,8 +65,8 @@ class Token {
6265
public:
6366
Token(tok Kind, StringRef Text, unsigned CommentLength = 0)
6467
: Kind(Kind), AtStartOfLine(false), EscapedIdentifier(false),
65-
MultilineString(false), CommentLength(CommentLength),
66-
Text(Text) {}
68+
MultilineString(false), CustomDelimiterLen(0),
69+
CommentLength(CommentLength), Text(Text) {}
6770

6871
Token() : Token(tok::NUM_TOKENS, {}, 0) {}
6972

@@ -266,17 +269,24 @@ class Token {
266269

267270
/// \brief Set the token to the specified kind and source range.
268271
void setToken(tok K, StringRef T, unsigned CommentLength = 0,
269-
bool MultilineString = false) {
272+
bool IsMultilineString = false, unsigned CustomDelimiterLen = 0) {
270273
Kind = K;
271274
Text = T;
272275
this->CommentLength = CommentLength;
273276
EscapedIdentifier = false;
274-
this->MultilineString = MultilineString;
277+
this->MultilineString = IsMultilineString;
278+
this->CustomDelimiterLen = CustomDelimiterLen;
279+
assert(this->CustomDelimiterLen == CustomDelimiterLen &&
280+
"custom string delimiter length > 255");
275281
}
276282

277-
bool IsMultilineString() const {
283+
bool isMultilineString() const {
278284
return MultilineString;
279285
}
286+
287+
unsigned getCustomDelimiterLen() const {
288+
return CustomDelimiterLen;
289+
}
280290
};
281291

282292
} // end namespace swift

0 commit comments

Comments
 (0)