Skip to content

Implementation for SE-200 (raw string escaping) #19191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion include/swift/AST/DiagnosticsParse.def
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ ERROR(lex_invalid_u_escape,none,
"\\u{...} escape sequence expects between 1 and 8 hex digits", ())
ERROR(lex_invalid_u_escape_rbrace,none,
"expected '}' in \\u{...} escape sequence", ())
ERROR(lex_invalid_escape_delimiter,none,
"too many '#' characters in delimited escape", ())
ERROR(lex_invalid_closing_delimiter,none,
"too many '#' characters in closing delimiter", ())

ERROR(lex_invalid_unicode_scalar,none,
"invalid unicode scalar", ())
Expand Down Expand Up @@ -1302,7 +1306,9 @@ ERROR(swift_native_objc_runtime_base_must_be_identifier,none,
"@_swift_native_objc_runtime_base class name must be an identifier", ())

ERROR(attr_interpolated_string,none,
"%0 cannot be an interpolated string literal", (StringRef))
"'%0' cannot be an interpolated string literal", (StringRef))
ERROR(attr_extended_escaping_string,none,
"'%0' cannot be an extended escaping string literal", (StringRef))

ERROR(attr_only_at_non_local_scope, none,
"attribute '%0' can only be used in a non-local scope", (StringRef))
Expand Down
65 changes: 49 additions & 16 deletions include/swift/Parse/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,19 +346,21 @@ class Lexer {
enum : char { Literal, Expr } Kind;
// Loc+Length for the segment inside the string literal, without quotes.
SourceLoc Loc;
unsigned Length, IndentToStrip;
unsigned Length, IndentToStrip, CustomDelimiterLen;
bool IsFirstSegment, IsLastSegment;

static StringSegment getLiteral(SourceLoc Loc, unsigned Length,
bool IsFirstSegment, bool IsLastSegment,
unsigned IndentToStrip) {
unsigned IndentToStrip,
unsigned CustomDelimiterLen) {
StringSegment Result;
Result.Kind = Literal;
Result.Loc = Loc;
Result.Length = Length;
Result.IsFirstSegment = IsFirstSegment;
Result.IsLastSegment = IsLastSegment;
Result.IndentToStrip = IndentToStrip;
Result.CustomDelimiterLen = CustomDelimiterLen;
return Result;
}

Expand All @@ -370,6 +372,7 @@ class Lexer {
Result.IsFirstSegment = false;
Result.IsLastSegment = false;
Result.IndentToStrip = 0;
Result.CustomDelimiterLen = 0;
return Result;
}

Expand All @@ -378,21 +381,50 @@ class Lexer {
}

};


/// Implementation of getEncodedStringSegment. Note that \p Str must support
/// reading one byte past the end.
static StringRef getEncodedStringSegmentImpl(StringRef Str,
SmallVectorImpl<char> &Buffer,
bool IsFirstSegment,
bool IsLastSegment,
unsigned IndentToStrip,
unsigned CustomDelimiterLen);

/// \brief Compute the bytes that the actual string literal should codegen to.
/// If a copy needs to be made, it will be allocated out of the provided
/// Buffer.
static StringRef getEncodedStringSegment(StringRef Str,
SmallVectorImpl<char> &Buffer,
bool IsFirstSegment = false,
bool IsLastSegment = false,
unsigned IndentToStrip = 0);
/// \p Buffer.
StringRef getEncodedStringSegment(StringSegment Segment,
SmallVectorImpl<char> &Buffer) const {
return getEncodedStringSegment(
return getEncodedStringSegmentImpl(
StringRef(getBufferPtrForSourceLoc(Segment.Loc), Segment.Length),
Buffer, Segment.IsFirstSegment, Segment.IsLastSegment,
Segment.IndentToStrip);
Segment.IndentToStrip, Segment.CustomDelimiterLen);
}

/// \brief Compute the byte content of \p Str, a string that is encoded with
/// escapes in the same way as a string literal.
///
/// The input is copied into a local, NUL-terminated scratch buffer and the
/// actual work is delegated to getEncodedStringSegmentImpl() on that copy, so
/// \p Str itself does not need to be readable past its end.
///
/// \returns \p Str when the computed text compares equal to the input;
/// otherwise a StringRef whose storage is the provided \p Buffer.
static StringRef getEncodedStringSegment(StringRef Str,
                                         SmallVectorImpl<char> &Buffer,
                                         bool IsFirstSegment = false,
                                         bool IsLastSegment = false,
                                         unsigned IndentToStrip = 0,
                                         unsigned CustomDelimiterLen = 0) {
  // Build a copy of the input followed by a single '\0' byte.
  SmallString<128> Scratch(Str);
  Scratch.push_back('\0');

  // View of the copy that excludes the terminator but is backed by it.
  const StringRef Padded = StringRef(Scratch).drop_back();

  const StringRef Encoded = getEncodedStringSegmentImpl(
      Padded, Buffer, IsFirstSegment, IsLastSegment, IndentToStrip,
      CustomDelimiterLen);

  if (Encoded != Padded) {
    // Anything that differs from the input is expected to live in Buffer.
    assert(Encoded.data() == Buffer.data());
    return Encoded;
  }

  // Same text as the input: return the caller's own string. Presumably this
  // avoids returning a view into Scratch, which is destroyed on return.
  return Str;
}

/// \brief Given a string literal token, separate it into string/expr segments
Expand Down Expand Up @@ -456,7 +488,8 @@ class Lexer {
return diagnose(Loc, Diagnostic(DiagID, std::forward<ArgTypes>(Args)...));
}

void formToken(tok Kind, const char *TokStart, bool MultilineString = false);
void formToken(tok Kind, const char *TokStart, bool IsMultilineString = false,
unsigned CustomDelimiterLen = 0);
void formEscapedIdentifierToken(const char *TokStart);

/// Advance to the end of the line.
Expand All @@ -480,10 +513,10 @@ class Lexer {
void lexTrivia(syntax::Trivia &T, bool IsForTrailingTrivia);
static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);

unsigned lexCharacter(const char *&CurPtr,
char StopQuote, bool EmitDiagnostics,
bool MultilineString = false);
void lexStringLiteral();
unsigned lexCharacter(const char *&CurPtr, char StopQuote,
bool EmitDiagnostics, bool IsMultilineString = false,
unsigned CustomDelimiterLen = 0);
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
void lexEscapedIdentifier();

void tryLexEditorPlaceholder();
Expand Down
22 changes: 16 additions & 6 deletions include/swift/Parse/Token.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ class Token {
/// Modifiers for string literals
unsigned MultilineString : 1;

// Padding bits == 32 - sizeof(Kind) * 8 - 3;
/// Length of custom delimiter of "raw" string literals
unsigned CustomDelimiterLen : 8;

// Padding bits == 32 - 11;

/// \brief The length of the comment that precedes the token.
unsigned CommentLength;
Expand All @@ -62,8 +65,8 @@ class Token {
public:
Token(tok Kind, StringRef Text, unsigned CommentLength = 0)
: Kind(Kind), AtStartOfLine(false), EscapedIdentifier(false),
MultilineString(false), CommentLength(CommentLength),
Text(Text) {}
MultilineString(false), CustomDelimiterLen(0),
CommentLength(CommentLength), Text(Text) {}

Token() : Token(tok::NUM_TOKENS, {}, 0) {}

Expand Down Expand Up @@ -266,17 +269,24 @@ class Token {

/// \brief Set the token to the specified kind and source range.
void setToken(tok K, StringRef T, unsigned CommentLength = 0,
bool MultilineString = false) {
bool IsMultilineString = false, unsigned CustomDelimiterLen = 0) {
Kind = K;
Text = T;
this->CommentLength = CommentLength;
EscapedIdentifier = false;
this->MultilineString = MultilineString;
this->MultilineString = IsMultilineString;
this->CustomDelimiterLen = CustomDelimiterLen;
assert(this->CustomDelimiterLen == CustomDelimiterLen &&
"custom string delimiter length > 255");
}

bool IsMultilineString() const {
bool isMultilineString() const {
return MultilineString;
}

/// \brief Returns the length of the custom delimiter recorded for this token
/// (used by "raw" string literals).
///
/// The value is 0 unless a non-zero length was passed to setToken(). It is
/// stored in an 8-bit field, so it never exceeds 255.
unsigned getCustomDelimiterLen() const {
return CustomDelimiterLen;
}
};

} // end namespace swift
Expand Down
Loading