@@ -267,7 +267,9 @@ Token Lexer::getTokenAt(SourceLoc Loc) {
267
267
return Result;
268
268
}
269
269
270
- void Lexer::formToken (tok Kind, const char *TokStart, bool MultilineString) {
270
+ void Lexer::formToken (tok Kind, const char *TokStart,
271
+ bool MultilineString, bool RawString,
272
+ size_t DelimiterLength) {
271
273
assert (CurPtr >= BufferStart &&
272
274
CurPtr <= BufferEnd && " Current pointer out of range!" );
273
275
@@ -293,7 +295,9 @@ void Lexer::formToken(tok Kind, const char *TokStart, bool MultilineString) {
293
295
lexTrivia (TrailingTrivia, /* IsForTrailingTrivia */ true );
294
296
}
295
297
296
- NextToken.setToken (Kind, TokenText, CommentLength, MultilineString);
298
+ NextToken.setToken (Kind, TokenText, CommentLength,
299
+ MultilineString, RawString);
300
+ CurPtr += DelimiterLength;
297
301
}
298
302
299
303
void Lexer::formEscapedIdentifierToken (const char *TokStart) {
@@ -1213,7 +1217,8 @@ static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
1213
1217
// / character_escape ::= [\][\] | [\]t | [\]n | [\]r | [\]" | [\]' | [\]0
1214
1218
// / character_escape ::= unicode_character_escape
1215
1219
unsigned Lexer::lexCharacter (const char *&CurPtr, char StopQuote,
1216
- bool EmitDiagnostics, bool MultilineString) {
1220
+ bool EmitDiagnostics, bool MultilineString,
1221
+ bool RawString) {
1217
1222
const char *CharStart = CurPtr;
1218
1223
1219
1224
switch (*CurPtr++) {
@@ -1262,6 +1267,8 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
1262
1267
diagnose (CurPtr-1 , diag::lex_unterminated_string);
1263
1268
return ~1U ;
1264
1269
case ' \\ ' : // Escapes.
1270
+ if (RawString)
1271
+ return ' \\ ' ;
1265
1272
break ;
1266
1273
}
1267
1274
@@ -1489,7 +1496,7 @@ getMultilineTrailingIndent(const Token &Str, DiagnosticEngine *Diags) {
1489
1496
auto string = StringRef (start, end - start);
1490
1497
1491
1498
// Disallow escaped newline in the last line.
1492
- if (Diags) {
1499
+ if (Diags && !Str. IsRawString () ) {
1493
1500
auto *Ptr = start - 1 ;
1494
1501
if (*Ptr == ' \n ' ) --Ptr;
1495
1502
if (*Ptr == ' \r ' ) --Ptr;
@@ -1645,25 +1652,28 @@ static void validateMultilineIndents(const Token &Str,
1645
1652
// / lexStringLiteral:
1646
1653
// / string_literal ::= ["]([^"\\\n\r]|character_escape)*["]
1647
1654
// / string_literal ::= ["]["]["].*["]["]["] - approximately
1648
- void Lexer::lexStringLiteral () {
1655
+ void Lexer::lexStringLiteral (bool RawString, std::string Delimiter) {
1656
+ CurPtr += Delimiter.length ();
1649
1657
const char *TokStart = CurPtr-1 ;
1650
1658
assert ((*TokStart == ' "' || *TokStart == ' \' ' ) && " Unexpected start" );
1651
1659
// NOTE: We only allow single-quote string literals so we can emit useful
1652
1660
// diagnostics about changing them to double quotes.
1653
1661
1654
1662
bool wasErroneous = false , MultilineString = false ;
1663
+ Delimiter.insert (0 , 1 , *TokStart);
1655
1664
1656
1665
// Is this the start of a multiline string literal?
1657
1666
if (*TokStart == ' "' && *CurPtr == ' "' && *(CurPtr + 1 ) == ' "' ) {
1658
1667
MultilineString = true ;
1659
1668
CurPtr += 2 ;
1669
+ Delimiter.insert (0 , 2 , *TokStart);
1660
1670
if (*CurPtr != ' \n ' && *CurPtr != ' \r ' )
1661
1671
diagnose (CurPtr, diag::lex_illegal_multiline_string_start)
1662
1672
.fixItInsert (Lexer::getSourceLoc (CurPtr), " \n " );
1663
1673
}
1664
1674
1665
1675
while (true ) {
1666
- if (*CurPtr == ' \\ ' && *(CurPtr + 1 ) == ' (' ) {
1676
+ if (*CurPtr == ' \\ ' && *(CurPtr + 1 ) == ' (' && !RawString ) {
1667
1677
// Consume tokens until we hit the corresponding ')'.
1668
1678
CurPtr += 2 ;
1669
1679
const char *EndPtr =
@@ -1687,7 +1697,8 @@ void Lexer::lexStringLiteral() {
1687
1697
return formToken (tok::unknown, TokStart);
1688
1698
}
1689
1699
1690
- unsigned CharValue = lexCharacter (CurPtr, *TokStart, true , MultilineString);
1700
+ unsigned CharValue = lexCharacter (CurPtr, *TokStart, true ,
1701
+ MultilineString, RawString);
1691
1702
wasErroneous |= CharValue == ~1U ;
1692
1703
1693
1704
// If this is the end of string, we are done. If it is a normal character
@@ -1731,20 +1742,20 @@ void Lexer::lexStringLiteral() {
1731
1742
replacement);
1732
1743
}
1733
1744
1734
- // Is this the end of a multiline string literal?
1735
- if (MultilineString ) {
1736
- if (*CurPtr == ' " ' && *(CurPtr + 1 ) == ' " ' && *(CurPtr + 2 ) != ' " ' ) {
1745
+ // Is this the end of a delimited/ multiline string literal?
1746
+ if ( StringRef (CurPtr - 1 , Delimiter. length ()) == Delimiter ) {
1747
+ if (MultilineString ) {
1737
1748
CurPtr += 2 ;
1738
- formToken (tok::string_literal, TokStart, MultilineString);
1749
+ formToken (tok::string_literal, TokStart,
1750
+ MultilineString, RawString, Delimiter.length () - 3 );
1739
1751
if (Diags)
1740
1752
validateMultilineIndents (NextToken, Diags);
1741
1753
return ;
1742
1754
}
1743
1755
else
1744
- continue ;
1756
+ return formToken (tok::string_literal, TokStart,
1757
+ MultilineString, RawString, Delimiter.length () - 1 );
1745
1758
}
1746
-
1747
- return formToken (tok::string_literal, TokStart, MultilineString);
1748
1759
}
1749
1760
}
1750
1761
}
@@ -2009,7 +2020,8 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
2009
2020
SmallVectorImpl<char > &TempString,
2010
2021
bool IsFirstSegment,
2011
2022
bool IsLastSegment,
2012
- unsigned IndentToStrip) {
2023
+ unsigned IndentToStrip,
2024
+ bool RawString) {
2013
2025
2014
2026
TempString.clear ();
2015
2027
// Note that it is always safe to read one over the end of "Bytes" because
@@ -2036,7 +2048,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
2036
2048
continue ;
2037
2049
}
2038
2050
2039
- if (CurChar != ' \\ ' ) {
2051
+ if (CurChar != ' \\ ' || RawString ) {
2040
2052
TempString.push_back (CurChar);
2041
2053
continue ;
2042
2054
}
@@ -2117,11 +2129,10 @@ void Lexer::getStringLiteralSegments(
2117
2129
// range check subscripting on the StringRef.
2118
2130
const char *SegmentStartPtr = Bytes.begin ();
2119
2131
const char *BytesPtr = SegmentStartPtr;
2120
- // FIXME: Use SSE to scan for '\'.
2121
- while (BytesPtr != Bytes.end ()) {
2122
- char CurChar = *BytesPtr++;
2123
- if (CurChar != ' \\ ' )
2124
- continue ;
2132
+ size_t pos;
2133
+ while (!Str.IsRawString () &&
2134
+ (pos = Bytes.find (' \\ ' , BytesPtr-Bytes.begin ())) != StringRef::npos) {
2135
+ BytesPtr = Bytes.begin () + pos + 1 ;
2125
2136
2126
2137
if (*BytesPtr++ != ' (' )
2127
2138
continue ;
@@ -2132,7 +2143,7 @@ void Lexer::getStringLiteralSegments(
2132
2143
Segments.push_back (
2133
2144
StringSegment::getLiteral (getSourceLoc (SegmentStartPtr),
2134
2145
BytesPtr-SegmentStartPtr-2 ,
2135
- IsFirstSegment, false , IndentToStrip));
2146
+ IsFirstSegment, false , IndentToStrip, false ));
2136
2147
IsFirstSegment = false ;
2137
2148
2138
2149
// Find the closing ')'.
@@ -2155,9 +2166,16 @@ void Lexer::getStringLiteralSegments(
2155
2166
Segments.push_back (
2156
2167
StringSegment::getLiteral (getSourceLoc (SegmentStartPtr),
2157
2168
Bytes.end ()-SegmentStartPtr,
2158
- IsFirstSegment, true , IndentToStrip));
2169
+ IsFirstSegment, true , IndentToStrip,
2170
+ Str.IsRawString ()));
2159
2171
}
2160
2172
2173
+ // / Appends each leading # character found at CurPtr to delimiter, then returns true only if the character that follows is a double quote. CurPtr is taken by value, so the caller's pointer is not advanced, and delimiter keeps the appended # characters even when the result is false. A custom delimiter is zero or more # characters surrounding a quoted string.
2174
+ static bool isDelimitedString (const char *CurPtr, std::string &delimiter) {
2175
+ while (*CurPtr == ' #' )
2176
+ delimiter.push_back (*CurPtr++);
2177
+ return *CurPtr == ' "' ;
2178
+ }
2161
2179
2162
2180
// ===----------------------------------------------------------------------===//
2163
2181
// Main Lexer Loop
@@ -2250,9 +2268,20 @@ void Lexer::lexImpl() {
2250
2268
case ' ,' : return formToken (tok::comma, TokStart);
2251
2269
case ' ;' : return formToken (tok::semi, TokStart);
2252
2270
case ' :' : return formToken (tok::colon, TokStart);
2253
- case ' \\ ' : return formToken (tok::backslash, TokStart);
2271
+ case ' \\ ' : {
2272
+ std::string Delimiter;
2273
+ if (isDelimitedString (CurPtr, Delimiter)) {
2274
+ CurPtr++;
2275
+ return lexStringLiteral (true , Delimiter);
2276
+ }
2277
+ }
2278
+ return formToken (tok::backslash, TokStart);
2254
2279
2255
- case ' #' :
2280
+ case ' #' : {
2281
+ std::string Delimiter;
2282
+ if (isDelimitedString (CurPtr - 1 , Delimiter))
2283
+ return lexStringLiteral (false , Delimiter);
2284
+ }
2256
2285
return lexHash ();
2257
2286
2258
2287
// Operator characters.
0 commit comments