@@ -268,8 +268,7 @@ Token Lexer::getTokenAt(SourceLoc Loc) {
268
268
}
269
269
270
270
void Lexer::formToken (tok Kind, const char *TokStart,
271
- bool MultilineString, bool RawString,
272
- size_t DelimiterLength) {
271
+ bool MultilineString, unsigned DelimiterLength) {
273
272
assert (CurPtr >= BufferStart &&
274
273
CurPtr <= BufferEnd && " Current pointer out of range!" );
275
274
@@ -296,7 +295,7 @@ void Lexer::formToken(tok Kind, const char *TokStart,
296
295
}
297
296
298
297
NextToken.setToken (Kind, TokenText, CommentLength,
299
- MultilineString, RawString );
298
+ MultilineString, DelimiterLength );
300
299
CurPtr += DelimiterLength;
301
300
}
302
301
@@ -1208,6 +1207,21 @@ static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
1208
1207
}
1209
1208
}
1210
1209
1210
+ // / delimiterMatches - Does the custom delimiter (the # characters surrounding
1211
+ // / the quotes) match the run of # characters following a \ inside the string?
1212
+ // / This allows interpolation and escapes inside a "raw" string. Normal/cooked
1213
+ // / string processing is the degenerate case of no # characters around the quotes.
1214
+ // / On a match, advances BytesPtr past the # characters and returns true; on a mismatch, returns false and leaves BytesPtr unchanged.
1215
+ static bool delimiterMatches (unsigned DelimiterLength, const char *&BytesPtr) {
1216
+ if (!DelimiterLength) // No custom delimiter: trivially matches, nothing to skip.
1217
+ return true ;
1218
+ for (unsigned i = 0 ; i < DelimiterLength ; i++) // Require exactly DelimiterLength leading # characters.
1219
+ if (BytesPtr[i] != ' #' )
1220
+ return false ;
1221
+ BytesPtr += DelimiterLength; // Consume the matched # characters for the caller.
1222
+ return true ;
1223
+ }
1224
+
1211
1225
// / lexCharacter - Read a character and return its UTF32 code. If this is the
1212
1226
// / end of enclosing string/character sequence (i.e. the character is equal to
1213
1227
// / 'StopQuote'), this returns ~0U and leaves 'CurPtr' pointing to the terminal
@@ -1218,7 +1232,7 @@ static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
1218
1232
// / character_escape ::= unicode_character_escape
1219
1233
unsigned Lexer::lexCharacter (const char *&CurPtr, char StopQuote,
1220
1234
bool EmitDiagnostics, bool MultilineString,
1221
- bool RawString ) {
1235
+ unsigned DelimiterLength ) {
1222
1236
const char *CharStart = CurPtr;
1223
1237
1224
1238
switch (*CurPtr++) {
@@ -1267,7 +1281,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
1267
1281
diagnose (CurPtr-1 , diag::lex_unterminated_string);
1268
1282
return ~1U ;
1269
1283
case ' \\ ' : // Escapes.
1270
- if (RawString )
1284
+ if (! delimiterMatches (DelimiterLength, CurPtr) )
1271
1285
return ' \\ ' ;
1272
1286
break ;
1273
1287
}
@@ -1496,7 +1510,7 @@ getMultilineTrailingIndent(const Token &Str, DiagnosticEngine *Diags) {
1496
1510
auto string = StringRef (start, end - start);
1497
1511
1498
1512
// Disallow escaped newline in the last line.
1499
- if (Diags && ! Str.IsRawString () ) {
1513
+ if (Diags && Str.DelimiterLength () == 0 ) {
1500
1514
auto *Ptr = start - 1 ;
1501
1515
if (*Ptr == ' \n ' ) --Ptr;
1502
1516
if (*Ptr == ' \r ' ) --Ptr;
@@ -1652,30 +1666,34 @@ static void validateMultilineIndents(const Token &Str,
1652
1666
// / lexStringLiteral:
1653
1667
// / string_literal ::= ["]([^"\\\n\r]|character_escape)*["]
1654
1668
// / string_literal ::= ["]["]["].*["]["]["] - approximately
1655
- void Lexer::lexStringLiteral (bool RawString, std::string Delimiter) {
1656
- CurPtr += Delimiter.length ();
1669
+ void Lexer::lexStringLiteral (unsigned DelimiterLength) {
1657
1670
const char *TokStart = CurPtr-1 ;
1658
1671
assert ((*TokStart == ' "' || *TokStart == ' \' ' ) && " Unexpected start" );
1659
1672
// NOTE: We only allow single-quote string literals so we can emit useful
1660
1673
// diagnostics about changing them to double quotes.
1661
1674
1662
1675
bool wasErroneous = false , MultilineString = false ;
1663
- Delimiter.insert (0 , 1 , *TokStart);
1676
+ std::string Delimiter;
1677
+ Delimiter.push_back (*TokStart);
1664
1678
1665
1679
// Is this the start of a multiline string literal?
1666
1680
if (*TokStart == ' "' && *CurPtr == ' "' && *(CurPtr + 1 ) == ' "' ) {
1667
1681
MultilineString = true ;
1668
1682
CurPtr += 2 ;
1669
- Delimiter.insert (0 , 2 , *TokStart);
1683
+ Delimiter.push_back (*TokStart);
1684
+ Delimiter.push_back (*TokStart);
1670
1685
if (*CurPtr != ' \n ' && *CurPtr != ' \r ' )
1671
1686
diagnose (CurPtr, diag::lex_illegal_multiline_string_start)
1672
1687
.fixItInsert (Lexer::getSourceLoc (CurPtr), " \n " );
1673
1688
}
1689
+ Delimiter.insert (Delimiter.size (), DelimiterLength, ' #' );
1674
1690
1675
1691
while (true ) {
1676
- if (*CurPtr == ' \\ ' && *(CurPtr + 1 ) == ' (' && !RawString) {
1692
+ const char *TmpPtr = CurPtr + 1 ;
1693
+ if (*CurPtr == ' \\ ' &&
1694
+ delimiterMatches (DelimiterLength, TmpPtr) && *TmpPtr == ' (' ) {
1677
1695
// Consume tokens until we hit the corresponding ')'.
1678
- CurPtr += 2 ;
1696
+ CurPtr = TmpPtr + 1 ;
1679
1697
const char *EndPtr =
1680
1698
skipToEndOfInterpolatedExpression (CurPtr, BufferEnd,
1681
1699
Diags, MultilineString);
@@ -1698,7 +1716,7 @@ void Lexer::lexStringLiteral(bool RawString, std::string Delimiter) {
1698
1716
}
1699
1717
1700
1718
unsigned CharValue = lexCharacter (CurPtr, *TokStart, true ,
1701
- MultilineString, RawString );
1719
+ MultilineString, DelimiterLength );
1702
1720
wasErroneous |= CharValue == ~1U ;
1703
1721
1704
1722
// If this is the end of string, we are done. If it is a normal character
@@ -1747,14 +1765,14 @@ void Lexer::lexStringLiteral(bool RawString, std::string Delimiter) {
1747
1765
if (MultilineString) {
1748
1766
CurPtr += 2 ;
1749
1767
formToken (tok::string_literal, TokStart,
1750
- MultilineString, RawString, Delimiter. length () - 3 );
1768
+ MultilineString, DelimiterLength );
1751
1769
if (Diags)
1752
1770
validateMultilineIndents (NextToken, Diags);
1753
1771
return ;
1754
1772
}
1755
1773
else
1756
1774
return formToken (tok::string_literal, TokStart,
1757
- MultilineString, RawString, Delimiter. length () - 1 );
1775
+ MultilineString, DelimiterLength );
1758
1776
}
1759
1777
}
1760
1778
}
@@ -2021,7 +2039,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
2021
2039
bool IsFirstSegment,
2022
2040
bool IsLastSegment,
2023
2041
unsigned IndentToStrip,
2024
- bool RawString ) {
2042
+ unsigned DelmiterLength ) {
2025
2043
2026
2044
TempString.clear ();
2027
2045
// Note that it is always safe to read one over the end of "Bytes" because
@@ -2048,7 +2066,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
2048
2066
continue ;
2049
2067
}
2050
2068
2051
- if (CurChar != ' \\ ' || RawString ) {
2069
+ if (CurChar != ' \\ ' || ! delimiterMatches (DelmiterLength, BytesPtr) ) {
2052
2070
TempString.push_back (CurChar);
2053
2071
continue ;
2054
2072
}
@@ -2119,7 +2137,7 @@ void Lexer::getStringLiteralSegments(
2119
2137
// Are substitutions required either for indent stripping or line ending
2120
2138
// normalization?
2121
2139
bool MultilineString = Str.IsMultilineString (), IsFirstSegment = true ;
2122
- unsigned IndentToStrip = 0 ;
2140
+ unsigned IndentToStrip = 0 , DelimiterLength = Str. DelimiterLength () ;
2123
2141
if (MultilineString)
2124
2142
IndentToStrip =
2125
2143
std::get<0 >(getMultilineTrailingIndent (Str, /* Diags=*/ nullptr )).size ();
@@ -2130,20 +2148,20 @@ void Lexer::getStringLiteralSegments(
2130
2148
const char *SegmentStartPtr = Bytes.begin ();
2131
2149
const char *BytesPtr = SegmentStartPtr;
2132
2150
size_t pos;
2133
- while (!Str.IsRawString () &&
2134
- (pos = Bytes.find (' \\ ' , BytesPtr-Bytes.begin ())) != StringRef::npos) {
2151
+ while ((pos = Bytes.find (' \\ ' , BytesPtr-Bytes.begin ())) != StringRef::npos) {
2135
2152
BytesPtr = Bytes.begin () + pos + 1 ;
2136
2153
2137
- if (*BytesPtr++ != ' (' )
2154
+ if (! delimiterMatches (DelimiterLength, BytesPtr) || *BytesPtr++ != ' (' )
2138
2155
continue ;
2139
2156
2140
2157
// String interpolation.
2141
2158
2142
2159
// Push the current segment.
2143
2160
Segments.push_back (
2144
2161
StringSegment::getLiteral (getSourceLoc (SegmentStartPtr),
2145
- BytesPtr-SegmentStartPtr-2 ,
2146
- IsFirstSegment, false , IndentToStrip, false ));
2162
+ BytesPtr-SegmentStartPtr-2 -DelimiterLength,
2163
+ IsFirstSegment, false , IndentToStrip,
2164
+ DelimiterLength));
2147
2165
IsFirstSegment = false ;
2148
2166
2149
2167
// Find the closing ')'.
@@ -2167,14 +2185,7 @@ void Lexer::getStringLiteralSegments(
2167
2185
StringSegment::getLiteral (getSourceLoc (SegmentStartPtr),
2168
2186
Bytes.end ()-SegmentStartPtr,
2169
2187
IsFirstSegment, true , IndentToStrip,
2170
- Str.IsRawString ()));
2171
- }
2172
-
2173
- // / A custom delimiter is zero or more # characters surrounding a quoted string
2174
- static bool isDelimitedString (const char *CurPtr, std::string &delimiter) {
2175
- while (*CurPtr == ' #' )
2176
- delimiter.push_back (*CurPtr++);
2177
- return *CurPtr == ' "' ;
2188
+ DelimiterLength));
2178
2189
}
2179
2190
2180
2191
// ===----------------------------------------------------------------------===//
@@ -2268,19 +2279,17 @@ void Lexer::lexImpl() {
2268
2279
case ' ,' : return formToken (tok::comma, TokStart);
2269
2280
case ' ;' : return formToken (tok::semi, TokStart);
2270
2281
case ' :' : return formToken (tok::colon, TokStart);
2271
- case ' \\ ' : {
2272
- std::string Delimiter;
2273
- if (isDelimitedString (CurPtr, Delimiter)) {
2274
- CurPtr++;
2275
- return lexStringLiteral (true , Delimiter);
2276
- }
2277
- }
2278
- return formToken (tok::backslash, TokStart);
2282
+ case ' \\ ' : return formToken (tok::backslash, TokStart);
2279
2283
2280
2284
case ' #' : {
2281
- std::string Delimiter;
2282
- if (isDelimitedString (CurPtr - 1 , Delimiter))
2283
- return lexStringLiteral (false , Delimiter);
2285
+ const char *Lookahead = CurPtr;
2286
+ while (*Lookahead == ' #' )
2287
+ Lookahead++;
2288
+ if (*Lookahead++ == ' "' ) {
2289
+ unsigned DelimiterLength = Lookahead - CurPtr;
2290
+ CurPtr = Lookahead;
2291
+ return lexStringLiteral (DelimiterLength);
2292
+ }
2284
2293
}
2285
2294
return lexHash ();
2286
2295
0 commit comments