@@ -873,9 +873,13 @@ extension Lexer.Cursor {
873
873
return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
874
874
}
875
875
876
- let unknownClassification = self . lexUnknown ( )
877
- assert ( unknownClassification == . lexemeContents, " Invalid UTF-8 sequence should be eaten by lexTrivia as LeadingTrivia " )
878
- return Lexer . Result ( . unknown)
876
+ switch self . lexUnknown ( ) {
877
+ case . lexemeContents( let result) :
878
+ return result
879
+ case . trivia:
880
+ assertionFailure ( " Invalid UTF-8 sequence should be eaten by lexTrivia as LeadingTrivia " )
881
+ return Lexer . Result ( . unknown, error: ( . invalidUtf8, self ) )
882
+ }
879
883
}
880
884
}
881
885
@@ -1125,7 +1129,7 @@ extension Lexer.Cursor {
1125
1129
1126
1130
// `lexUnknown` expects that the first character has not been consumed yet.
1127
1131
self = start
1128
- if self . lexUnknown ( ) == . trivia {
1132
+ if case . trivia = self . lexUnknown ( ) {
1129
1133
continue
1130
1134
} else {
1131
1135
break
@@ -1632,7 +1636,7 @@ extension Lexer.Cursor {
1632
1636
}
1633
1637
1634
1638
guard self . advance ( matching: " } " ) else {
1635
- return . error( . excpectedClosingBraceInUnicodeEscape )
1639
+ return . error( . expectedClosingBraceInUnicodeEscape )
1636
1640
}
1637
1641
1638
1642
if numDigits == 0 || numDigits > 8 {
@@ -1770,12 +1774,6 @@ extension Lexer.Cursor {
1770
1774
}
1771
1775
1772
1776
mutating func lexInStringLiteral( stringLiteralKind: StringLiteralKind , delimiterLength: Int ) -> Lexer . Result {
1773
- /*
1774
- if IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r' {
1775
- diagnose(CurPtr, diag::lex_illegal_multiline_string_start)
1776
- .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n")
1777
- }
1778
- */
1779
1777
var error : ( LexerError . Kind , Lexer . Cursor ) ? = nil
1780
1778
1781
1779
while true {
@@ -1971,15 +1969,6 @@ extension Lexer.Cursor {
1971
1969
if self . input. baseAddress! - tokStart. input. baseAddress! == 1 {
1972
1970
switch tokStart. peek ( ) {
1973
1971
case UInt8 ( ascii: " = " ) :
1974
- // Refrain from emitting this message in operator name position.
1975
- // if (NextToken.isNot(tok::kw_operator) && leftBound != rightBound) {
1976
- // auto d = diagnose(TokStart, diag::lex_unary_equal)
1977
- // if (leftBound)
1978
- // d.fixItInsert(getSourceLoc(TokStart), " ")
1979
- // else
1980
- // d.fixItInsert(getSourceLoc(TokStart+1), " ")
1981
- // }
1982
- // always emit 'tok::equal' to avoid trickle down parse errors
1983
1972
return Lexer . Result ( . equal)
1984
1973
case UInt8 ( ascii: " & " ) :
1985
1974
if leftBound == rightBound || leftBound {
@@ -2001,17 +1990,15 @@ extension Lexer.Cursor {
2001
1990
case ( UInt8 ( ascii: " - " ) , UInt8 ( ascii: " > " ) ) : // ->
2002
1991
return Lexer . Result ( . arrow)
2003
1992
case ( UInt8 ( ascii: " * " ) , UInt8 ( ascii: " / " ) ) : // */
2004
- // diagnose(TokStart, diag::lex_unexpected_block_comment_end)
2005
- return Lexer . Result ( . unknown)
1993
+ return Lexer . Result ( . unknown, error: ( . unexpectedBlockCommentEnd, tokStart) )
2006
1994
default :
2007
1995
break
2008
1996
}
2009
1997
} else {
2010
1998
// Verify there is no "*/" in the middle of the identifier token, we reject
2011
1999
// it as potentially ending a block comment.
2012
2000
if tokStart. text ( upTo: self ) . contains ( " */ " ) {
2013
- // diagnose(TokStart+Pos, diag::lex_unexpected_block_comment_end)
2014
- return Lexer . Result ( . unknown)
2001
+ return Lexer . Result ( . unknown, error: ( . unexpectedBlockCommentEnd, tokStart) )
2015
2002
}
2016
2003
}
2017
2004
@@ -2102,24 +2089,15 @@ extension Lexer.Cursor {
2102
2089
2103
2090
// Get the next character.
2104
2091
switch body. lexCharacterInStringLiteral ( stringLiteralKind: . singleLine, delimiterLength: 0 ) {
2105
- case . error, . endOfString :
2092
+ case . error:
2106
2093
// If the character was incorrectly encoded, give up.
2107
2094
return nil
2108
- case . success( let charValue ) where charValue == Unicode . Scalar ( UInt8 ( ascii : #"""# ) ) :
2109
- // If we found a straight- quote, then we're done. Just return the spot
2095
+ case . endOfString , . success( Unicode . Scalar ( 0x201D ) ) :
2096
+ // If we found a closing quote, then we're done. Just return the spot
2110
2097
// to continue.
2111
2098
return body
2112
- case . validatedEscapeSequence( let charValue) where charValue == Character ( Unicode . Scalar ( 0x0000201D ) !) :
2113
- // If we found an ending curly quote (common since this thing started with
2114
- // an opening curly quote) diagnose it with a fixit and then return.
2115
- // if (EmitDiagnostics) {
2116
- // diagnose(CharStart, diag::lex_invalid_curly_quote)
2117
- // .fixItReplaceChars(getSourceLoc(CharStart), getSourceLoc(Body),
2118
- // "\"")
2119
- // }
2120
- return body
2121
2099
default :
2122
- continue
2100
+ break
2123
2101
}
2124
2102
}
2125
2103
}
@@ -2128,14 +2106,15 @@ extension Lexer.Cursor {
2128
2106
/// The characters consumed by `lexUnknown` should be classified as trivia
2129
2107
case trivia
2130
2108
/// The characters consumed by `lexUnknown` should be classified as the contents of a lexeme
2131
- case lexemeContents
2109
+ case lexemeContents( Lexer . Result )
2132
2110
}
2133
2111
2134
2112
/// Assuming the cursor is positioned at neither a valid identifier nor a
2135
2113
/// valid operator start, advance the cursor by what can be considered a
2136
2114
/// lexeme.
2137
2115
mutating func lexUnknown( ) -> UnknownCharactersClassification {
2138
2116
assert ( !( self . peekScalar ( ) ? . isValidIdentifierStartCodePoint ?? false ) && !( self . peekScalar ( ) ? . isOperatorStartCodePoint ?? false ) )
2117
+ let start = self
2139
2118
var tmp = self
2140
2119
if tmp. advance ( if: { Unicode . Scalar ( $0) . isValidIdentifierContinuationCodePoint } ) {
2141
2120
// If this is a valid identifier continuation, but not a valid identifier
@@ -2145,7 +2124,7 @@ extension Lexer.Cursor {
2145
2124
// }
2146
2125
tmp. advance ( while: { Unicode . Scalar ( $0) . isValidIdentifierContinuationCodePoint } )
2147
2126
self = tmp
2148
- return . lexemeContents
2127
+ return . lexemeContents( Lexer . Result ( . identifier , error : ( . invalidIdentifierStartCharacter , position : start ) ) )
2149
2128
}
2150
2129
2151
2130
// This character isn't allowed in Swift source.
@@ -2155,8 +2134,7 @@ extension Lexer.Cursor {
2155
2134
self = tmp
2156
2135
return . trivia
2157
2136
}
2158
- if codepoint. value == 0x000000A0 {
2159
- // Non-breaking whitespace (U+00A0)
2137
+ if codepoint. value == 0xA0 { // Non-breaking whitespace (U+00A0)
2160
2138
while tmp. is ( at: 0xC2 ) && tmp. is ( offset: 1 , at: 0xA0 ) {
2161
2139
_ = tmp. advance ( )
2162
2140
_ = tmp. advance ( )
@@ -2169,33 +2147,23 @@ extension Lexer.Cursor {
2169
2147
// Spaces)
2170
2148
self = tmp
2171
2149
return . trivia
2172
- } else if ( codepoint. value == 0x0000201D ) {
2150
+ } else if codepoint. value == 0x201D { // Closing curly quote (U+201D)
2173
2151
// If this is an end curly quote, just diagnose it with a fixit hint.
2174
- // if (EmitDiagnosticsIfToken) {
2175
- // diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
2176
- // .fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp), "\"")
2177
- // }
2178
2152
self = tmp
2179
- return . lexemeContents
2180
- } else if ( codepoint. value == 0x0000201C ) {
2153
+ return . lexemeContents( Lexer . Result ( . unknown , error : ( . unicodeCurlyQuote , position : start ) ) )
2154
+ } else if codepoint. value == 0x201C { // Opening curly quote (U+201C)
2181
2155
// If this is a start curly quote, do a fuzzy match of a string literal
2182
2156
// to improve recovery.
2183
2157
if let tmp2 = tmp. findEndOfCurlyQuoteStringLiteral ( ) {
2184
2158
tmp = tmp2
2185
2159
}
2186
2160
2187
- // Note, we intentionally diagnose the end quote before the start quote,
2188
- // so that the IDE suggests fixing the end quote before the start quote.
2189
- // This, in turn, works better with our error recovery because we won't
2190
- // diagnose an end curly quote in the middle of a straight quoted
2191
- // literal.
2192
- // if (EmitDiagnosticsIfToken) {
2193
- // diagnose(CurPtr - 1, diag::lex_invalid_curly_quote)
2194
- // .fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(EndPtr),
2195
- // "\"")
2196
- // }
2197
2161
self = tmp
2198
- return . lexemeContents
2162
+
2163
+ // Identifiers are the closest representation of static string literals
2164
+ // we have in the parser. Classify the entire curly string as an identifier
2165
+ // for best recovery.
2166
+ return . lexemeContents( Lexer . Result ( . identifier, error: ( . unicodeCurlyQuote, position: start) ) )
2199
2167
}
2200
2168
2201
2169
// diagnose(CurPtr - 1, diag::lex_invalid_character)
0 commit comments