@@ -1479,7 +1479,7 @@ extension Lexer.Cursor {
1479
1479
case endOfString
1480
1480
1481
1481
/// The character could not be lexed. The associated value describes why, e.g. invalid UTF-8, a NUL character, or an invalid escape sequence.
1482
- case error
1482
+ case error( LexerError . Kind )
1483
1483
}
1484
1484
1485
1485
/// Lexes a single character in a string literal, handling escape sequences
@@ -1524,10 +1524,8 @@ extension Lexer.Cursor {
1524
1524
return . success( Unicode . Scalar ( character) )
1525
1525
}
1526
1526
case 0 :
1527
- // if (EmitDiagnostics)
1528
- // diagnose(CurPtr-1, diag::lex_nul_character)
1529
- let character = self . advance ( ) !
1530
- return . success( Unicode . Scalar ( character) )
1527
+ _ = self . advance ( )
1528
+ return . error( . nulCharacter)
1531
1529
case UInt8 ( ascii: " \n " ) , UInt8 ( ascii: " \r " ) : // String literals cannot have \n or \r in them.
1532
1530
let character = self . advance ( ) !
1533
1531
assert ( stringLiteralKind == . multiLine, " Caller must handle newlines in non-multiline " )
@@ -1538,16 +1536,17 @@ extension Lexer.Cursor {
1538
1536
if !self . advanceIfStringDelimiter ( delimiterLength: delimiterLength) {
1539
1537
return . success( Unicode . Scalar ( " \\ " ) )
1540
1538
}
1541
- guard let escapedCharacterCode = self . lexEscapedCharacter ( isMultilineString: stringLiteralKind == . multiLine) else {
1542
- return . error
1543
- }
1544
-
1545
- // Check to see if the encoding is valid.
1546
- guard let validatedScalar = Unicode . Scalar ( escapedCharacterCode) else {
1547
- return . error
1539
+ switch self . lexEscapedCharacter ( isMultilineString: stringLiteralKind == . multiLine) {
1540
+ case . success( let escapedCharacterCode) :
1541
+ // Check to see if the encoding is valid.
1542
+ if let validatedScalar = Unicode . Scalar ( escapedCharacterCode) {
1543
+ return . validatedEscapeSequence( Character ( validatedScalar) )
1544
+ } else {
1545
+ return . error( . invalidEscapeSequenceInStringLiteral)
1546
+ }
1547
+ case . error( let kind) :
1548
+ return . error( kind)
1548
1549
}
1549
-
1550
- return . validatedEscapeSequence( Character ( validatedScalar) )
1551
1550
default :
1552
1551
_ = self . advance ( )
1553
1552
// Normal characters are part of the string.
@@ -1561,71 +1560,68 @@ extension Lexer.Cursor {
1561
1560
// }
1562
1561
self = charStart
1563
1562
guard let charValue = self . advanceValidatingUTF8Character ( ) else {
1564
- // if (EmitDiagnostics)
1565
- // diagnose(CharStart, diag::lex_invalid_utf8)
1566
- return . error
1563
+ return . error( . invalidUtf8)
1567
1564
}
1568
1565
return . success( charValue)
1569
1566
}
1570
1567
}
1571
1568
1569
+ enum EscapedCharacterLex {
1570
+ // Successfully lexed an escape sequence that represents the Unicode character
1571
+ // at the given codepoint
1572
+ case success( UInt32 )
1573
+ case error( LexerError . Kind )
1574
+ }
1575
+
1572
1576
/// Assuming that we are in a string literal and have already consumed a `\`,
1573
1577
/// consume the escaped characters and return the Unicode character code
1574
1578
/// (i.e. UTF-32 value) that the escaped character represents.
1575
1579
///
1576
1580
/// If the character is not a valid escape sequence, return `.error` with the kind of lexer error that was encountered.
1577
- private mutating func lexEscapedCharacter( isMultilineString: Bool ) -> UInt32 ? {
1581
+ private mutating func lexEscapedCharacter( isMultilineString: Bool ) -> EscapedCharacterLex {
1578
1582
assert ( self . previous == UInt8 ( ascii: " \\ " ) || self . previous == UInt8 ( ascii: " # " ) )
1579
1583
// Escape processing. We already ate the "\".
1580
1584
switch self . peek ( ) {
1581
1585
// Simple single-character escapes.
1582
- case UInt8 ( ascii: " 0 " ) : _ = self . advance ( ) ; return UInt32 ( UInt8 ( ascii: " \0 " ) )
1583
- case UInt8 ( ascii: " n " ) : _ = self . advance ( ) ; return UInt32 ( UInt8 ( ascii: " \n " ) )
1584
- case UInt8 ( ascii: " r " ) : _ = self . advance ( ) ; return UInt32 ( UInt8 ( ascii: " \r " ) )
1585
- case UInt8 ( ascii: " t " ) : _ = self . advance ( ) ; return UInt32 ( UInt8 ( ascii: " \t " ) )
1586
- case UInt8 ( ascii: #"""# ) : _ = self . advance ( ) ; return UInt32 ( UInt8 ( ascii: #"""# ) )
1587
- case UInt8 ( ascii: " ' " ) : _ = self . advance ( ) ; return UInt32 ( UInt8 ( ascii: " ' " ) )
1588
- case UInt8 ( ascii: " \\ " ) : _ = self . advance ( ) ; return UInt32 ( UInt8 ( ascii: " \\ " ) )
1586
+ case UInt8 ( ascii: " 0 " ) : _ = self . advance ( ) ; return . success ( UInt32 ( UInt8 ( ascii: " \0 " ) ) )
1587
+ case UInt8 ( ascii: " n " ) : _ = self . advance ( ) ; return . success ( UInt32 ( UInt8 ( ascii: " \n " ) ) )
1588
+ case UInt8 ( ascii: " r " ) : _ = self . advance ( ) ; return . success ( UInt32 ( UInt8 ( ascii: " \r " ) ) )
1589
+ case UInt8 ( ascii: " t " ) : _ = self . advance ( ) ; return . success ( UInt32 ( UInt8 ( ascii: " \t " ) ) )
1590
+ case UInt8 ( ascii: #"""# ) : _ = self . advance ( ) ; return . success ( UInt32 ( UInt8 ( ascii: #"""# ) ) )
1591
+ case UInt8 ( ascii: " ' " ) : _ = self . advance ( ) ; return . success ( UInt32 ( UInt8 ( ascii: " ' " ) ) )
1592
+ case UInt8 ( ascii: " \\ " ) : _ = self . advance ( ) ; return . success ( UInt32 ( UInt8 ( ascii: " \\ " ) ) )
1589
1593
1590
1594
case UInt8 ( ascii: " u " ) : // e.g. \u{1234}
1591
1595
_ = self . advance ( )
1592
1596
1593
1597
guard self . is ( at: " { " ) else {
1594
- // if (EmitDiagnostics)
1595
- // diagnose(CurPtr-1, diag::lex_unicode_escape_braces)
1596
- return nil
1598
+ return . error( . expectedHexCodeInUnicodeEscape)
1597
1599
}
1598
1600
1599
- guard let cv = self . lexUnicodeEscape ( ) else {
1600
- return nil
1601
- }
1602
- return cv
1603
-
1601
+ return self . lexUnicodeEscape ( )
1604
1602
case UInt8 ( ascii: " \n " ) , UInt8 ( ascii: " \r " ) :
1605
1603
if isMultilineString && self . maybeConsumeNewlineEscape ( ) {
1606
- return UInt32 ( UInt8 ( ascii: " \n " ) )
1604
+ return . success ( UInt32 ( UInt8 ( ascii: " \n " ) ) )
1607
1605
}
1608
- return nil
1606
+ return . error ( . invalidEscapeSequenceInStringLiteral )
1609
1607
case nil :
1610
- return nil
1608
+ return . error ( . invalidEscapeSequenceInStringLiteral )
1611
1609
case . some( let peekedValue) : // Invalid escape.
1612
- // if (EmitDiagnostics)
1613
- // diagnose(CurPtr, diag::lex_invalid_escape)
1614
1610
// If this looks like a plausible escape character, recover as though this
1615
1611
// is an invalid escape.
1616
1612
let c = Unicode . Scalar ( peekedValue)
1617
1613
if c. isDigit || c. isLetter {
1618
1614
_ = self . advance ( )
1619
1615
}
1620
- return nil
1616
+ return . error ( . invalidEscapeSequenceInStringLiteral )
1621
1617
}
1622
1618
}
1623
1619
1624
1620
/// Lex the contents of a `\u{1234}` escape sequence, assuming that we are
1625
1621
/// placed at the opening `{`.
1626
1622
///
1627
1623
/// If this is not a valid unicode escape, return `.error` with the kind of lexer error that was encountered.
1628
- private mutating func lexUnicodeEscape( ) -> UInt32 ? {
1624
+ private mutating func lexUnicodeEscape( ) -> EscapedCharacterLex {
1629
1625
let quoteConsumed = self . advance ( matching: " { " )
1630
1626
assert ( quoteConsumed)
1631
1627
@@ -1636,18 +1632,18 @@ extension Lexer.Cursor {
1636
1632
}
1637
1633
1638
1634
guard self . advance ( matching: " } " ) else {
1639
- // if (Diags)
1640
- // Diags->diagnose(CurPtr, diag::lex_invalid_u_escape_rbrace)
1641
- return nil
1635
+ return . error( . excpectedClosingBraceInUnicodeEscape)
1642
1636
}
1643
1637
1644
1638
if numDigits == 0 || numDigits > 8 {
1645
- // if (Diags)
1646
- // Diags->diagnose(CurPtr, diag::lex_invalid_u_escape)
1647
- return nil
1639
+ return . error( . invalidNumberOfHexDigitsInUnicodeEscape)
1648
1640
}
1649
1641
1650
- return UInt32 ( String ( decoding: digitStart. input [ 0 ..< numDigits] , as: UTF8 . self) , radix: 16 )
1642
+ if let codePoint = UInt32 ( String ( decoding: digitStart. input [ 0 ..< numDigits] , as: UTF8 . self) , radix: 16 ) {
1643
+ return . success( codePoint)
1644
+ } else {
1645
+ return . error( . invalidEscapeSequenceInStringLiteral)
1646
+ }
1651
1647
}
1652
1648
1653
1649
private mutating func maybeConsumeNewlineEscape( ) -> Bool {
@@ -1835,8 +1831,8 @@ extension Lexer.Cursor {
1835
1831
// validate the multi-line string literal's indentation.
1836
1832
return Lexer . Result ( . stringSegment, error: error)
1837
1833
}
1838
- case . error:
1839
- error = ( . invalidEscapeSequenceInStringLiteral , self )
1834
+ case . error( let errorKind ) :
1835
+ error = ( errorKind , self )
1840
1836
self = clone
1841
1837
case . endOfString:
1842
1838
return Lexer . Result (
0 commit comments