@@ -1565,8 +1565,12 @@ pub struct Chars<R> {
1565
1565
issue = "27802" ) ]
1566
1566
pub enum CharsError {
1567
1567
/// Variant representing that the underlying stream was read successfully
1568
- /// but it did not contain valid utf8 data.
1569
- NotUtf8 ,
1568
+ /// but contains a byte sequence ill-formed in UTF-8.
1569
+ InvalidUtf8 ,
1570
+
1571
+ /// Variant representing that the underlying stream contains the start
1572
+ /// of a byte sequence well-formed in UTF-8, but ends prematurely.
1573
+ IncompleteUtf8 ,
1570
1574
1571
1575
/// Variant representing that an I/O error occurred.
1572
1576
Io ( Error ) ,
@@ -1603,11 +1607,11 @@ impl<R: Read> Iterator for Chars<R> {
1603
1607
macro_rules! continuation_byte {
1604
1608
( $range: pat) => {
1605
1609
{
1606
- match read_byte!( EOF => return Some ( Err ( CharsError :: NotUtf8 ) ) ) {
1610
+ match read_byte!( EOF => return Some ( Err ( CharsError :: IncompleteUtf8 ) ) ) {
1607
1611
byte @ $range => ( byte & 0b0011_1111 ) as u32 ,
1608
1612
byte => {
1609
1613
self . buffer = Some ( byte) ;
1610
- return Some ( Err ( CharsError :: NotUtf8 ) )
1614
+ return Some ( Err ( CharsError :: InvalidUtf8 ) )
1611
1615
}
1612
1616
}
1613
1617
}
@@ -1643,7 +1647,7 @@ impl<R: Read> Iterator for Chars<R> {
1643
1647
let fourth = continuation_byte ! ( 0x80 ...0xBF ) ;
1644
1648
( ( first & 0b0000_0111 ) as u32 ) << 18 | second << 12 | third << 6 | fourth
1645
1649
}
1646
- _ => return Some ( Err ( CharsError :: NotUtf8 ) )
1650
+ _ => return Some ( Err ( CharsError :: InvalidUtf8 ) )
1647
1651
} ;
1648
1652
unsafe {
1649
1653
Some ( Ok ( char:: from_u32_unchecked ( code_point) ) )
@@ -1656,13 +1660,16 @@ impl<R: Read> Iterator for Chars<R> {
1656
1660
impl std_error:: Error for CharsError {
1657
1661
fn description ( & self ) -> & str {
1658
1662
match * self {
1659
- CharsError :: NotUtf8 => "invalid utf8 encoding" ,
1663
+ CharsError :: InvalidUtf8 => "invalid UTF-8 byte sequence" ,
1664
+ CharsError :: IncompleteUtf8 => {
1665
+ "stream ended in the middle of an UTF-8 byte sequence"
1666
+ }
1660
1667
CharsError :: Io ( ref e) => std_error:: Error :: description ( e) ,
1661
1668
}
1662
1669
}
1663
1670
fn cause ( & self ) -> Option < & std_error:: Error > {
1664
1671
match * self {
1665
- CharsError :: NotUtf8 => None ,
1672
+ CharsError :: InvalidUtf8 | CharsError :: IncompleteUtf8 => None ,
1666
1673
CharsError :: Io ( ref e) => e. cause ( ) ,
1667
1674
}
1668
1675
}
@@ -1673,8 +1680,11 @@ impl std_error::Error for CharsError {
1673
1680
impl fmt:: Display for CharsError {
1674
1681
fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
1675
1682
match * self {
1676
- CharsError :: NotUtf8 => {
1677
- "byte stream did not contain valid utf8" . fmt ( f)
1683
+ CharsError :: InvalidUtf8 => {
1684
+ "invalid UTF-8 byte sequence" . fmt ( f)
1685
+ }
1686
+ CharsError :: IncompleteUtf8 => {
1687
+ "stream ended in the middle of an UTF-8 byte sequence" . fmt ( f)
1678
1688
}
1679
1689
CharsError :: Io ( ref e) => e. fmt ( f) ,
1680
1690
}
@@ -1761,7 +1771,7 @@ mod tests {
1761
1771
// http://www.unicode.org/versions/Unicode8.0.0/ch05.pdf#G40630
1762
1772
Cursor :: new ( bytes) . chars ( ) . map ( |result| match result {
1763
1773
Ok ( c) => c,
1764
- Err ( CharsError :: NotUtf8 ) => '\u{FFFD}' ,
1774
+ Err ( CharsError :: InvalidUtf8 ) | Err ( CharsError :: IncompleteUtf8 ) => '\u{FFFD}' ,
1765
1775
Err ( CharsError :: Io ( e) ) => panic ! ( "{}" , e) ,
1766
1776
} ) . collect ( )
1767
1777
}
0 commit comments