Skip to content

Commit bffa87b

Browse files
committed
Split CharsError::NotUtf8 into InvalidUtf8 and IncompleteUtf8 variants.
1 parent fd31eb6 commit bffa87b

File tree

1 file changed

+20
-10
lines changed

1 file changed

+20
-10
lines changed

src/libstd/io/mod.rs

Lines changed: 20 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1565,8 +1565,12 @@ pub struct Chars<R> {
15651565
issue = "27802")]
15661566
pub enum CharsError {
15671567
/// Variant representing that the underlying stream was read successfully
1568-
/// but it did not contain valid utf8 data.
1569-
NotUtf8,
1568+
/// but contains a byte sequence ill-formed in UTF-8.
1569+
InvalidUtf8,
1570+
1571+
/// Variant representing that the underlying stream contains the start
1572+
/// of a byte sequence well-formed in UTF-8, but ends prematurely.
1573+
IncompleteUtf8,
15701574

15711575
/// Variant representing that an I/O error occurred.
15721576
Io(Error),
@@ -1603,11 +1607,11 @@ impl<R: Read> Iterator for Chars<R> {
16031607
macro_rules! continuation_byte {
16041608
($range: pat) => {
16051609
{
1606-
match read_byte!(EOF => return Some(Err(CharsError::NotUtf8))) {
1610+
match read_byte!(EOF => return Some(Err(CharsError::IncompleteUtf8))) {
16071611
byte @ $range => (byte & 0b0011_1111) as u32,
16081612
byte => {
16091613
self.buffer = Some(byte);
1610-
return Some(Err(CharsError::NotUtf8))
1614+
return Some(Err(CharsError::InvalidUtf8))
16111615
}
16121616
}
16131617
}
@@ -1643,7 +1647,7 @@ impl<R: Read> Iterator for Chars<R> {
16431647
let fourth = continuation_byte!(0x80...0xBF);
16441648
((first & 0b0000_0111) as u32) << 18 | second << 12 | third << 6 | fourth
16451649
}
1646-
_ => return Some(Err(CharsError::NotUtf8))
1650+
_ => return Some(Err(CharsError::InvalidUtf8))
16471651
};
16481652
unsafe {
16491653
Some(Ok(char::from_u32_unchecked(code_point)))
@@ -1656,13 +1660,16 @@ impl<R: Read> Iterator for Chars<R> {
16561660
impl std_error::Error for CharsError {
16571661
fn description(&self) -> &str {
16581662
match *self {
1659-
CharsError::NotUtf8 => "invalid utf8 encoding",
1663+
CharsError::InvalidUtf8 => "invalid UTF-8 byte sequence",
1664+
CharsError::IncompleteUtf8 => {
1665+
"stream ended in the middle of an UTF-8 byte sequence"
1666+
}
16601667
CharsError::Io(ref e) => std_error::Error::description(e),
16611668
}
16621669
}
16631670
fn cause(&self) -> Option<&std_error::Error> {
16641671
match *self {
1665-
CharsError::NotUtf8 => None,
1672+
CharsError::InvalidUtf8 | CharsError::IncompleteUtf8 => None,
16661673
CharsError::Io(ref e) => e.cause(),
16671674
}
16681675
}
@@ -1673,8 +1680,11 @@ impl std_error::Error for CharsError {
16731680
impl fmt::Display for CharsError {
16741681
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16751682
match *self {
1676-
CharsError::NotUtf8 => {
1677-
"byte stream did not contain valid utf8".fmt(f)
1683+
CharsError::InvalidUtf8 => {
1684+
"invalid UTF-8 byte sequence".fmt(f)
1685+
}
1686+
CharsError::IncompleteUtf8 => {
1687+
"stream ended in the middle of an UTF-8 byte sequence".fmt(f)
16781688
}
16791689
CharsError::Io(ref e) => e.fmt(f),
16801690
}
@@ -1761,7 +1771,7 @@ mod tests {
17611771
// http://www.unicode.org/versions/Unicode8.0.0/ch05.pdf#G40630
17621772
Cursor::new(bytes).chars().map(|result| match result {
17631773
Ok(c) => c,
1764-
Err(CharsError::NotUtf8) => '\u{FFFD}',
1774+
Err(CharsError::InvalidUtf8) | Err(CharsError::IncompleteUtf8) => '\u{FFFD}',
17651775
Err(CharsError::Io(e)) => panic!("{}", e),
17661776
}).collect()
17671777
}

0 commit comments

Comments
 (0)