Skip to content

Commit 8aeefbb

Browse files
committed
Reimplement a minor optimization in String::from_utf8_lossy that avoids having to loop over the slice from the beginning.
1 parent 7d4f487 commit 8aeefbb

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

src/libcollections/string.rs

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -141,14 +141,18 @@ impl String {
141141
/// ```
142142
#[stable]
143143
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
144+
let mut i = 0;
144145
match str::from_utf8(v) {
145146
Ok(s) => return Cow::Borrowed(s),
146-
Err(..) => {}
147+
Err(e) => {
148+
if let Utf8Error::InvalidByte(firstbad) = e {
149+
i = firstbad;
150+
}
151+
}
147152
}
148153

149154
static TAG_CONT_U8: u8 = 128u8;
150155
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
151-
let mut i = 0;
152156
let total = v.len();
153157
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
154158
unsafe { *xs.get_unchecked(i) }
@@ -172,7 +176,7 @@ impl String {
172176
// subseqidx is the index of the first byte of the subsequence we're looking at.
173177
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
174178
// them one by one.
175-
let mut subseqidx = 0;
179+
let mut subseqidx = i;
176180

177181
while i < total {
178182
let i_ = i;

0 commit comments

Comments
 (0)