Skip to content

Commit 19c1584

Browse files
author
root
committed
---
yaml --- r: 153551 b: refs/heads/try2 c: 4592164 h: refs/heads/master i: 153549: 9b1d8ab 153547: 24d2983 153543: b3c44cf 153535: cf1e4d1 v: v3
1 parent 26206ad commit 19c1584

File tree

2 files changed

+49
-60
lines changed

2 files changed

+49
-60
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
55
refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
8-
refs/heads/try2: bbb299ad9840d02c52eefbd9989b5b18b51a7b8d
8+
refs/heads/try2: 45921648699a42fa1d257f6a54d2dbe9e46b0e20
99
refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
1010
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
1111
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try2/src/libcore/str.rs

Lines changed: 48 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -133,40 +133,35 @@ impl<'a> Iterator<char> for Chars<'a> {
133133
#[inline]
134134
fn next(&mut self) -> Option<char> {
135135
// Decode UTF-8, using the valid UTF-8 invariant
136-
#[inline]
137-
fn decode_multibyte<'a>(x: u8, it: &mut slice::Items<'a, u8>) -> char {
138-
// NOTE: Performance is very sensitive to the exact formulation here
139-
// Decode from a byte combination out of: [[[x y] z] w]
140-
let init = utf8_first_byte!(x, 2);
141-
let y = unwrap_or_0(it.next());
142-
let mut ch = utf8_acc_cont_byte!(init, y);
143-
if x >= 0xE0 {
144-
/* [[x y z] w] case
145-
* 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid */
146-
let z = unwrap_or_0(it.next());
147-
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
148-
ch = init << 12 | y_z;
149-
if x >= 0xF0 {
150-
/* [x y z w] case
151-
* use only the lower 3 bits of `init` */
152-
let w = unwrap_or_0(it.next());
153-
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
154-
}
155-
}
156-
unsafe {
157-
mem::transmute(ch)
136+
let x = match self.iter.next() {
137+
None => return None,
138+
Some(&next_byte) if next_byte < 128 => return Some(next_byte as char),
139+
Some(&next_byte) => next_byte,
140+
};
141+
142+
// Multibyte case follows
143+
// Decode from a byte combination out of: [[[x y] z] w]
144+
// NOTE: Performance is sensitive to the exact formulation here
145+
let init = utf8_first_byte!(x, 2);
146+
let y = unwrap_or_0(self.iter.next());
147+
let mut ch = utf8_acc_cont_byte!(init, y);
148+
if x >= 0xE0 {
149+
// [[x y z] w] case
150+
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
151+
let z = unwrap_or_0(self.iter.next());
152+
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
153+
ch = init << 12 | y_z;
154+
if x >= 0xF0 {
155+
// [x y z w] case
156+
// use only the lower 3 bits of `init`
157+
let w = unwrap_or_0(self.iter.next());
158+
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
158159
}
159160
}
160161

161-
match self.iter.next() {
162-
None => None,
163-
Some(&next_byte) => {
164-
if next_byte < 128 {
165-
Some(next_byte as char)
166-
} else {
167-
Some(decode_multibyte(next_byte, &mut self.iter))
168-
}
169-
}
162+
// str invariant says `ch` is a valid Unicode Scalar Value
163+
unsafe {
164+
Some(mem::transmute(ch))
170165
}
171166
}
172167

@@ -180,38 +175,32 @@ impl<'a> Iterator<char> for Chars<'a> {
180175
impl<'a> DoubleEndedIterator<char> for Chars<'a> {
181176
#[inline]
182177
fn next_back(&mut self) -> Option<char> {
183-
#[inline]
184-
fn decode_multibyte_back<'a>(w: u8, it: &mut slice::Items<'a, u8>) -> char {
185-
// Decode from a byte combination out of: [x [y [z w]]]
186-
let mut ch;
187-
let z = unwrap_or_0(it.next_back());
188-
ch = utf8_first_byte!(z, 2);
189-
if utf8_is_cont_byte!(z) {
190-
let y = unwrap_or_0(it.next_back());
191-
ch = utf8_first_byte!(y, 3);
192-
if utf8_is_cont_byte!(y) {
193-
let x = unwrap_or_0(it.next_back());
194-
ch = utf8_first_byte!(x, 4);
195-
ch = utf8_acc_cont_byte!(ch, y);
196-
}
197-
ch = utf8_acc_cont_byte!(ch, z);
198-
}
199-
ch = utf8_acc_cont_byte!(ch, w);
178+
let w = match self.iter.next_back() {
179+
None => return None,
180+
Some(&back_byte) if back_byte < 128 => return Some(back_byte as char),
181+
Some(&back_byte) => back_byte,
182+
};
200183

201-
unsafe {
202-
mem::transmute(ch)
184+
// Multibyte case follows
185+
// Decode from a byte combination out of: [x [y [z w]]]
186+
let mut ch;
187+
let z = unwrap_or_0(self.iter.next_back());
188+
ch = utf8_first_byte!(z, 2);
189+
if utf8_is_cont_byte!(z) {
190+
let y = unwrap_or_0(self.iter.next_back());
191+
ch = utf8_first_byte!(y, 3);
192+
if utf8_is_cont_byte!(y) {
193+
let x = unwrap_or_0(self.iter.next_back());
194+
ch = utf8_first_byte!(x, 4);
195+
ch = utf8_acc_cont_byte!(ch, y);
203196
}
197+
ch = utf8_acc_cont_byte!(ch, z);
204198
}
199+
ch = utf8_acc_cont_byte!(ch, w);
205200

206-
match self.iter.next_back() {
207-
None => None,
208-
Some(&back_byte) => {
209-
if back_byte < 128 {
210-
Some(back_byte as char)
211-
} else {
212-
Some(decode_multibyte_back(back_byte, &mut self.iter))
213-
}
214-
}
201+
// str invariant says `ch` is a valid Unicode Scalar Value
202+
unsafe {
203+
Some(mem::transmute(ch))
215204
}
216205
}
217206
}

0 commit comments

Comments
 (0)