Skip to content

Commit 9109550

Browse files
committed
Optimize core::unicode::printable.
1 parent 4bcd646 commit 9109550

File tree

2 files changed

+122
-20
lines changed

2 files changed

+122
-20
lines changed

library/core/src/unicode/printable.py

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -187,14 +187,28 @@ def main():
187187
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
188188
// do not edit directly!
189189
190-
fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal: &[u8]) -> bool {
191-
let x_upper = (x >> 8) as u8;
190+
/// # Safety
191+
///
192+
/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
193+
/// equal to the length of `singletons_lower`.
194+
/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
195+
/// endian, with the highest bit set and the length contained in the remaining 15 bits.
196+
unsafe fn check(
197+
x: u16,
198+
singletons_upper: &[(u8, u8)],
199+
singletons_lower: &[u8],
200+
normal: &[u8],
201+
) -> bool {
202+
let [x_upper, x_lower] = x.to_be_bytes();
192203
let mut lower_start = 0;
193204
for &(upper, lower_count) in singletons_upper {
194205
let lower_end = lower_start + lower_count as usize;
195-
if x_upper == upper {
196-
for &lower in &singletons_lower[lower_start..lower_end] {
197-
if lower == x as u8 {
206+
if upper == x_upper {
207+
// SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
208+
// is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
209+
// less than or equal to `singletons_lower.len()`.
210+
for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
211+
if lower == x_lower {
198212
return false;
199213
}
200214
}
@@ -209,9 +223,14 @@ def main():
209223
let mut current = true;
210224
while let Some(v) = normal.next() {
211225
let len = if v & 0x80 != 0 {
212-
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
226+
let upper = v & 0x7f;
227+
// SAFETY: The encoding of `normal` is guaranteed by the caller such that
228+
// if the length is greater than 0x7f, it consists of two bytes, so there
229+
// must be a next byte.
230+
let lower = unsafe { normal.next().unwrap_unchecked() };
231+
i32::from(u16::from_be_bytes([upper, lower]))
213232
} else {
214-
v as i32
233+
i32::from(v)
215234
};
216235
x -= len;
217236
if x < 0 {
@@ -229,8 +248,38 @@ def main():
229248
match x {
230249
..32 => false, // ASCII fast path
231250
..127 => true, // ASCII fast path
232-
..0x10000 => check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0),
233-
..0x20000 => check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1),\
251+
..0x10000 => {
252+
const {
253+
let mut lower_count_total = 0;
254+
let mut i = 0;
255+
while i < SINGLETONS0_UPPER.len() {
256+
lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
257+
i += 1;
258+
}
259+
assert!(lower_count_total == SINGLETONS0_LOWER.len());
260+
}
261+
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
262+
// to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
263+
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
264+
// the length contained in the remaining 15 bits.
265+
unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
266+
}
267+
..0x20000 => {
268+
const {
269+
let mut lower_count_total = 0;
270+
let mut i = 0;
271+
while i < SINGLETONS1_UPPER.len() {
272+
lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
273+
i += 1;
274+
}
275+
assert!(lower_count_total == SINGLETONS1_LOWER.len());
276+
}
277+
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
278+
// to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
279+
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
280+
// the length contained in the remaining 15 bits.
281+
unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
282+
}\
234283
""")
235284
for a, b in extra:
236285
print(" 0x{:x}..0x{:x} => false,".format(a, a + b))
@@ -240,8 +289,12 @@ def main():
240289
}\
241290
""")
242291
print()
243-
print_singletons(SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER")
244-
print_singletons(SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER")
292+
print_singletons(
293+
SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER"
294+
)
295+
print_singletons(
296+
SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER"
297+
)
245298
print_normal(normal0, "NORMAL0")
246299
print_normal(normal1, "NORMAL1")
247300

library/core/src/unicode/printable.rs

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,28 @@
11
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
22
// do not edit directly!
33

4-
fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal: &[u8]) -> bool {
5-
let x_upper = (x >> 8) as u8;
4+
/// # Safety
5+
///
6+
/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
7+
/// equal to the length of `singletons_lower`.
8+
/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
9+
/// endian, with the highest bit set and the length contained in the remaining 15 bits.
10+
unsafe fn check(
11+
x: u16,
12+
singletons_upper: &[(u8, u8)],
13+
singletons_lower: &[u8],
14+
normal: &[u8],
15+
) -> bool {
16+
let [x_upper, x_lower] = x.to_be_bytes();
617
let mut lower_start = 0;
718
for &(upper, lower_count) in singletons_upper {
819
let lower_end = lower_start + lower_count as usize;
9-
if x_upper == upper {
10-
for &lower in &singletons_lower[lower_start..lower_end] {
11-
if lower == x as u8 {
20+
if upper == x_upper {
21+
// SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
22+
// is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
23+
// less than or equal to `singletons_lower.len()`.
24+
for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
25+
if lower == x_lower {
1226
return false;
1327
}
1428
}
@@ -23,9 +37,14 @@ fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal:
2337
let mut current = true;
2438
while let Some(v) = normal.next() {
2539
let len = if v & 0x80 != 0 {
26-
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
40+
let upper = v & 0x7f;
41+
// SAFETY: The encoding of `normal` is guaranteed by the caller such that
42+
// if the length is greater than 0x7f, it consists of two bytes, so there
43+
// must be a next byte.
44+
let lower = unsafe { normal.next().unwrap_unchecked() };
45+
i32::from(u16::from_be_bytes([upper, lower]))
2746
} else {
28-
v as i32
47+
i32::from(v)
2948
};
3049
x -= len;
3150
if x < 0 {
@@ -43,8 +62,38 @@ pub(crate) fn is_printable(x: char) -> bool {
4362
match x {
4463
..32 => false, // ASCII fast path
4564
..127 => true, // ASCII fast path
46-
..0x10000 => check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0),
47-
..0x20000 => check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1),
65+
..0x10000 => {
66+
const {
67+
let mut lower_count_total = 0;
68+
let mut i = 0;
69+
while i < SINGLETONS0_UPPER.len() {
70+
lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
71+
i += 1;
72+
}
73+
assert!(lower_count_total == SINGLETONS0_LOWER.len());
74+
}
75+
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
76+
// to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
77+
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
78+
// the length contained in the remaining 15 bits.
79+
unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
80+
}
81+
..0x20000 => {
82+
const {
83+
let mut lower_count_total = 0;
84+
let mut i = 0;
85+
while i < SINGLETONS1_UPPER.len() {
86+
lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
87+
i += 1;
88+
}
89+
assert!(lower_count_total == SINGLETONS1_LOWER.len());
90+
}
91+
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
92+
// to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
93+
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
94+
// the length contained in the remaining 15 bits.
95+
unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
96+
}
4897
0x2a6e0..0x2a700 => false,
4998
0x2b73a..0x2b740 => false,
5099
0x2b81e..0x2b820 => false,

0 commit comments

Comments
 (0)