@@ -187,14 +187,28 @@ def main():
187
187
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
188
188
// do not edit directly!
189
189
190
- fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal: &[u8]) -> bool {
191
- let x_upper = (x >> 8) as u8;
190
+ /// # Safety
191
+ ///
192
+ /// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
193
+ /// equal to the length of `singletons_lower`.
194
+ /// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
195
+ /// endian, with the highest bit set and the length contained in the remaining 15 bits.
196
+ unsafe fn check(
197
+ x: u16,
198
+ singletons_upper: &[(u8, u8)],
199
+ singletons_lower: &[u8],
200
+ normal: &[u8],
201
+ ) -> bool {
202
+ let [x_upper, x_lower] = x.to_be_bytes();
192
203
let mut lower_start = 0;
193
204
for &(upper, lower_count) in singletons_upper {
194
205
let lower_end = lower_start + lower_count as usize;
195
- if x_upper == upper {
196
- for &lower in &singletons_lower[lower_start..lower_end] {
197
- if lower == x as u8 {
206
+ if upper == x_upper {
207
+ // SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
208
+ // is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
209
+ // less than or equal to `singletons_lower.len()`.
210
+ for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
211
+ if lower == x_lower {
198
212
return false;
199
213
}
200
214
}
@@ -209,9 +223,14 @@ def main():
209
223
let mut current = true;
210
224
while let Some(v) = normal.next() {
211
225
let len = if v & 0x80 != 0 {
212
- ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
226
+ let upper = v & 0x7f;
227
+ // SAFETY: The encoding of `normal` is guaranteed by the caller such that
228
+ // if the length is greater than 0x7f, it consists of two bytes, so there
229
+ // must be a next byte.
230
+ let lower = unsafe { normal.next().unwrap_unchecked() };
231
+ i32::from(u16::from_be_bytes([upper, lower]))
213
232
} else {
214
- v as i32
233
+ i32::from(v)
215
234
};
216
235
x -= len;
217
236
if x < 0 {
@@ -229,8 +248,38 @@ def main():
229
248
match x {
230
249
..32 => false, // ASCII fast path
231
250
..127 => true, // ASCII fast path
232
- ..0x10000 => check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0),
233
- ..0x20000 => check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1),\
251
+ ..0x10000 => {
252
+ const {
253
+ let mut lower_count_total = 0;
254
+ let mut i = 0;
255
+ while i < SINGLETONS0_UPPER.len() {
256
+ lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
257
+ i += 1;
258
+ }
259
+ assert!(lower_count_total == SINGLETONS0_LOWER.len());
260
+ }
261
+ // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
262
+ // to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
263
+ // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
264
+ // the length contained in the remaining 15 bits.
265
+ unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
266
+ }
267
+ ..0x20000 => {
268
+ const {
269
+ let mut lower_count_total = 0;
270
+ let mut i = 0;
271
+ while i < SINGLETONS1_UPPER.len() {
272
+ lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
273
+ i += 1;
274
+ }
275
+ assert!(lower_count_total == SINGLETONS1_LOWER.len());
276
+ }
277
+ // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
278
+ // to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
279
+ // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
280
+ // the length contained in the remaining 15 bits.
281
+ unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
282
+ }\
234
283
""" )
235
284
for a , b in extra :
236
285
print (" 0x{:x}..0x{:x} => false," .format (a , a + b ))
@@ -240,8 +289,12 @@ def main():
240
289
}\
241
290
""" )
242
291
print ()
243
- print_singletons (SINGLETONS0_UPPER , SINGLETONS0_LOWER , "SINGLETONS0_UPPER" , "SINGLETONS0_LOWER" )
244
- print_singletons (SINGLETONS1_UPPER , SINGLETONS1_LOWER , "SINGLETONS1_UPPER" , "SINGLETONS1_LOWER" )
292
+ print_singletons (
293
+ SINGLETONS0_UPPER , SINGLETONS0_LOWER , "SINGLETONS0_UPPER" , "SINGLETONS0_LOWER"
294
+ )
295
+ print_singletons (
296
+ SINGLETONS1_UPPER , SINGLETONS1_LOWER , "SINGLETONS1_UPPER" , "SINGLETONS1_LOWER"
297
+ )
245
298
print_normal (normal0 , "NORMAL0" )
246
299
print_normal (normal1 , "NORMAL1" )
247
300
0 commit comments