@@ -39,6 +39,7 @@ use crate::result::Result::{Err, Ok};
39
39
/// Pure rust memchr implementation, taken from rust-memchr
40
40
pub mod memchr;
41
41
42
+ mod ascii;
42
43
mod cmp;
43
44
mod index;
44
45
mod iter;
@@ -3197,163 +3198,6 @@ impl<T> [T] {
3197
3198
}
3198
3199
}
3199
3200
3200
- #[ lang = "slice_u8" ]
3201
- #[ cfg( not( test) ) ]
3202
- impl [ u8 ] {
3203
- /// Checks if all bytes in this slice are within the ASCII range.
3204
- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3205
- #[ inline]
3206
- pub fn is_ascii ( & self ) -> bool {
3207
- is_ascii ( self )
3208
- }
3209
-
3210
- /// Checks that two slices are an ASCII case-insensitive match.
3211
- ///
3212
- /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
3213
- /// but without allocating and copying temporaries.
3214
- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3215
- #[ inline]
3216
- pub fn eq_ignore_ascii_case ( & self , other : & [ u8 ] ) -> bool {
3217
- self . len ( ) == other. len ( ) && self . iter ( ) . zip ( other) . all ( |( a, b) | a. eq_ignore_ascii_case ( b) )
3218
- }
3219
-
3220
- /// Converts this slice to its ASCII upper case equivalent in-place.
3221
- ///
3222
- /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
3223
- /// but non-ASCII letters are unchanged.
3224
- ///
3225
- /// To return a new uppercased value without modifying the existing one, use
3226
- /// [`to_ascii_uppercase`].
3227
- ///
3228
- /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
3229
- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3230
- #[ inline]
3231
- pub fn make_ascii_uppercase ( & mut self ) {
3232
- for byte in self {
3233
- byte. make_ascii_uppercase ( ) ;
3234
- }
3235
- }
3236
-
3237
- /// Converts this slice to its ASCII lower case equivalent in-place.
3238
- ///
3239
- /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
3240
- /// but non-ASCII letters are unchanged.
3241
- ///
3242
- /// To return a new lowercased value without modifying the existing one, use
3243
- /// [`to_ascii_lowercase`].
3244
- ///
3245
- /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
3246
- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3247
- #[ inline]
3248
- pub fn make_ascii_lowercase ( & mut self ) {
3249
- for byte in self {
3250
- byte. make_ascii_lowercase ( ) ;
3251
- }
3252
- }
3253
- }
3254
-
3255
/// Returns `true` if any byte in the word `v` is non-ASCII (`>= 128`).
///
/// Every ASCII byte has its most significant bit clear, so masking `v` with a
/// word whose bytes are all `0x80` yields a nonzero value exactly when at
/// least one byte of `v` is non-ASCII. Modeled on the similar check that
/// `../str/mod.rs` performs for UTF-8 validation.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // Build the mask from `size_of::<usize>()` copies of `0x80` instead of
    // truncating a 64-bit literal with `as`, so it is correct by construction
    // for every pointer width and involves no lossy cast.
    const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; mem::size_of::<usize>()]);
    (NONASCII_MASK & v) != 0
}
3262
-
3263
- /// Optimized ASCII test that will use usize-at-a-time operations instead of
3264
- /// byte-at-a-time operations (when possible).
3265
- ///
3266
- /// The algorithm we use here is pretty simple. If `s` is too short, we just
3267
- /// check each byte and be done with it. Otherwise:
3268
- ///
3269
- /// - Read the first word with an unaligned load.
3270
- /// - Align the pointer, read subsequent words until end with aligned loads.
3271
- /// - Read the last `usize` from `s` with an unaligned load.
3272
- ///
3273
- /// If any of these loads produces something for which `contains_nonascii`
3274
- /// (above) returns true, then we know the answer is false.
3275
- #[ inline]
3276
- fn is_ascii ( s : & [ u8 ] ) -> bool {
3277
- const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
3278
-
3279
- let len = s. len ( ) ;
3280
- let align_offset = s. as_ptr ( ) . align_offset ( USIZE_SIZE ) ;
3281
-
3282
- // If we wouldn't gain anything from the word-at-a-time implementation, fall
3283
- // back to a scalar loop.
3284
- //
3285
- // We also do this for architectures where `size_of::<usize>()` isn't
3286
- // sufficient alignment for `usize`, because it's a weird edge case.
3287
- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem:: align_of :: < usize > ( ) {
3288
- return s. iter ( ) . all ( |b| b. is_ascii ( ) ) ;
3289
- }
3290
-
3291
- // We always read the first word unaligned, which means `align_offset` is
3292
- // 0, we'd read the same value again for the aligned read.
3293
- let offset_to_aligned = if align_offset == 0 { USIZE_SIZE } else { align_offset } ;
3294
-
3295
- let start = s. as_ptr ( ) ;
3296
- // SAFETY: We verify `len < USIZE_SIZE` above.
3297
- let first_word = unsafe { ( start as * const usize ) . read_unaligned ( ) } ;
3298
-
3299
- if contains_nonascii ( first_word) {
3300
- return false ;
3301
- }
3302
- // We checked this above, somewhat implicitly. Note that `offset_to_aligned`
3303
- // is either `align_offset` or `USIZE_SIZE`, both of are explicitly checked
3304
- // above.
3305
- debug_assert ! ( offset_to_aligned <= len) ;
3306
-
3307
- // SAFETY: word_ptr is the (properly aligned) usize ptr we use to read the
3308
- // middle chunk of the slice.
3309
- let mut word_ptr = unsafe { start. add ( offset_to_aligned) as * const usize } ;
3310
-
3311
- // `byte_pos` is the byte index of `word_ptr`, used for loop end checks.
3312
- let mut byte_pos = offset_to_aligned;
3313
-
3314
- // Paranoia check about alignment, since we're about to do a bunch of
3315
- // unaligned loads. In practice this should be impossible barring a bug in
3316
- // `align_offset` though.
3317
- debug_assert_eq ! ( ( word_ptr as usize ) % mem:: align_of:: <usize >( ) , 0 ) ;
3318
-
3319
- // Read subsequent words until the last aligned word, excluding the last
3320
- // aligned word by itself to be done in tail check later, to ensure that
3321
- // tail is always one `usize` at most to extra branch `byte_pos == len`.
3322
- while byte_pos < len - USIZE_SIZE {
3323
- debug_assert ! (
3324
- // Sanity check that the read is in bounds
3325
- ( word_ptr as usize + USIZE_SIZE ) <= ( start. wrapping_add( len) as usize ) &&
3326
- // And that our assumptions about `byte_pos` hold.
3327
- ( word_ptr as usize ) - ( start as usize ) == byte_pos
3328
- ) ;
3329
-
3330
- // SAFETY: We know `word_ptr` is properly aligned (because of
3331
- // `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
3332
- let word = unsafe { word_ptr. read ( ) } ;
3333
- if contains_nonascii ( word) {
3334
- return false ;
3335
- }
3336
-
3337
- byte_pos += USIZE_SIZE ;
3338
- // SAFETY: We know that `byte_pos <= len - USIZE_SIZE`, which means that
3339
- // after this `add`, `word_ptr` will be at most one-past-the-end.
3340
- word_ptr = unsafe { word_ptr. add ( 1 ) } ;
3341
- }
3342
-
3343
- // Sanity check to ensure there really is only one `usize` left. This should
3344
- // be guaranteed by our loop condition.
3345
- debug_assert ! ( byte_pos <= len && len - byte_pos <= USIZE_SIZE ) ;
3346
-
3347
- // SAFETY: This relies on `len >= USIZE_SIZE`, which we check at the start.
3348
- let last_word = unsafe { ( start. add ( len - USIZE_SIZE ) as * const usize ) . read_unaligned ( ) } ;
3349
-
3350
- !contains_nonascii ( last_word)
3351
- }
3352
-
3353
- ////////////////////////////////////////////////////////////////////////////////
3354
- // Common traits
3355
- ////////////////////////////////////////////////////////////////////////////////
3356
-
3357
3201
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
3358
3202
impl < T > Default for & [ T ] {
3359
3203
/// Creates an empty slice.
0 commit comments