@@ -426,53 +426,49 @@ impl<'a> SliceRead<'a> {
426
426
}
427
427
}
428
428
429
- #[ inline( always) ]
430
429
fn skip_to_escape ( & mut self , forbid_control_characters : bool ) {
431
430
let rest = & self . slice [ self . index ..] ;
432
- let end = self . index + memchr:: memchr2 ( b'"' , b'\\' , rest) . unwrap_or ( rest. len ( ) ) ;
433
431
434
432
if !forbid_control_characters {
435
- self . index = end ;
433
+ self . index += memchr :: memchr2 ( b'"' , b'\\' , rest ) . unwrap_or ( rest . len ( ) ) ;
436
434
return ;
437
435
}
438
436
439
- // We now wish to check if the chunk contains a byte in range 0x00..=0x1F. Ideally, this
440
- // would be integrated this into the memchr2 check above, but memchr does not support this
441
- // at the moment. Therefore, use a variation on Mycroft's algorithm [1] to provide
442
- // performance better than a naive loop. It runs faster than just a single memchr call on
443
- // benchmarks and is faster than both SSE2 and AVX-based code, and it's cross-platform, so
444
- // probably the right fit.
437
+ // We wish to find the first byte in range 0x00..=0x1F or " or \. Ideally, we'd use
438
+ // something akin to memchr3, but the memchr crate does not support this at the moment.
439
+ // Therefore, we use a variation on Mycroft's algorithm [1] to provide performance better
440
+ // than a naive loop. It runs faster than equivalent two-pass memchr2+SWAR code on
441
+ // benchmarks and it's cross-platform, so probably the right fit.
445
442
// [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
446
- const STEP : usize = mem:: size_of :: < usize > ( ) ;
447
-
448
- // Moving this to a local variable removes a spill in the hot loop.
449
- let mut index = self . index ;
450
-
451
- if self . slice . len ( ) >= STEP {
452
- while index < end. min ( self . slice . len ( ) - STEP + 1 ) {
453
- // We can safely overread past end in most cases. This ensures that SWAR code is
454
- // used to handle the tail in the hot path.
455
- const ONE_BYTES : usize = usize:: MAX / 255 ;
456
- let chars = usize:: from_ne_bytes ( self . slice [ index..] [ ..STEP ] . try_into ( ) . unwrap ( ) ) ;
457
- let mask = chars. wrapping_sub ( ONE_BYTES * 0x20 ) & !chars & ( ONE_BYTES << 7 ) ;
458
-
459
- if mask != 0 {
460
- index += mask. trailing_zeros ( ) as usize / 8 ;
461
- break ;
462
- }
463
-
464
- index += STEP ;
465
- }
466
- }
467
-
468
- if index < end {
469
- if let Some ( offset) = self . slice [ index..end] . iter ( ) . position ( |& c| c <= 0x1F ) {
470
- self . index = index + offset;
443
+ type Chunk = usize ;
444
+ const STEP : usize = mem:: size_of :: < Chunk > ( ) ;
445
+ const ONE_BYTES : Chunk = Chunk :: MAX / 255 ; // 0x0101...01
446
+
447
+ for chunk in rest. chunks_exact ( STEP ) {
448
+ let chars = Chunk :: from_ne_bytes ( chunk. try_into ( ) . unwrap ( ) ) ;
449
+ let contains_ctrl = chars. wrapping_sub ( ONE_BYTES * 0x20 ) & !chars;
450
+ let chars_quote = chars ^ ( ONE_BYTES * Chunk :: from ( b'"' ) ) ;
451
+ let contains_quote = chars_quote. wrapping_sub ( ONE_BYTES ) & !chars_quote;
452
+ let chars_backslash = chars ^ ( ONE_BYTES * Chunk :: from ( b'\\' ) ) ;
453
+ let contains_backslash = chars_backslash. wrapping_sub ( ONE_BYTES ) & !chars_backslash;
454
+ let masked = ( contains_ctrl | contains_quote | contains_backslash) & ( ONE_BYTES << 7 ) ;
455
+ if masked != 0 {
456
+ // SAFETY: chunk is in-bounds for slice
457
+ self . index = unsafe { chunk. as_ptr ( ) . offset_from ( self . slice . as_ptr ( ) ) } as usize
458
+ + masked. trailing_zeros ( ) as usize / 8 ;
471
459
return ;
472
460
}
473
461
}
474
462
475
- self . index = end;
463
+ self . skip_to_escape_slow ( ) ;
464
+ }
465
+
466
+ #[ cold]
467
+ #[ inline( never) ]
468
+ fn skip_to_escape_slow ( & mut self ) {
469
+ while self . index < self . slice . len ( ) && !is_escape ( self . slice [ self . index ] ) {
470
+ self . index += 1 ;
471
+ }
476
472
}
477
473
478
474
/// The big optimization here over IoRead is that if the string contains no
@@ -823,8 +819,6 @@ pub trait Fused: private::Sealed {}
823
819
impl < ' a > Fused for SliceRead < ' a > { }
824
820
impl < ' a > Fused for StrRead < ' a > { }
825
821
826
- // This is only used in IoRead. SliceRead hardcodes the arguments to memchr.
827
- #[ cfg( feature = "std" ) ]
828
822
/// Returns `true` for bytes that end a run of plain string bytes: the double
/// quote, the backslash, and every byte below `0x20`.
fn is_escape(ch: u8) -> bool {
    matches!(ch, b'"' | b'\\' | 0x00..=0x1F)
}
0 commit comments