@@ -2,6 +2,7 @@ use crate::error::{Error, ErrorCode, Result};
2
2
use alloc:: vec:: Vec ;
3
3
use core:: char;
4
4
use core:: cmp;
5
+ use core:: mem;
5
6
use core:: ops:: Deref ;
6
7
use core:: str;
7
8
@@ -425,6 +426,51 @@ impl<'a> SliceRead<'a> {
425
426
}
426
427
}
427
428
429
+ #[ inline( always) ]
430
+ fn skip_to_escape ( & mut self , forbid_control_characters : bool ) {
431
+ let rest = & self . slice [ self . index ..] ;
432
+ let end = self . index + memchr:: memchr2 ( b'"' , b'\\' , rest) . unwrap_or ( rest. len ( ) ) ;
433
+
434
+ if !forbid_control_characters {
435
+ self . index = end;
436
+ return ;
437
+ }
438
+
439
+ // We now wish to check if the chunk contains a byte in range 0x00..=0x1F. Ideally, this
440
+ // would be integrated this into the memchr2 check above, but memchr does not support this
441
+ // at the moment. Therefore, use a variation on Mycroft's algorithm [1] to provide
442
+ // performance better than a naive loop. It runs faster than just a single memchr call on
443
+ // benchmarks and is faster than both SSE2 and AVX-based code, and it's cross-platform, so
444
+ // probably the right fit.
445
+ // [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
446
+
447
+ // Pad the chunk to a whole count of units if possible. This ensures that SWAR code is used
448
+ // to handle the tail in the hot path.
449
+ let block_end = ( self . index + ( end - self . index ) . next_multiple_of ( mem:: size_of :: < usize > ( ) ) )
450
+ . min ( self . slice . len ( ) ) ;
451
+ let mut block = & self . slice [ self . index ..block_end] ;
452
+
453
+ while let Some ( ( chars, block_remainder) ) = block. split_first_chunk ( ) {
454
+ const ONE_BYTES : usize = usize:: MAX / 255 ;
455
+ let chars = usize:: from_ne_bytes ( * chars) ;
456
+ let mask = chars. wrapping_sub ( ONE_BYTES * 0x20 ) & !chars & ( ONE_BYTES << 7 ) ;
457
+
458
+ if mask != 0 {
459
+ let control_index = block_end - block. len ( ) + mask. trailing_zeros ( ) as usize / 8 ;
460
+ self . index = control_index. min ( end) ;
461
+ return ;
462
+ }
463
+
464
+ block = block_remainder;
465
+ }
466
+
467
+ if let Some ( offset) = block. iter ( ) . position ( |& c| c <= 0x1F ) {
468
+ self . index = ( block_end - block. len ( ) + offset) . min ( end) ;
469
+ } else {
470
+ self . index = end;
471
+ }
472
+ }
473
+
428
474
/// The big optimization here over IoRead is that if the string contains no
429
475
/// backslash escape sequences, the returned &str is a slice of the raw JSON
430
476
/// data so we avoid copying into the scratch space.
@@ -442,9 +488,7 @@ impl<'a> SliceRead<'a> {
442
488
let mut start = self . index ;
443
489
444
490
loop {
445
- while self . index < self . slice . len ( ) && !ESCAPE [ self . slice [ self . index ] as usize ] {
446
- self . index += 1 ;
447
- }
491
+ self . skip_to_escape ( validate) ;
448
492
if self . index == self . slice . len ( ) {
449
493
return error ( self , ErrorCode :: EofWhileParsingString ) ;
450
494
}
@@ -470,9 +514,7 @@ impl<'a> SliceRead<'a> {
470
514
}
471
515
_ => {
472
516
self . index += 1 ;
473
- if validate {
474
- return error ( self , ErrorCode :: ControlCharacterWhileParsingString ) ;
475
- }
517
+ return error ( self , ErrorCode :: ControlCharacterWhileParsingString ) ;
476
518
}
477
519
}
478
520
}
@@ -538,9 +580,7 @@ impl<'a> Read<'a> for SliceRead<'a> {
538
580
539
581
fn ignore_str ( & mut self ) -> Result < ( ) > {
540
582
loop {
541
- while self . index < self . slice . len ( ) && !ESCAPE [ self . slice [ self . index ] as usize ] {
542
- self . index += 1 ;
543
- }
583
+ self . skip_to_escape ( true ) ;
544
584
if self . index == self . slice . len ( ) {
545
585
return error ( self , ErrorCode :: EofWhileParsingString ) ;
546
586
}
0 commit comments