Skip to content

Commit 3faae03

Browse files
committed
Vectorize string parsing
1 parent eca2658 commit 3faae03

File tree

1 file changed

+49
-9
lines changed

1 file changed

+49
-9
lines changed

src/read.rs

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::error::{Error, ErrorCode, Result};
22
use alloc::vec::Vec;
33
use core::char;
44
use core::cmp;
5+
use core::mem;
56
use core::ops::Deref;
67
use core::str;
78

@@ -425,6 +426,51 @@ impl<'a> SliceRead<'a> {
425426
}
426427
}
427428

429+
#[inline(always)]
430+
fn skip_to_escape(&mut self, forbid_control_characters: bool) {
431+
let rest = &self.slice[self.index..];
432+
let end = self.index + memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
433+
434+
if !forbid_control_characters {
435+
self.index = end;
436+
return;
437+
}
438+
439+
// We now wish to check if the chunk contains a byte in range 0x00..=0x1F. Ideally, this
440+
// would be integrated this into the memchr2 check above, but memchr does not support this
441+
// at the moment. Therefore, use a variation on Mycroft's algorithm [1] to provide
442+
// performance better than a naive loop. It runs faster than just a single memchr call on
443+
// benchmarks and is faster than both SSE2 and AVX-based code, and it's cross-platform, so
444+
// probably the right fit.
445+
// [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
446+
447+
// Pad the chunk to a whole count of units if possible. This ensures that SWAR code is used
448+
// to handle the tail in the hot path.
449+
let block_end = (self.index + (end - self.index).next_multiple_of(mem::size_of::<usize>()))
450+
.min(self.slice.len());
451+
let mut block = &self.slice[self.index..block_end];
452+
453+
while let Some((chars, block_remainder)) = block.split_first_chunk() {
454+
const ONE_BYTES: usize = usize::MAX / 255;
455+
let chars = usize::from_ne_bytes(*chars);
456+
let mask = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars & (ONE_BYTES << 7);
457+
458+
if mask != 0 {
459+
let control_index = block_end - block.len() + mask.trailing_zeros() as usize / 8;
460+
self.index = control_index.min(end);
461+
return;
462+
}
463+
464+
block = block_remainder;
465+
}
466+
467+
if let Some(offset) = block.iter().position(|&c| c <= 0x1F) {
468+
self.index = (block_end - block.len() + offset).min(end);
469+
} else {
470+
self.index = end;
471+
}
472+
}
473+
428474
/// The big optimization here over IoRead is that if the string contains no
429475
/// backslash escape sequences, the returned &str is a slice of the raw JSON
430476
/// data so we avoid copying into the scratch space.
@@ -442,9 +488,7 @@ impl<'a> SliceRead<'a> {
442488
let mut start = self.index;
443489

444490
loop {
445-
while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
446-
self.index += 1;
447-
}
491+
self.skip_to_escape(validate);
448492
if self.index == self.slice.len() {
449493
return error(self, ErrorCode::EofWhileParsingString);
450494
}
@@ -470,9 +514,7 @@ impl<'a> SliceRead<'a> {
470514
}
471515
_ => {
472516
self.index += 1;
473-
if validate {
474-
return error(self, ErrorCode::ControlCharacterWhileParsingString);
475-
}
517+
return error(self, ErrorCode::ControlCharacterWhileParsingString);
476518
}
477519
}
478520
}
@@ -538,9 +580,7 @@ impl<'a> Read<'a> for SliceRead<'a> {
538580

539581
fn ignore_str(&mut self) -> Result<()> {
540582
loop {
541-
while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
542-
self.index += 1;
543-
}
583+
self.skip_to_escape(true);
544584
if self.index == self.slice.len() {
545585
return error(self, ErrorCode::EofWhileParsingString);
546586
}

0 commit comments

Comments
 (0)