Skip to content

Commit 5496579

Browse files
committed
Inline memchr2 logic into Mycroft's algorithm
1 parent 3063d69 commit 5496579

File tree

1 file changed

+31
-37
lines changed

1 file changed

+31
-37
lines changed

src/read.rs

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -426,53 +426,49 @@ impl<'a> SliceRead<'a> {
426426
}
427427
}
428428

429-
#[inline(always)]
430429
fn skip_to_escape(&mut self, forbid_control_characters: bool) {
431430
let rest = &self.slice[self.index..];
432-
let end = self.index + memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
433431

434432
if !forbid_control_characters {
435-
self.index = end;
433+
self.index += memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
436434
return;
437435
}
438436

439-
// We now wish to check if the chunk contains a byte in range 0x00..=0x1F. Ideally, this
440-
// would be integrated this into the memchr2 check above, but memchr does not support this
441-
// at the moment. Therefore, use a variation on Mycroft's algorithm [1] to provide
442-
// performance better than a naive loop. It runs faster than just a single memchr call on
443-
// benchmarks and is faster than both SSE2 and AVX-based code, and it's cross-platform, so
444-
// probably the right fit.
437+
// We wish to find the first byte in range 0x00..=0x1F or " or \. Ideally, we'd use
438+
// something akin to memchr3, but the memchr crate does not support this at the moment.
439+
// Therefore, we use a variation on Mycroft's algorithm [1] to provide performance better
440+
// than a naive loop. It runs faster than equivalent two-pass memchr2+SWAR code on
441+
// benchmarks and it's cross-platform, so probably the right fit.
445442
// [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
446-
const STEP: usize = mem::size_of::<usize>();
447-
448-
// Moving this to a local variable removes a spill in the hot loop.
449-
let mut index = self.index;
450-
451-
if self.slice.len() >= STEP {
452-
while index < end.min(self.slice.len() - STEP + 1) {
453-
// We can safely overread past end in most cases. This ensures that SWAR code is
454-
// used to handle the tail in the hot path.
455-
const ONE_BYTES: usize = usize::MAX / 255;
456-
let chars = usize::from_ne_bytes(self.slice[index..][..STEP].try_into().unwrap());
457-
let mask = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars & (ONE_BYTES << 7);
458-
459-
if mask != 0 {
460-
index += mask.trailing_zeros() as usize / 8;
461-
break;
462-
}
463-
464-
index += STEP;
465-
}
466-
}
467-
468-
if index < end {
469-
if let Some(offset) = self.slice[index..end].iter().position(|&c| c <= 0x1F) {
470-
self.index = index + offset;
443+
type Chunk = usize;
444+
const STEP: usize = mem::size_of::<Chunk>();
445+
const ONE_BYTES: Chunk = Chunk::MAX / 255; // 0x0101...01
446+
447+
for chunk in rest.chunks_exact(STEP) {
448+
let chars = Chunk::from_ne_bytes(chunk.try_into().unwrap());
449+
let contains_ctrl = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars;
450+
let chars_quote = chars ^ (ONE_BYTES * Chunk::from(b'"'));
451+
let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote;
452+
let chars_backslash = chars ^ (ONE_BYTES * Chunk::from(b'\\'));
453+
let contains_backslash = chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash;
454+
let masked = (contains_ctrl | contains_quote | contains_backslash) & (ONE_BYTES << 7);
455+
if masked != 0 {
456+
// SAFETY: chunk is in-bounds for slice
457+
self.index = unsafe { chunk.as_ptr().offset_from(self.slice.as_ptr()) } as usize
458+
+ masked.trailing_zeros() as usize / 8;
471459
return;
472460
}
473461
}
474462

475-
self.index = end;
463+
self.skip_to_escape_slow();
464+
}
465+
466+
#[cold]
467+
#[inline(never)]
468+
fn skip_to_escape_slow(&mut self) {
469+
while self.index < self.slice.len() && !is_escape(self.slice[self.index]) {
470+
self.index += 1;
471+
}
476472
}
477473

478474
/// The big optimization here over IoRead is that if the string contains no
@@ -823,8 +819,6 @@ pub trait Fused: private::Sealed {}
823819
impl<'a> Fused for SliceRead<'a> {}
824820
impl<'a> Fused for StrRead<'a> {}
825821

826-
// This is only used in IoRead. SliceRead hardcodes the arguments to memchr.
827-
#[cfg(feature = "std")]
828822
/// Returns true for a double quote, a backslash, or any byte below 0x20.
fn is_escape(ch: u8) -> bool {
    matches!(ch, b'"' | b'\\' | 0x00..=0x1F)
}

0 commit comments

Comments
 (0)