Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit af8be47

Browse files
committed
Use a fixed chunk size also in production code and update some comments
1 parent 5901a2d commit af8be47

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

library/alloc/src/str.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -613,22 +613,23 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
613613
#[cfg(not(test))]
614614
#[cfg(not(no_global_oom_handling))]
615615
fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
616-
// Process the input in chunks to enable auto-vectorization
617-
const USIZE_SIZE: usize = mem::size_of::<usize>();
618-
const MAGIC_UNROLL: usize = 2;
619-
const N: usize = USIZE_SIZE * MAGIC_UNROLL;
616+
// Process the input in chunks of 16 bytes to enable auto-vectorization.
617+
// Previously the chunk size depended on the size of `usize`,
618+
// but on 32-bit platforms with SSE or NEON, 16 bytes is also the better choice.
619+
// The only downside on other platforms would be a bit more loop-unrolling.
620+
const N: usize = 16;
620621

621622
let mut slice = s.as_bytes();
622623
let mut out = Vec::with_capacity(slice.len());
623-
let mut out_slice = &mut out.spare_capacity_mut()[..slice.len()];
624+
let mut out_slice = out.spare_capacity_mut();
624625

625626
let mut ascii_prefix_len = 0_usize;
626627
let mut is_ascii = [false; N];
627628

628629
while slice.len() >= N {
629630
// Safety: checked in loop condition
630631
let chunk = unsafe { slice.get_unchecked(..N) };
631-
// Safety: out_slice has same length as input slice and gets sliced with the same offsets
632+
// Safety: out_slice is at least as long as the input slice and gets sliced with the same offsets
632633
let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };
633634

634635
for j in 0..N {
@@ -639,6 +640,7 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
639640
// size gives the best result, specifically a pmovmsk instruction on x86.
640641
// There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should
641642
// be updated when this method is changed.
643+
// See also https://github.com/llvm/llvm-project/issues/96395
642644
if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
643645
break;
644646
}

0 commit comments

Comments
 (0)