@@ -613,22 +613,23 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
613
613
#[ cfg( not( test) ) ]
614
614
#[ cfg( not( no_global_oom_handling) ) ]
615
615
fn convert_while_ascii ( s : & str , convert : fn ( & u8 ) -> u8 ) -> ( String , & str ) {
616
- // Process the input in chunks to enable auto-vectorization
617
- const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
618
- const MAGIC_UNROLL : usize = 2 ;
619
- const N : usize = USIZE_SIZE * MAGIC_UNROLL ;
616
+ // Process the input in chunks of 16 bytes to enable auto-vectorization.
617
+ // Previously the chunk size depended on the size of `usize`,
618
+ // but a fixed 16 bytes is also the better choice on 32-bit platforms with SSE or NEON.
619
+ // The only downside on other platforms would be a bit more loop-unrolling.
620
+ const N : usize = 16 ;
620
621
621
622
let mut slice = s. as_bytes ( ) ;
622
623
let mut out = Vec :: with_capacity ( slice. len ( ) ) ;
623
- let mut out_slice = & mut out. spare_capacity_mut ( ) [ ..slice . len ( ) ] ;
624
+ let mut out_slice = out. spare_capacity_mut ( ) ;
624
625
625
626
let mut ascii_prefix_len = 0_usize ;
626
627
let mut is_ascii = [ false ; N ] ;
627
628
628
629
while slice. len ( ) >= N {
629
630
// Safety: checked in loop condition
630
631
let chunk = unsafe { slice. get_unchecked ( ..N ) } ;
631
- // Safety: out_slice has same length as input slice and gets sliced with the same offsets
632
+ // Safety: out_slice is at least as long as the input slice and gets sliced with the same offsets
632
633
let out_chunk = unsafe { out_slice. get_unchecked_mut ( ..N ) } ;
633
634
634
635
for j in 0 ..N {
@@ -639,6 +640,7 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
639
640
// size gives the best result, specifically a pmovmsk instruction on x86.
640
641
// There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should
641
642
// be updated when this method is changed.
643
+ // See also https://github.com/llvm/llvm-project/issues/96395
642
644
if is_ascii. iter ( ) . map ( |x| * x as u8 ) . sum :: < u8 > ( ) as usize != N {
643
645
break ;
644
646
}
0 commit comments