Skip to content

Commit a5eb196

Browse files
Rollup merge of #76971 - bugadani:issue-75659, r=dtolnay
Refactor memchr to allow optimization Closes #75659 The implementation already uses naive search if the slice is short enough, but the case is complicated enough to not be optimized away. This PR refactors memchr so that it exits early when the slice is short enough. Codegen-wise, as shown in #75659, memchr was not inlined previously, so the only way I could find to test this is to check that there is no memchr call. Let me know if there is a more robust solution here.
2 parents 24d0040 + 37f08c7 commit a5eb196

File tree

2 files changed

+59
-14
lines changed

2 files changed

+59
-14
lines changed

library/core/src/slice/memchr.rs

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,17 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
4646
// - body, scan by 2 words at a time
4747
// - the last remaining part, < 2 word size
4848
let len = text.len();
49-
let ptr = text.as_ptr();
5049
let usize_bytes = mem::size_of::<usize>();
5150

51+
// Fast path for small slices
52+
if len < 2 * usize_bytes {
53+
return text.iter().position(|elt| *elt == x);
54+
}
55+
5256
// search up to an aligned boundary
57+
let ptr = text.as_ptr();
5358
let mut offset = ptr.align_offset(usize_bytes);
59+
5460
if offset > 0 {
5561
offset = cmp::min(offset, len);
5662
if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@@ -60,22 +66,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
6066

6167
// search the body of the text
6268
let repeated_x = repeat_byte(x);
69+
while offset <= len - 2 * usize_bytes {
70+
unsafe {
71+
let u = *(ptr.add(offset) as *const usize);
72+
let v = *(ptr.add(offset + usize_bytes) as *const usize);
6373

64-
if len >= 2 * usize_bytes {
65-
while offset <= len - 2 * usize_bytes {
66-
unsafe {
67-
let u = *(ptr.add(offset) as *const usize);
68-
let v = *(ptr.add(offset + usize_bytes) as *const usize);
69-
70-
// break if there is a matching byte
71-
let zu = contains_zero_byte(u ^ repeated_x);
72-
let zv = contains_zero_byte(v ^ repeated_x);
73-
if zu || zv {
74-
break;
75-
}
74+
// break if there is a matching byte
75+
let zu = contains_zero_byte(u ^ repeated_x);
76+
let zv = contains_zero_byte(v ^ repeated_x);
77+
if zu || zv {
78+
break;
7679
}
77-
offset += usize_bytes * 2;
7880
}
81+
offset += usize_bytes * 2;
7982
}
8083

8184
// Find the byte after the point the body loop stopped.

src/test/codegen/issue-75659.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// This test checks that the call to memchr is optimized away when searching in small slices.
2+
3+
// compile-flags: -O
4+
5+
#![crate_type = "lib"]
6+
7+
type T = u8;
8+
9+
// CHECK-LABEL: @foo1
10+
#[no_mangle]
11+
pub fn foo1(x: T, data: &[T; 1]) -> bool {
12+
// CHECK-NOT: memchr
13+
data.contains(&x)
14+
}
15+
16+
// CHECK-LABEL: @foo2
17+
#[no_mangle]
18+
pub fn foo2(x: T, data: &[T; 2]) -> bool {
19+
// CHECK-NOT: memchr
20+
data.contains(&x)
21+
}
22+
23+
// CHECK-LABEL: @foo3
24+
#[no_mangle]
25+
pub fn foo3(x: T, data: &[T; 3]) -> bool {
26+
// CHECK-NOT: memchr
27+
data.contains(&x)
28+
}
29+
30+
// CHECK-LABEL: @foo4
31+
#[no_mangle]
32+
pub fn foo4(x: T, data: &[T; 4]) -> bool {
33+
// CHECK-NOT: memchr
34+
data.contains(&x)
35+
}
36+
37+
// CHECK-LABEL: @foo16
38+
#[no_mangle]
39+
pub fn foo16(x: T, data: &[T; 16]) -> bool {
40+
// CHECK-NOT: memchr
41+
data.contains(&x)
42+
}

0 commit comments

Comments
 (0)