Skip to content

Commit c9dce1b

Browse files
committed
core: add internal core::str_bytes module handling string-like slices
Introduce a new core::str_bytes module with types and functions which handle string-like byte slices. String-like means that the code treats UTF-8 byte sequences as characters within such slices but doesn’t assume that the slices are well-formed. A `str` is trivially a byte sequence that the module can handle, but so is OsStr (which is WTF-8 on Windows and unstructured bytes on Unix). Move a bunch of code (most notably the implementation of the two-way string-matching algorithm) from core::str to core::str_bytes. Note that this likely introduces a performance regression in some of the str functions (since the new code cannot assume well-formed UTF-8). This is going to be rectified by a following commit which will make it possible again for the code to assume the format of the bytes. This is not done in this commit to keep it smaller.
1 parent cc9bf61 commit c9dce1b

File tree

6 files changed

+1520
-836
lines changed

6 files changed

+1520
-836
lines changed

library/alloc/tests/str.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,7 +1972,7 @@ mod pattern {
19721972
str_searcher_multibyte_haystack,
19731973
" ",
19741974
"├──",
1975-
[Reject(0, 3), Reject(3, 6), Reject(6, 9),]
1975+
[Reject(0, 9),]
19761976
);
19771977
make_test!(
19781978
str_searcher_empty_needle_multibyte_haystack,
@@ -2008,13 +2008,13 @@ mod pattern {
20082008
char_searcher_multibyte_haystack,
20092009
' ',
20102010
"├──",
2011-
[Reject(0, 3), Reject(3, 6), Reject(6, 9),]
2011+
[Reject(0, 9),]
20122012
);
20132013
make_test!(
20142014
char_searcher_short_haystack,
20152015
'\u{1F4A9}',
20162016
"* \t",
2017-
[Reject(0, 1), Reject(1, 2), Reject(2, 3),]
2017+
[Reject(0, 3),]
20182018
);
20192019

20202020
// See #85462

library/core/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,8 @@ pub mod hash;
365365
pub mod pattern;
366366
pub mod slice;
367367
pub mod str;
368+
#[allow(missing_docs)]
369+
pub mod str_bytes;
368370
pub mod time;
369371

370372
pub mod unicode;

library/core/src/pattern.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,19 @@ pub trait SearchResult<T = usize>: Sized + sealed::Sealed {
211211
/// Value indicating searching has finished.
212212
const DONE: Self;
213213

214+
/// Whether the search should return a reject as soon as possible.
215+
///
216+
/// For example, if a search can quickly determine that the very next
217+
/// position cannot be where a next match starts, it should return a reject
218+
/// with that position. This is an optimisation which allows the algorithm
219+
/// to not waste time looking for the next match if the caller is only
220+
/// interested in the next position of a reject.
221+
///
222+
/// If this is `true`, [`rejecting()`][Self::rejecting] is guaranteed to
223+
/// return `Some` and if this is `false`, [`matching()`][Self::matching] is
224+
/// guaranteed to return `Some`.
225+
const USE_EARLY_REJECT: bool;
226+
214227
/// Returns value describing a match or `None` if this implementation
215228
/// doesn’t care about matches.
216229
fn matching(start: T, end: T) -> Option<Self>;
@@ -232,6 +245,7 @@ pub struct RejectOnly<T = usize>(pub Option<(T, T)>);
232245

233246
impl<T> SearchResult<T> for SearchStep<T> {
234247
const DONE: Self = SearchStep::Done;
248+
const USE_EARLY_REJECT: bool = false;
235249

236250
#[inline(always)]
237251
fn matching(s: T, e: T) -> Option<Self> {
@@ -246,6 +260,7 @@ impl<T> SearchResult<T> for SearchStep<T> {
246260

247261
impl<T> SearchResult<T> for MatchOnly<T> {
248262
const DONE: Self = Self(None);
263+
const USE_EARLY_REJECT: bool = false;
249264

250265
#[inline(always)]
251266
fn matching(s: T, e: T) -> Option<Self> {
@@ -260,6 +275,7 @@ impl<T> SearchResult<T> for MatchOnly<T> {
260275

261276
impl<T> SearchResult<T> for RejectOnly<T> {
262277
const DONE: Self = Self(None);
278+
const USE_EARLY_REJECT: bool = true;
263279

264280
#[inline(always)]
265281
fn matching(_s: T, _e: T) -> Option<Self> {

0 commit comments

Comments
 (0)