Skip to content

Commit 9175272

Browse files
committed
an attempt
1 parent 038d599 commit 9175272

File tree

1 file changed

+156
-13
lines changed

1 file changed

+156
-13
lines changed

library/core/src/str/pattern.rs

Lines changed: 156 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -364,9 +364,90 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
364364
// Impl for char
365365
/////////////////////////////////////////////////////////////////////////////
366366

367+
/// Searcher for a needle `char` whose UTF-8 encoding is a single byte.
///
/// Because the needle is one byte long, matching compares raw bytes of the
/// haystack against `needle` instead of decoding characters.
#[derive(Clone, Debug)]
pub struct AsciiCharSearcher<'a> {
    /// The string being searched.
    haystack: &'a str,
    /// The needle, stored as its single UTF-8 byte.
    needle: u8,
    /// Byte offset at which the next forward step starts (initially `0`).
    finger: usize,
    /// Byte offset at which the next backward step ends, exclusive
    /// (initially `haystack.len()`).
    finger_back: usize,
}
375+
376+
unsafe impl<'a> Searcher<'a> for AsciiCharSearcher<'a> {
377+
fn haystack(&self) -> &'a str {
378+
self.haystack
379+
}
380+
381+
#[inline]
382+
fn next(&mut self) -> SearchStep {
383+
let byte = self.haystack.as_bytes().get(self.finger);
384+
if let Some(&byte) = byte {
385+
self.finger += 1;
386+
if byte == self.needle {
387+
SearchStep::Match(self.finger - 1, self.finger)
388+
} else {
389+
SearchStep::Reject(self.finger - 1, self.finger)
390+
}
391+
} else {
392+
SearchStep::Done
393+
}
394+
}
395+
396+
#[inline(always)]
397+
fn next_match(&mut self) -> Option<(usize, usize)> {
398+
match memchr::memchr(self.needle, unsafe {
399+
&self.haystack.as_bytes().get_unchecked(self.finger..self.finger_back)
400+
}) {
401+
Some(x) => {
402+
self.finger += x + 1;
403+
Some((self.finger - 1, self.finger))
404+
}
405+
None => None,
406+
}
407+
}
408+
409+
// let next_reject use the default implementation from the Searcher trait
410+
}
411+
412+
unsafe impl<'a> ReverseSearcher<'a> for AsciiCharSearcher<'a> {
413+
#[inline]
414+
fn next_back(&mut self) -> SearchStep {
415+
let old_finger = self.finger_back;
416+
let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
417+
418+
let mut iter = slice.as_bytes().iter();
419+
let old_len = iter.len();
420+
if let Some(ch) = iter.next_back() {
421+
self.finger_back -= old_len - iter.len();
422+
if *ch == self.needle {
423+
SearchStep::Match(self.finger_back, old_finger)
424+
} else {
425+
SearchStep::Reject(self.finger_back, old_finger)
426+
}
427+
} else {
428+
SearchStep::Done
429+
}
430+
}
431+
432+
#[inline]
433+
fn next_match_back(&mut self) -> Option<(usize, usize)> {
434+
match memchr::memrchr(self.needle, self.haystack[self.finger..self.finger_back].as_bytes())
435+
{
436+
Some(x) => {
437+
let index = self.finger + x;
438+
self.finger_back = index;
439+
Some((self.finger_back, self.finger_back + 1))
440+
}
441+
None => None,
442+
}
443+
}
444+
445+
// let next_reject use the default implementation from the Searcher trait
446+
}
447+
367448
/// Associated type for `<char as Pattern>::Searcher<'a>`.
368449
#[derive(Clone, Debug)]
369-
pub struct CharSearcher<'a> {
450+
pub struct UnicodeCharSearcher<'a> {
370451
haystack: &'a str,
371452
// safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
372453
// This invariant can be broken *within* next_match and next_match_back, however
@@ -391,13 +472,13 @@ pub struct CharSearcher<'a> {
391472
utf8_encoded: [u8; 4],
392473
}
393474

394-
impl CharSearcher<'_> {
475+
impl UnicodeCharSearcher<'_> {
395476
fn utf8_size(&self) -> usize {
396477
self.utf8_size.into()
397478
}
398479
}
399480

400-
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
481+
unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
401482
#[inline]
402483
fn haystack(&self) -> &'a str {
403484
self.haystack
@@ -450,7 +531,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
450531
//
451532
// However, this is totally okay. While we have the invariant that
452533
// self.finger is on a UTF8 boundary, this invariant is not relied upon
453-
// within this method (it is relied upon in CharSearcher::next()).
534+
// within this method (it is relied upon in UnicodeCharSearcher::next()).
454535
//
455536
// We only exit this method when we reach the end of the string, or if we
456537
// find something. When we find something the `finger` will be set
@@ -475,7 +556,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
475556
// let next_reject use the default implementation from the Searcher trait
476557
}
477558

478-
unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
559+
unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
479560
#[inline]
480561
fn next_back(&mut self) -> SearchStep {
481562
let old_finger = self.finger_back;
@@ -550,6 +631,57 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
550631
}
551632

552633
// Marker impl with no methods: declares that `CharSearcher` may be driven from
// both ends.
// NOTE(review): `CharSearcher` is now the enum declared right after this line,
// so this relies on both of its variants searching consistently from the
// front and from the back; confirm.
impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
634+
#[derive(Clone, Debug)]
635+
///h
636+
pub enum CharSearcher<'a> {
637+
///h
638+
AsciiCharSearcher(AsciiCharSearcher<'a>),
639+
///h
640+
UnicodeCharSearcher(UnicodeCharSearcher<'a>),
641+
}
642+
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
643+
#[inline]
644+
645+
fn haystack(&self) -> &'a str {
646+
let (Self::UnicodeCharSearcher(UnicodeCharSearcher { haystack, .. })
647+
| Self::AsciiCharSearcher(AsciiCharSearcher { haystack, .. })) = self;
648+
haystack
649+
}
650+
#[inline]
651+
652+
fn next_match(&mut self) -> Option<(usize, usize)> {
653+
match self {
654+
CharSearcher::AsciiCharSearcher(x) => x.next_match(),
655+
CharSearcher::UnicodeCharSearcher(x) => x.next_match(),
656+
}
657+
}
658+
#[inline]
659+
660+
fn next(&mut self) -> SearchStep {
661+
match self {
662+
CharSearcher::AsciiCharSearcher(x) => x.next(),
663+
CharSearcher::UnicodeCharSearcher(x) => x.next(),
664+
}
665+
}
666+
}
667+
unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
668+
#[inline]
669+
670+
fn next_back(&mut self) -> SearchStep {
671+
match self {
672+
CharSearcher::AsciiCharSearcher(x) => x.next_back(),
673+
CharSearcher::UnicodeCharSearcher(x) => x.next_back(),
674+
}
675+
}
676+
#[inline]
677+
678+
fn next_match_back(&mut self) -> Option<(usize, usize)> {
679+
match self {
680+
CharSearcher::AsciiCharSearcher(x) => x.next_match_back(),
681+
CharSearcher::UnicodeCharSearcher(x) => x.next_match_back(),
682+
}
683+
}
684+
}
553685

554686
/// Searches for chars that are equal to a given [`char`].
555687
///
@@ -563,20 +695,31 @@ impl Pattern for char {
563695

564696
#[inline]
565697
fn into_searcher<'a>(self, haystack: &'a str) -> Self::Searcher<'a> {
698+
if (self as u32) < 128 {}
566699
let mut utf8_encoded = [0; MAX_LEN_UTF8];
567700
let utf8_size = self
568701
.encode_utf8(&mut utf8_encoded)
569702
.len()
570703
.try_into()
571704
.expect("char len should be less than 255");
572-
573-
CharSearcher {
574-
haystack,
575-
finger: 0,
576-
finger_back: haystack.len(),
577-
needle: self,
578-
utf8_size,
579-
utf8_encoded,
705+
if utf8_size == 1 {
706+
CharSearcher::AsciiCharSearcher(AsciiCharSearcher {
707+
haystack,
708+
needle: utf8_encoded[0],
709+
finger: 0,
710+
finger_back: haystack.len(),
711+
// available: None,
712+
// available_back: None,
713+
})
714+
} else {
715+
CharSearcher::UnicodeCharSearcher(UnicodeCharSearcher {
716+
haystack,
717+
finger: 0,
718+
finger_back: haystack.len(),
719+
needle: self,
720+
utf8_size,
721+
utf8_encoded,
722+
})
580723
}
581724
}
582725

0 commit comments

Comments
 (0)