@@ -364,9 +364,90 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
364
364
// Impl for char
365
365
/////////////////////////////////////////////////////////////////////////////
366
366
367
/// Searcher for a needle whose UTF-8 encoding is exactly one byte.
///
/// `<char as Pattern>::into_searcher` builds this variant when the encoded
/// needle has length 1, which lets the match-finding methods compare raw bytes.
#[derive(Clone, Debug)]
pub struct AsciiCharSearcher<'a> {
    /// The string being searched.
    haystack: &'a str,
    /// The single byte that encodes the needle.
    needle: u8,
    /// Byte index at which forward searching resumes.
    finger: usize,
    /// Byte index (exclusive) at which backward searching resumes.
    finger_back: usize,
}
375
+
376
+ unsafe impl < ' a > Searcher < ' a > for AsciiCharSearcher < ' a > {
377
+ fn haystack ( & self ) -> & ' a str {
378
+ self . haystack
379
+ }
380
+
381
+ #[ inline]
382
+ fn next ( & mut self ) -> SearchStep {
383
+ let byte = self . haystack . as_bytes ( ) . get ( self . finger ) ;
384
+ if let Some ( & byte) = byte {
385
+ self . finger += 1 ;
386
+ if byte == self . needle {
387
+ SearchStep :: Match ( self . finger - 1 , self . finger )
388
+ } else {
389
+ SearchStep :: Reject ( self . finger - 1 , self . finger )
390
+ }
391
+ } else {
392
+ SearchStep :: Done
393
+ }
394
+ }
395
+
396
+ #[ inline( always) ]
397
+ fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
398
+ match memchr:: memchr ( self . needle , unsafe {
399
+ & self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back )
400
+ } ) {
401
+ Some ( x) => {
402
+ self . finger += x + 1 ;
403
+ Some ( ( self . finger - 1 , self . finger ) )
404
+ }
405
+ None => None ,
406
+ }
407
+ }
408
+
409
+ // let next_reject use the default implementation from the Searcher trait
410
+ }
411
+
412
+ unsafe impl < ' a > ReverseSearcher < ' a > for AsciiCharSearcher < ' a > {
413
+ #[ inline]
414
+ fn next_back ( & mut self ) -> SearchStep {
415
+ let old_finger = self . finger_back ;
416
+ let slice = unsafe { self . haystack . get_unchecked ( self . finger ..old_finger) } ;
417
+
418
+ let mut iter = slice. as_bytes ( ) . iter ( ) ;
419
+ let old_len = iter. len ( ) ;
420
+ if let Some ( ch) = iter. next_back ( ) {
421
+ self . finger_back -= old_len - iter. len ( ) ;
422
+ if * ch == self . needle {
423
+ SearchStep :: Match ( self . finger_back , old_finger)
424
+ } else {
425
+ SearchStep :: Reject ( self . finger_back , old_finger)
426
+ }
427
+ } else {
428
+ SearchStep :: Done
429
+ }
430
+ }
431
+
432
+ #[ inline]
433
+ fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
434
+ match memchr:: memrchr ( self . needle , self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) )
435
+ {
436
+ Some ( x) => {
437
+ let index = self . finger + x;
438
+ self . finger_back = index;
439
+ Some ( ( self . finger_back , self . finger_back + 1 ) )
440
+ }
441
+ None => None ,
442
+ }
443
+ }
444
+
445
+ // let next_reject use the default implementation from the Searcher trait
446
+ }
447
+
367
448
/// Searcher that `<char as Pattern>::into_searcher` uses for needles whose UTF-8 encoding is longer than one byte.
368
449
#[ derive( Clone , Debug ) ]
369
- pub struct CharSearcher < ' a > {
450
+ pub struct UnicodeCharSearcher < ' a > {
370
451
haystack : & ' a str ,
371
452
// safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
372
453
// This invariant can be broken *within* next_match and next_match_back, however
@@ -391,13 +472,13 @@ pub struct CharSearcher<'a> {
391
472
utf8_encoded : [ u8 ; 4 ] ,
392
473
}
393
474
394
- impl CharSearcher < ' _ > {
475
+ impl UnicodeCharSearcher < ' _ > {
395
476
fn utf8_size ( & self ) -> usize {
396
477
self . utf8_size . into ( )
397
478
}
398
479
}
399
480
400
- unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
481
+ unsafe impl < ' a > Searcher < ' a > for UnicodeCharSearcher < ' a > {
401
482
#[ inline]
402
483
fn haystack ( & self ) -> & ' a str {
403
484
self . haystack
@@ -450,7 +531,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
450
531
//
451
532
// However, this is totally okay. While we have the invariant that
452
533
// self.finger is on a UTF8 boundary, this invariant is not relied upon
453
- // within this method (it is relied upon in CharSearcher ::next()).
534
+ // within this method (it is relied upon in UnicodeCharSearcher ::next()).
454
535
//
455
536
// We only exit this method when we reach the end of the string, or if we
456
537
// find something. When we find something the `finger` will be set
@@ -475,7 +556,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
475
556
// let next_reject use the default implementation from the Searcher trait
476
557
}
477
558
478
- unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
559
+ unsafe impl < ' a > ReverseSearcher < ' a > for UnicodeCharSearcher < ' a > {
479
560
#[ inline]
480
561
fn next_back ( & mut self ) -> SearchStep {
481
562
let old_finger = self . finger_back ;
@@ -550,6 +631,57 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
550
631
}
551
632
552
633
// Marker impl with an empty body: opts `CharSearcher` into the `DoubleEndedSearcher` trait.
impl < ' a > DoubleEndedSearcher < ' a > for CharSearcher < ' a > { }
634
+ #[ derive( Clone , Debug ) ]
635
+ ///h
636
+ pub enum CharSearcher < ' a > {
637
+ ///h
638
+ AsciiCharSearcher ( AsciiCharSearcher < ' a > ) ,
639
+ ///h
640
+ UnicodeCharSearcher ( UnicodeCharSearcher < ' a > ) ,
641
+ }
642
+ unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
643
+ #[ inline]
644
+
645
+ fn haystack ( & self ) -> & ' a str {
646
+ let ( Self :: UnicodeCharSearcher ( UnicodeCharSearcher { haystack, .. } )
647
+ | Self :: AsciiCharSearcher ( AsciiCharSearcher { haystack, .. } ) ) = self ;
648
+ haystack
649
+ }
650
+ #[ inline]
651
+
652
+ fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
653
+ match self {
654
+ CharSearcher :: AsciiCharSearcher ( x) => x. next_match ( ) ,
655
+ CharSearcher :: UnicodeCharSearcher ( x) => x. next_match ( ) ,
656
+ }
657
+ }
658
+ #[ inline]
659
+
660
+ fn next ( & mut self ) -> SearchStep {
661
+ match self {
662
+ CharSearcher :: AsciiCharSearcher ( x) => x. next ( ) ,
663
+ CharSearcher :: UnicodeCharSearcher ( x) => x. next ( ) ,
664
+ }
665
+ }
666
+ }
667
+ unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
668
+ #[ inline]
669
+
670
+ fn next_back ( & mut self ) -> SearchStep {
671
+ match self {
672
+ CharSearcher :: AsciiCharSearcher ( x) => x. next_back ( ) ,
673
+ CharSearcher :: UnicodeCharSearcher ( x) => x. next_back ( ) ,
674
+ }
675
+ }
676
+ #[ inline]
677
+
678
+ fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
679
+ match self {
680
+ CharSearcher :: AsciiCharSearcher ( x) => x. next_match_back ( ) ,
681
+ CharSearcher :: UnicodeCharSearcher ( x) => x. next_match_back ( ) ,
682
+ }
683
+ }
684
+ }
553
685
554
686
/// Searches for chars that are equal to a given [`char`].
555
687
///
@@ -563,20 +695,31 @@ impl Pattern for char {
563
695
564
696
#[ inline]
565
697
fn into_searcher < ' a > ( self , haystack : & ' a str ) -> Self :: Searcher < ' a > {
698
+ if ( self as u32 ) < 128 { }
566
699
let mut utf8_encoded = [ 0 ; MAX_LEN_UTF8 ] ;
567
700
let utf8_size = self
568
701
. encode_utf8 ( & mut utf8_encoded)
569
702
. len ( )
570
703
. try_into ( )
571
704
. expect ( "char len should be less than 255" ) ;
572
-
573
- CharSearcher {
574
- haystack,
575
- finger : 0 ,
576
- finger_back : haystack. len ( ) ,
577
- needle : self ,
578
- utf8_size,
579
- utf8_encoded,
705
+ if utf8_size == 1 {
706
+ CharSearcher :: AsciiCharSearcher ( AsciiCharSearcher {
707
+ haystack,
708
+ needle : utf8_encoded[ 0 ] ,
709
+ finger : 0 ,
710
+ finger_back : haystack. len ( ) ,
711
+ // available: None,
712
+ // available_back: None,
713
+ } )
714
+ } else {
715
+ CharSearcher :: UnicodeCharSearcher ( UnicodeCharSearcher {
716
+ haystack,
717
+ finger : 0 ,
718
+ finger_back : haystack. len ( ) ,
719
+ needle : self ,
720
+ utf8_size,
721
+ utf8_encoded,
722
+ } )
580
723
}
581
724
}
582
725
0 commit comments