@@ -152,6 +152,31 @@ impl Utf8Sequence {
152
152
self . as_slice ( ) . len ( )
153
153
}
154
154
155
+ /// Reverses the ranges in this sequence.
156
+ ///
157
+ /// For example, if this corresponds to the following sequence:
158
+ ///
159
+ /// ```ignore
160
+ /// [D0-D3][80-BF]
161
+ /// ```
162
+ ///
163
+ /// Then after reversal, it will be
164
+ ///
165
+ /// ```ignore
166
+ /// [80-BF][D0-D3]
167
+ /// ```
168
+ ///
169
+ /// This is useful when one is constructing a UTF-8 automaton to match
170
+ /// character classes in reverse.
171
+ pub fn reverse ( & mut self ) {
172
+ match * self {
173
+ Utf8Sequence :: One ( _) => { }
174
+ Utf8Sequence :: Two ( ref mut x) => x. reverse ( ) ,
175
+ Utf8Sequence :: Three ( ref mut x) => x. reverse ( ) ,
176
+ Utf8Sequence :: Four ( ref mut x) => x. reverse ( ) ,
177
+ }
178
+ }
179
+
155
180
/// Returns true if and only if a prefix of `bytes` matches this sequence
156
181
/// of byte ranges.
157
182
pub fn matches ( & self , bytes : & [ u8 ] ) -> bool {
@@ -201,7 +226,7 @@ pub struct Utf8Range {
201
226
202
227
impl Utf8Range {
203
228
fn new ( start : u8 , end : u8 ) -> Self {
204
- Utf8Range { start : start , end : end }
229
+ Utf8Range { start, end }
205
230
}
206
231
207
232
/// Returns true if and only if the given byte is in this range.
@@ -294,7 +319,7 @@ impl Utf8Sequences {
294
319
}
295
320
296
321
fn push ( & mut self , start : u32 , end : u32 ) {
297
- self . range_stack . push ( ScalarRange { start : start , end : end } ) ;
322
+ self . range_stack . push ( ScalarRange { start, end } ) ;
298
323
}
299
324
}
300
325
@@ -507,6 +532,43 @@ mod tests {
507
532
) ;
508
533
}
509
534
535
#[test]
fn reverse() {
    use utf8::Utf8Sequence::*;

    // One range: reversing leaves the sequence as it was.
    let mut one = One(rutf8(0xA, 0xB));
    one.reverse();
    let expected = [rutf8(0xA, 0xB)];
    assert_eq!(one.as_slice(), &expected);

    // Two ranges swap places.
    let mut two = Two([rutf8(0xA, 0xB), rutf8(0xB, 0xC)]);
    two.reverse();
    let expected = [rutf8(0xB, 0xC), rutf8(0xA, 0xB)];
    assert_eq!(two.as_slice(), &expected);

    // Three ranges: the outer two swap, the middle one stays put.
    let mut three =
        Three([rutf8(0xA, 0xB), rutf8(0xB, 0xC), rutf8(0xC, 0xD)]);
    three.reverse();
    let expected = [rutf8(0xC, 0xD), rutf8(0xB, 0xC), rutf8(0xA, 0xB)];
    assert_eq!(three.as_slice(), &expected);

    // Four ranges come back in fully reversed order.
    let mut four = Four([
        rutf8(0xA, 0xB),
        rutf8(0xB, 0xC),
        rutf8(0xC, 0xD),
        rutf8(0xD, 0xE),
    ]);
    four.reverse();
    let expected = [
        rutf8(0xD, 0xE),
        rutf8(0xC, 0xD),
        rutf8(0xB, 0xC),
        rutf8(0xA, 0xB),
    ];
    assert_eq!(four.as_slice(), &expected);
}
571
+
510
572
fn encode_surrogate ( cp : u32 ) -> [ u8 ; 3 ] {
511
573
const TAG_CONT : u8 = 0b1000_0000 ;
512
574
const TAG_THREE_B : u8 = 0b1110_0000 ;
0 commit comments