@@ -58,7 +58,7 @@ use sparse::SparseSet;
58
58
/// Return true if and only if the given program can be executed by a DFA.
59
59
///
60
60
/// Generally, a DFA is always possible. A pathological case where it is not
61
- /// possible is if the number of NFA states exceeds u32::MAX, in which case,
61
+ /// possible is if the number of NFA states exceeds `u32::MAX`, in which case,
62
62
/// this function will return false.
63
63
///
64
64
/// This function will also return false if the given program has any Unicode
@@ -104,7 +104,7 @@ pub struct Cache {
104
104
qnext : SparseSet ,
105
105
}
106
106
107
- /// CacheInner is logically just a part of Cache, but groups together fields
107
+ /// `CacheInner` is logically just a part of Cache, but groups together fields
108
108
/// that aren't passed as function parameters throughout search. (This split
109
109
/// is mostly an artifact of the borrow checker. It is happily paid.)
110
110
#[ derive( Clone , Debug ) ]
@@ -162,8 +162,8 @@ struct CacheInner {
162
162
/// It is laid out in row-major order, with states as rows and byte class
163
163
/// transitions as columns.
164
164
///
165
- /// The transition table is responsible for producing valid StatePtrs. A
166
- /// StatePtr points to the start of a particular row in this table. When
165
+ /// The transition table is responsible for producing valid `StatePtrs`. A
166
+ /// `StatePtr` points to the start of a particular row in this table. When
167
167
/// indexing to find the next state this allows us to avoid a multiplication
168
168
/// when computing an index into the table.
169
169
#[ derive( Clone ) ]
@@ -252,7 +252,7 @@ impl<T> Result<T> {
252
252
}
253
253
}
254
254
255
- /// State is a DFA state. It contains an ordered set of NFA states (not
255
+ /// `State` is a DFA state. It contains an ordered set of NFA states (not
256
256
/// necessarily complete) and a smattering of flags.
257
257
///
258
258
/// The flags are packed into the first byte of data.
@@ -271,7 +271,7 @@ struct State{
271
271
data : Box < [ u8 ] > ,
272
272
}
273
273
274
- /// InstPtr is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
274
+ /// `InstPtr` is a 32-bit pointer into a sequence of opcodes (i.e., it indexes
275
275
/// an NFA state).
276
276
///
277
277
/// Throughout this library, this is usually set to `usize`, but we force a
@@ -322,7 +322,8 @@ impl State {
322
322
}
323
323
}
324
324
325
- /// StatePtr is a 32 bit pointer to the start of a row in the transition table.
325
+ /// `StatePtr` is a 32-bit pointer to the start of a row in the transition
326
+ /// table.
326
327
///
327
328
/// It has many special values. There are two types of special values:
328
329
/// sentinels and flags.
@@ -345,7 +346,8 @@ impl State {
345
346
///
346
347
/// The other type of state pointer is a state pointer with special flag bits.
347
348
/// There are two flags: a start flag and a match flag. The lower bits of both
348
- /// kinds always contain a "valid" StatePtr (indicated by the STATE_MAX mask).
349
+ /// kinds always contain a "valid" `StatePtr` (indicated by the `STATE_MAX`
350
+ /// mask).
349
351
///
350
352
/// The start flag means that the state is a start state, and therefore may be
351
353
/// subject to special prefix scanning optimizations.
@@ -998,16 +1000,20 @@ impl<'a> Fsm<'a> {
998
1000
}
999
1001
}
1000
1002
}
1001
- let mut cache = true ;
1002
- if b. is_eof ( ) && self . prog . matches . len ( ) > 1 {
1003
- // If we're processing the last byte of the input and we're
1004
- // matching a regex set, then make the next state contain the
1005
- // previous states transitions. We do this so that the main
1006
- // matching loop can extract all of the match instructions.
1007
- mem:: swap ( qcur, qnext) ;
1008
- // And don't cache this state because it's totally bunk.
1009
- cache = false ;
1010
- }
1003
+
1004
+ let cache =
1005
+ if b. is_eof ( ) && self . prog . matches . len ( ) > 1 {
1006
+ // If we're processing the last byte of the input and we're
1007
+ // matching a regex set, then make the next state contain the
1008
+ // previous state's transitions. We do this so that the main
1009
+ // matching loop can extract all of the match instructions.
1010
+ mem:: swap ( qcur, qnext) ;
1011
+ // And don't cache this state because it's totally bunk.
1012
+ false
1013
+ } else {
1014
+ true
1015
+ } ;
1016
+
1011
1017
// We've now built up the set of NFA states that ought to comprise the
1012
1018
// next DFA state, so try to find it in the cache, and if it doesn't
1013
1019
// exist, cache it.
@@ -1030,7 +1036,7 @@ impl<'a> Fsm<'a> {
1030
1036
next = self . start_ptr ( next) ;
1031
1037
}
1032
1038
if next <= STATE_MAX && self . state ( next) . flags ( ) . is_match ( ) {
1033
- next = STATE_MATCH | next ;
1039
+ next | = STATE_MATCH ;
1034
1040
}
1035
1041
debug_assert ! ( next != STATE_UNKNOWN ) ;
1036
1042
// And now store our state in the current state's next list.
@@ -1113,9 +1119,9 @@ impl<'a> Fsm<'a> {
1113
1119
NotWordBoundary if flags. not_word_boundary => {
1114
1120
self . cache . stack . push ( inst. goto as InstPtr ) ;
1115
1121
}
1116
- StartLine | EndLine | StartText | EndText => { }
1117
- WordBoundaryAscii | NotWordBoundaryAscii => { }
1118
- WordBoundary | NotWordBoundary => { }
1122
+ StartLine | EndLine | StartText | EndText
1123
+ | WordBoundaryAscii | NotWordBoundaryAscii
1124
+ | WordBoundary | NotWordBoundary => { }
1119
1125
}
1120
1126
}
1121
1127
Save ( ref inst) => self . cache . stack . push ( inst. goto as InstPtr ) ,
@@ -1167,12 +1173,12 @@ impl<'a> Fsm<'a> {
1167
1173
return Some ( si) ;
1168
1174
}
1169
1175
// If the cache has gotten too big, wipe it.
1170
- if self . approximate_size ( ) > self . prog . dfa_size_limit {
1171
- if !self . clear_cache_and_save ( current_state) {
1176
+ if self . approximate_size ( ) > self . prog . dfa_size_limit
1177
+ && !self . clear_cache_and_save ( current_state)
1178
+ {
1172
1179
// Ooops. DFA is giving up.
1173
1180
return None ;
1174
1181
}
1175
- }
1176
1182
// Allocate room for our state and add it.
1177
1183
self . add_state ( key)
1178
1184
}
@@ -1210,8 +1216,7 @@ impl<'a> Fsm<'a> {
1210
1216
let ip = usize_to_u32 ( ip) ;
1211
1217
match self . prog [ ip as usize ] {
1212
1218
Char ( _) | Ranges ( _) => unreachable ! ( ) ,
1213
- Save ( _) => { }
1214
- Split ( _) => { }
1219
+ Save ( _) | Split ( _) => { }
1215
1220
Bytes ( _) => push_inst_ptr ( & mut insts, & mut prev, ip) ,
1216
1221
EmptyLook ( _) => {
1217
1222
state_flags. set_empty ( ) ;
@@ -1301,7 +1306,7 @@ impl<'a> Fsm<'a> {
1301
1306
self . cache . trans . clear ( ) ;
1302
1307
self . cache . states . clear ( ) ;
1303
1308
self . cache . compiled . clear ( ) ;
1304
- for s in self . cache . start_states . iter_mut ( ) {
1309
+ for s in & mut self . cache . start_states {
1305
1310
* s = STATE_UNKNOWN ;
1306
1311
}
1307
1312
// The unwraps are OK because we just cleared the cache and therefore
@@ -1411,9 +1416,9 @@ impl<'a> Fsm<'a> {
1411
1416
let mut empty_flags = EmptyFlags :: default ( ) ;
1412
1417
let mut state_flags = StateFlags :: default ( ) ;
1413
1418
empty_flags. start = at == 0 ;
1414
- empty_flags. end = text. len ( ) == 0 ;
1419
+ empty_flags. end = text. is_empty ( ) ;
1415
1420
empty_flags. start_line = at == 0 || text[ at - 1 ] == b'\n' ;
1416
- empty_flags. end_line = text. len ( ) == 0 ;
1421
+ empty_flags. end_line = text. is_empty ( ) ;
1417
1422
1418
1423
let is_word_last = at > 0 && Byte :: byte ( text[ at - 1 ] ) . is_ascii_word ( ) ;
1419
1424
let is_word = at < text. len ( ) && Byte :: byte ( text[ at] ) . is_ascii_word ( ) ;
@@ -1440,9 +1445,9 @@ impl<'a> Fsm<'a> {
1440
1445
let mut empty_flags = EmptyFlags :: default ( ) ;
1441
1446
let mut state_flags = StateFlags :: default ( ) ;
1442
1447
empty_flags. start = at == text. len ( ) ;
1443
- empty_flags. end = text. len ( ) == 0 ;
1448
+ empty_flags. end = text. is_empty ( ) ;
1444
1449
empty_flags. start_line = at == text. len ( ) || text[ at] == b'\n' ;
1445
- empty_flags. end_line = text. len ( ) == 0 ;
1450
+ empty_flags. end_line = text. is_empty ( ) ;
1446
1451
1447
1452
let is_word_last =
1448
1453
at < text. len ( ) && Byte :: byte ( text[ at] ) . is_ascii_word ( ) ;
0 commit comments