Skip to content

Commit cf8acc7

Browse files
ethanpailes authored and BurntSushi committed
literal: fix anchor performance problem
The Match literal iterator would repeatedly look for matches in the remainder of the input after it found its first match regardless of whether or not the regex was anchored at the start. This commit adds logic to make sure that we don't keep looking for matches after the first match is returned for a start-anchored literal regex.
1 parent 2f6f88e commit cf8acc7

File tree

1 file changed

+66
-2
lines changed

1 file changed

+66
-2
lines changed

src/exec.rs

Lines changed: 66 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -622,8 +622,13 @@ impl<'c> ExecNoSync<'c> {
622622
}
623623
AnchoredStart => {
624624
let lits = &self.ro.nfa.prefixes;
625-
lits.find_start(&text[start..])
626-
.map(|(s, e)| (start + s, start + e))
625+
if !self.ro.nfa.is_anchored_start
626+
|| (self.ro.nfa.is_anchored_start && start == 0) {
627+
lits.find_start(&text[start..])
628+
.map(|(s, e)| (start + s, start + e))
629+
} else {
630+
None
631+
}
627632
}
628633
AnchoredEnd => {
629634
let lits = &self.ro.suffixes;
@@ -1286,3 +1291,62 @@ impl ProgramCacheInner {
12861291
}
12871292
}
12881293
}
1294+
1295+
#[cfg(test)]
mod test {
    // Regression tests for start-anchored literal regexes: the default
    // matching strategy and the bounded backtracker must split the same
    // input into exactly the same chunks.
    //
    // Each test collects both `split` results and compares them whole.
    // Zipping the two iterators would stop at the shorter one and could
    // therefore hide a difference in the *number* of chunks produced.

    /// `^S` against the bytes `SS`, compiled as a byte regex with
    /// `only_utf8(false)`: bounded backtracking and the default engine must
    /// agree on every chunk and on the chunk count.
    #[test]
    fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
        use internal::ExecBuilder;

        let backtrack_bytes_re = ExecBuilder::new("^S")
            .bounded_backtracking()
            .only_utf8(false)
            .build()
            .map(|exec| exec.into_byte_regex())
            .map_err(|err| err.to_string())
            .unwrap();

        let default_bytes_re = ExecBuilder::new("^S")
            .only_utf8(false)
            .build()
            .map(|exec| exec.into_byte_regex())
            .map_err(|err| err.to_string())
            .unwrap();

        // Two consecutive 'S' bytes (83, 83).
        let input: &[u8] = b"SS";

        let s1: Vec<_> = backtrack_bytes_re.split(input).collect();
        let s2: Vec<_> = default_bytes_re.split(input).collect();
        assert_eq!(s1, s2);
    }

    /// `^(?u:\*)` against `**`, compiled with `bytes(true)` and exposed as a
    /// string regex: bounded backtracking and the default engine must agree
    /// on every chunk and on the chunk count.
    #[test]
    fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
        use internal::ExecBuilder;

        let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
            .bounded_backtracking()
            .bytes(true)
            .build()
            .map(|exec| exec.into_regex())
            .map_err(|err| err.to_string())
            .unwrap();

        let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
            .bytes(true)
            .build()
            .map(|exec| exec.into_regex())
            .map_err(|err| err.to_string())
            .unwrap();

        let input = "**";

        let s1: Vec<_> = backtrack_bytes_re.split(input).collect();
        let s2: Vec<_> = default_bytes_re.split(input).collect();
        assert_eq!(s1, s2);
    }
}

0 commit comments

Comments
 (0)