Skip to content

Commit 79aa68d

Browse files
committed
Issue #19387: explain and test the sre overlap table
1 parent e38b054 commit 79aa68d

File tree

2 files changed

+41
-9
lines changed

2 files changed

+41
-9
lines changed

Lib/sre_compile.py

Lines changed: 22 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,27 @@ def _simple(av):
353353
lo, hi = av[2].getwidth()
354354
return lo == hi == 1 and av[2][0][0] != SUBPATTERN
355355

356+
def _generate_overlap_table(prefix):
357+
"""
358+
Generate an overlap table for the following prefix.
359+
An overlap table is a table of the same size as the prefix which
360+
informs about the potential self-overlap for each index in the prefix:
361+
- if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
362+
- if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
363+
prefix[0:k]
364+
"""
365+
table = [0] * len(prefix)
366+
for i in range(1, len(prefix)):
367+
idx = table[i - 1]
368+
while prefix[i] != prefix[idx]:
369+
if idx == 0:
370+
table[i] = 0
371+
break
372+
idx = table[idx - 1]
373+
else:
374+
table[i] = idx + 1
375+
return table
376+
356377
def _compile_info(code, pattern, flags):
357378
# internal: compile an info block. in the current version,
358379
# this contains min/max pattern width, and an optional literal
@@ -449,12 +470,7 @@ def _compile_info(code, pattern, flags):
449470
emit(prefix_skip) # skip
450471
code.extend(prefix)
451472
# generate overlap table
452-
table = [-1] + ([0]*len(prefix))
453-
for i in range(len(prefix)):
454-
table[i+1] = table[i]+1
455-
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
456-
table[i+1] = table[table[i+1]-1]+1
457-
code.extend(table[1:]) # don't store first entry
473+
code.extend(_generate_overlap_table(prefix))
458474
elif charset:
459475
_compile_charset(charset, flags, code)
460476
code[skip] = len(code) - skip

Lib/test/test_re.py

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,12 @@
33
import io
44
import re
55
from re import Scanner
6+
import sre_compile
67
import sre_constants
78
import sys
89
import string
910
import traceback
11+
import unittest
1012
from weakref import proxy
1113

1214
# Misc tests from Tim Peters' re.doc
@@ -15,8 +17,6 @@
1517
# what you're doing. Some of these tests were carefully modeled to
1618
# cover most of the code.
1719

18-
import unittest
19-
2020
class S(str):
2121
def __getitem__(self, index):
2222
return S(super().__getitem__(index))
@@ -1140,6 +1140,22 @@ def test_bug_2537(self):
11401140
self.assertEqual(m.group(1), "")
11411141
self.assertEqual(m.group(2), "y")
11421142

1143+
1144+
class ImplementationTest(unittest.TestCase):
    """
    Checks on internal helpers of the re implementation.
    """

    def test_overlap_table(self):
        # Each pair is (prefix, expected overlap table); the cases are
        # checked in order and the first mismatch fails the test.
        overlap_table = sre_compile._generate_overlap_table
        expectations = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, expected in expectations:
            self.assertEqual(overlap_table(prefix), expected)
1157+
1158+
11431159
def run_re_tests():
11441160
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
11451161
if verbose:
@@ -1269,7 +1285,7 @@ def run_re_tests():
12691285

12701286

12711287
def test_main():
1272-
run_unittest(ReTests)
1288+
run_unittest(__name__)
12731289
run_re_tests()
12741290

12751291
if __name__ == "__main__":

0 commit comments

Comments
 (0)