Skip to content

Commit 9eabac6

Browse files
Issue #18685: Restore re performance to pre-PEP 393 levels.
1 parent ffb58e9 commit 9eabac6

File tree

4 files changed

+350
-384
lines changed

4 files changed

+350
-384
lines changed

Lib/test/test_re.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,8 @@ def test_basic_re_sub(self):
7777
self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
7878
self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
7979
self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
80+
for y in ("\xe0", "\u0430", "\U0001d49c"):
81+
self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
8082

8183
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
8284
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
@@ -250,6 +252,13 @@ def test_re_split(self):
250252
[b'', b'a', b'b', b'c'])
251253
self.assertTypedEqual(re.split(b"(:*)", string),
252254
[b'', b':', b'a', b':', b'b', b'::', b'c'])
255+
for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
256+
"\U0001d49c\U0001d49e\U0001d4b5"):
257+
string = ":%s:%s::%s" % (a, b, c)
258+
self.assertEqual(re.split(":", string), ['', a, b, '', c])
259+
self.assertEqual(re.split(":*", string), ['', a, b, c])
260+
self.assertEqual(re.split("(:*)", string),
261+
['', ':', a, ':', b, '::', c])
253262

254263
self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
255264
self.assertEqual(re.split("(:)*", ":a:b::c"),
@@ -287,6 +296,14 @@ def test_re_findall(self):
287296
[b":", b"::", b":::"])
288297
self.assertTypedEqual(re.findall(b"(:)(:*)", string),
289298
[(b":", b""), (b":", b":"), (b":", b"::")])
299+
for x in ("\xe0", "\u0430", "\U0001d49c"):
300+
xx = x * 2
301+
xxx = x * 3
302+
string = "a%sb%sc%sd" % (x, xx, xxx)
303+
self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
304+
self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
305+
self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
306+
[(x, ""), (x, x), (x, xx)])
290307

291308
def test_bug_117612(self):
292309
self.assertEqual(re.findall(r"(a|(b))", "aba"),
@@ -305,6 +322,12 @@ def test_re_match(self):
305322
self.assertEqual(re.match(b'(a)', string).group(0), b'a')
306323
self.assertEqual(re.match(b'(a)', string).group(1), b'a')
307324
self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
325+
for a in ("\xe0", "\u0430", "\U0001d49c"):
326+
self.assertEqual(re.match(a, a).groups(), ())
327+
self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
328+
self.assertEqual(re.match('(%s)' % a, a).group(0), a)
329+
self.assertEqual(re.match('(%s)' % a, a).group(1), a)
330+
self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
308331

309332
pat = re.compile('((a)|(b))(c)?')
310333
self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@ Core and Builtins
2121
Library
2222
-------
2323

24+
- Issue #18685: Restore re performance to pre-PEP 393 levels.
25+
2426
- Issue #19339: telnetlib module is now using time.monotonic() when available
2527
to compute timeout.
2628

0 commit comments

Comments
 (0)