Skip to content

Commit e86c077

Browse files
committed
Step 1, test-cases
Show the wrong behaviors before this fix.
1 parent 69b4a17 commit e86c077

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

Lib/test/test_re.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,115 @@ def check_en_US_utf8(self):
11941194
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
11951195
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
11961196

1197+
def test_bug_35859(self):
1198+
# Four bugs about capturing groups.  The expected values in the first
# four sections below record the wrong results seen before the fix.
1199+
1200+
# ============================================
1201+
# need to reset capturing groups in SRE_SEARCH
1202+
# ============================================
1203+
# found in issue34294
1204+
s = "a\tx"
1205+
p = r"\b(?=(\t)|(x))x"
1206+
self.assertEqual(re.search(p, s).groups(), ('', 'x'))
1207+
1208+
# =============================
1209+
# macro MARK_PUSH(lastmark) bug
1210+
# =============================
1211+
# reported in issue35859
1212+
self.assertEqual(re.match(r'(ab|a)*?b', 'ab').groups(), ('',))
1213+
self.assertEqual(re.match(r'(ab|a)+?b', 'ab').groups(), ('',))
1214+
self.assertEqual(re.match(r'(ab|a){0,2}?b', 'ab').groups(), ('',))
1215+
self.assertEqual(re.match(r'(.b|a)*?b', 'ab').groups(), ('',))
1216+
1217+
# =======================================================
1218+
# JUMP_MIN_UNTIL_3 should LASTMARK_SAVE() and MARK_PUSH()
1219+
# =======================================================
1220+
1221+
# 1, triggered by SRE_OP_REPEAT_ONE (the b? in this pattern)
1222+
self.assertEqual(re.match(r'(ab?)*?b', 'ab').groups(), ('',))
1223+
1224+
# 2, triggered by SRE_OP_MIN_REPEAT_ONE (the a*? in this pattern)
1225+
s = 'axxzaz'
1226+
p = r'(?:a*?(xx)??z)*'
1227+
self.assertEqual(re.match(p, s).groups(), ('',))
1228+
1229+
# 3, triggered by SRE_OP_MIN_UNTIL (JUMP_MIN_UNTIL_2),
1230+
# i.e. the (?:a|bc)*? in this pattern
1231+
s = 'axxzbcz'
1232+
p = r'(?:(?:a|bc)*?(xx)??z)*'
1233+
self.assertEqual(re.match(p, s).groups(), ('',))
1234+
# test-case provided by issue9134
1235+
s = 'xtcxyzxc'
1236+
p = r'((x|yz)+?(t)??c)*'
1237+
self.assertEqual(re.match(p, s).groups(), ('xyzxc', 'x', ''))
1238+
1239+
# ======================================================
1240+
# JUMP_ASSERT_NOT should LASTMARK_SAVE() and MARK_PUSH()
1241+
# ======================================================
1242+
# reported in issue725149
1243+
# negative assertion
1244+
self.assertEqual(re.match(r'(?!(..)c)', 'ab').groups(), ('ab',))
1245+
# negative assertion in a repeat
1246+
self.assertEqual(re.match(r'(?:(?!(ab)c).)*', 'ab').groups(), ('b',))
1247+
self.assertEqual(re.match(r'((?!(bc)d)(.))*', 'abc').groups(),
1248+
('c', 'c', 'c'))
1249+
1250+
# =============================================================
1251+
# the asserts below did not fail before the fix; they are here
# only to prevent regressions
1252+
# =============================================================
1253+
1254+
# 1, shows why JUMP_MIN_REPEAT_ONE should LASTMARK_SAVE()
1255+
# (the .?? in this pattern)
1256+
m = re.match(r'.??(?=(a)?)b', 'ab')
1257+
self.assertEqual(m.span(), (0, 2))
1258+
self.assertEqual(m.groups(), (None,))
1259+
# the same pattern put in a repeat
1260+
m = re.match(r'(?:.??(?=(a)?)b)*', 'abab')
1261+
self.assertEqual(m.span(), (0, 4))
1262+
self.assertEqual(m.groups(), (None,))
1263+
1264+
# 2, shows why JUMP_MIN_UNTIL_2 should LASTMARK_SAVE()
1265+
# (the (?:..)?? in this pattern)
1266+
m = re.match(r'(?:..)??(?=(aa)?)bb', 'aabb')
1267+
self.assertEqual(m.span(), (0, 4))
1268+
self.assertEqual(m.groups(), (None,))
1269+
# the same pattern put in a repeat
1270+
m = re.match(r'(?:(?:..)??(?=(aa)?)bb)*', 'aabbaabb')
1271+
self.assertEqual(m.span(), (0, 8))
1272+
self.assertEqual(m.groups(), (None,))
1273+
1274+
# 3, shows why JUMP_REPEAT_ONE_1 should LASTMARK_SAVE()
1275+
# (the .* in this pattern; the tail starts with a literal)
1276+
self.assertEqual(re.match(r'.*x(?=(b)?)a', 'xaxb').groups(), (None,))
1277+
1278+
# 4, shows why JUMP_REPEAT_ONE_2 should LASTMARK_SAVE()
1279+
# (the .* in this pattern; the tail is the general case)
1280+
self.assertEqual(re.match(r'.*(?=(b)?)a', 'ab').groups(), (None,))
1281+
1282+
# 5, demonstrates that JUMP_MAX_UNTIL_3 doesn't need LASTMARK_SAVE()
1283+
# (this pattern is similar to the one in 4)
1284+
self.assertEqual(re.match(r'(.)*(?=(b)?)a', 'ab').groups(),
1285+
(None, None))
1286+
self.assertEqual(re.match(r'(.){0}(?=(b)?)a', 'ab').groups(),
1287+
(None, None))
1288+
1289+
# 6, positive assertion in a repeat
1290+
# strictly speaking, this result is a bug: the correct result should be
1291+
# (None,), but that is very hard to fix with the current fundamental
1292+
# implementation of sre.
1293+
# PHP 7.3.2, Java 11.0.2, Ruby 2.6.1, and the third-party module
1294+
# regex 2019.2.21 return ('a',) as well.
1295+
# Perl 5.26.1 and Node.js 10.15.1 return the correct result (None,).
1296+
# Go 1.12 and Rust 1.32.0 don't support lookaround yet.
1297+
self.assertEqual(re.match(r'(?:(?=(a)?).)*', 'ab').groups(), ('a',))
1298+
1299+
# 7, negative assertion
1300+
# PHP 7.3.2, Ruby 2.6.1, Node.js 10.15.1 and regex 2019.2.21 return
1301+
# (None,)
1302+
# Java 11.0.2 and Perl 5.26.1 return ('b',)
1303+
# Go 1.12 and Rust 1.32.0 don't support lookaround yet.
1304+
self.assertEqual(re.match(r'a*(?!(b))', 'ab').groups(), (None,))
1305+
11971306

11981307
def run_re_tests():
11991308
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix four bugs in the re module's handling of capturing groups.

0 commit comments

Comments
 (0)