@@ -1194,6 +1194,115 @@ def check_en_US_utf8(self):
1194
1194
self .assertIsNone (re .match (b'(?Li)\xc5 ' , b'\xe5 ' ))
1195
1195
self .assertIsNone (re .match (b'(?Li)\xe5 ' , b'\xc5 ' ))
1196
1196
1197
+ def test_bug_35859 (self ):
1198
+ # four bugs about capturing groups
1199
+
1200
+ # ============================================
1201
+ # need to reset capturing groups in SRE_SEARCH
1202
+ # ============================================
1203
+ # found in issue34294
1204
+ s = "a\t x"
1205
+ p = r"\b(?=(\t)|(x))x"
1206
+ self .assertEqual (re .search (p , s ).groups (), ('' , 'x' ))
1207
+
1208
+ # =============================
1209
+ # macro MARK_PUSH(lastmark) bug
1210
+ # =============================
1211
+ # reported in issue35859
1212
+ self .assertEqual (re .match (r'(ab|a)*?b' , 'ab' ).groups (), ('' ,))
1213
+ self .assertEqual (re .match (r'(ab|a)+?b' , 'ab' ).groups (), ('' ,))
1214
+ self .assertEqual (re .match (r'(ab|a){0,2}?b' , 'ab' ).groups (), ('' ,))
1215
+ self .assertEqual (re .match (r'(.b|a)*?b' , 'ab' ).groups (), ('' ,))
1216
+
1217
+ # =======================================================
1218
+ # JUMP_MIN_UNTIL_3 should LASTMARK_SAVE() and MARK_PUSH()
1219
+ # =======================================================
1220
+
1221
+ # 1, triggered by SRE_OP_REPEAT_ONE, b? in this pattern
1222
+ self .assertEqual (re .match (r'(ab?)*?b' , 'ab' ).groups (), ('' ,))
1223
+
1224
+ # 2, triggered by SRE_OP_MIN_REPEAT_ONE, a*? in this pattern
1225
+ s = 'axxzaz'
1226
+ p = r'(?:a*?(xx)??z)*'
1227
+ self .assertEqual (re .match (p , s ).groups (), ('' ,))
1228
+
1229
+ # 3, triggered by SRE_OP_MIN_UNTIL (JUMP_MIN_UNTIL_2)
1230
+ # (?:a|bc)*? in this pattern
1231
+ s = 'axxzbcz'
1232
+ p = r'(?:(?:a|bc)*?(xx)??z)*'
1233
+ self .assertEqual (re .match (p , s ).groups (), ('' ,))
1234
+ # test-case provided by issue9134
1235
+ s = 'xtcxyzxc'
1236
+ p = r'((x|yz)+?(t)??c)*'
1237
+ self .assertEqual (re .match (p , s ).groups (), ('xyzxc' , 'x' , '' ))
1238
+
1239
+ # ======================================================
1240
+ # JUMP_ASSERT_NOT should LASTMARK_SAVE() and MARK_PUSH()
1241
+ # ======================================================
1242
+ # reported in issue725149
1243
+ # negative assertion
1244
+ self .assertEqual (re .match (r'(?!(..)c)' , 'ab' ).groups (), ('ab' ,))
1245
+ # negative assertion in a repeat
1246
+ self .assertEqual (re .match (r'(?:(?!(ab)c).)*' , 'ab' ).groups (), ('b' ,))
1247
+ self .assertEqual (re .match (r'((?!(bc)d)(.))*' , 'abc' ).groups (),
1248
+ ('c' , 'c' , 'c' ))
1249
+
1250
+ # =============================================================
1251
+ # below asserts didn't fail before fix, just prevent regression
1252
+ # =============================================================
1253
+
1254
+ # 1, why JUMP_MIN_REPEAT_ONE should LASTMARK_SAVE()
1255
+ # .?? in this pattern
1256
+ m = re .match (r'.??(?=(a)?)b' , 'ab' )
1257
+ self .assertEqual (m .span (), (0 , 2 ))
1258
+ self .assertEqual (m .groups (), (None ,))
1259
+ # put in a repeat
1260
+ m = re .match (r'(?:.??(?=(a)?)b)*' , 'abab' )
1261
+ self .assertEqual (m .span (), (0 , 4 ))
1262
+ self .assertEqual (m .groups (), (None ,))
1263
+
1264
+ # 2, why JUMP_MIN_UNTIL_2 should LASTMARK_SAVE()
1265
+ # (?:..)?? in this pattern
1266
+ m = re .match (r'(?:..)??(?=(aa)?)bb' , 'aabb' )
1267
+ self .assertEqual (m .span (), (0 , 4 ))
1268
+ self .assertEqual (m .groups (), (None ,))
1269
+ # put in a repeat
1270
+ m = re .match (r'(?:(?:..)??(?=(aa)?)bb)*' , 'aabbaabb' )
1271
+ self .assertEqual (m .span (), (0 , 8 ))
1272
+ self .assertEqual (m .groups (), (None ,))
1273
+
1274
+ # 3, why JUMP_REPEAT_ONE_1 should LASTMARK_SAVE()
1275
+ # .* in this pattern, tail starts with a literal.
1276
+ self .assertEqual (re .match (r'.*x(?=(b)?)a' , 'xaxb' ).groups (), (None ,))
1277
+
1278
+ # 4, why JUMP_REPEAT_ONE_2 should LASTMARK_SAVE()
1279
+ # .* in this pattern, tail is general case
1280
+ self .assertEqual (re .match (r'.*(?=(b)?)a' , 'ab' ).groups (), (None ,))
1281
+
1282
+ # 5, demonstrate that JUMP_MAX_UNTIL_3 doesn't need LASTMARK_SAVE()
1283
+ # this pattern is similar to 4
1284
+ self .assertEqual (re .match (r'(.)*(?=(b)?)a' , 'ab' ).groups (),
1285
+ (None , None ))
1286
+ self .assertEqual (re .match (r'(.){0}(?=(b)?)a' , 'ab' ).groups (),
1287
+ (None , None ))
1288
+
1289
+ # 6, positive assertion in a repeat
1290
+ # strictly speaking, this is a bug, the correct result should be
1291
+ # (None,), but it's very hard to fix with the current fundamental
1292
+ # implementation of sre.
1293
+ # PHP 7.3.2, Java 11.0.2, Ruby 2.6.1, and the third-party module
1294
+ # regex 2019.2.21, return ('a',) as well.
1295
+ # Perl 5.26.1, Node.js 10.15.1, return the correct result (None,)
1296
+ # Go 1.12, Rust 1.32.0, don't support lookaround yet.
1297
+ self .assertEqual (re .match (r'(?:(?=(a)?).)*' , 'ab' ).groups (), ('a' ,))
1298
+
1299
+ # 7, negative assertion
1300
+ # PHP 7.3.2, Ruby 2.6.1, Node.js 10.15.1, regex 2019.2.21 return
1301
+ # (None,)
1302
+ # Java 11.0.2, Perl 5.26.1, return ('b',)
1303
+ # Go 1.12, Rust 1.32.0, don't support lookaround yet.
1304
+ self .assertEqual (re .match (r'a*(?!(b))' , 'ab' ).groups (), (None ,))
1305
+
1197
1306
1198
1307
def run_re_tests ():
1199
1308
from test .re_tests import tests , SUCCEED , FAIL , SYNTAX_ERROR
0 commit comments