Skip to content

Commit 955b676

Browse files
[2.7] bpo-30363: Backport warnings in the re module. (#1577)
Running Python with the -3 option now warns about regular expression syntax that is invalid in Python 3, has different semantics in Python 3, or will change behavior in future Python versions.
1 parent 09b5247 commit 955b676

File tree

6 files changed

+128
-18
lines changed

6 files changed

+128
-18
lines changed

Lib/_strptime.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,8 @@ def pattern(self, format):
254254
# format directives (%m, etc.).
255255
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
256256
format = regex_chars.sub(r"\\\1", format)
257-
whitespace_replacement = re_compile('\s+')
258-
format = whitespace_replacement.sub('\s+', format)
257+
whitespace_replacement = re_compile(r'\s+')
258+
format = whitespace_replacement.sub(r'\\s+', format)
259259
while '%' in format:
260260
directive_index = format.index('%')+1
261261
processed_format = "%s%s%s" % (processed_format,

Lib/sre_compile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ def _compile_info(code, pattern, flags):
435435
# this contains min/max pattern width, and an optional literal
436436
# prefix or a character map
437437
lo, hi = pattern.getwidth()
438-
if lo == 0:
438+
if not lo and hi:
439439
return # not worth it
440440
# look for a literal prefix
441441
prefix = []

Lib/sre_parse.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
OCTDIGITS = set("01234567")
2525
HEXDIGITS = set("0123456789abcdefABCDEF")
26+
ASCIILETTERS = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
2627

2728
WHITESPACE = set(" \t\n\r\v\f")
2829

@@ -260,6 +261,15 @@ def _class_escape(source, escape):
260261
elif c in DIGITS:
261262
raise error, "bogus escape: %s" % repr(escape)
262263
if len(escape) == 2:
264+
if sys.py3kwarning and c in ASCIILETTERS:
265+
import warnings
266+
if c in 'Uu':
267+
warnings.warn('bad escape %s; Unicode escapes are '
268+
'supported only since Python 3.3' % escape,
269+
FutureWarning, stacklevel=8)
270+
else:
271+
warnings.warnpy3k('bad escape %s' % escape,
272+
DeprecationWarning, stacklevel=8)
263273
return LITERAL, ord(escape[1])
264274
except ValueError:
265275
pass
@@ -309,6 +319,15 @@ def _escape(source, escape, state):
309319
return GROUPREF, group
310320
raise ValueError
311321
if len(escape) == 2:
322+
if sys.py3kwarning and c in ASCIILETTERS:
323+
import warnings
324+
if c in 'Uu':
325+
warnings.warn('bad escape %s; Unicode escapes are '
326+
'supported only since Python 3.3' % escape,
327+
FutureWarning, stacklevel=8)
328+
else:
329+
warnings.warnpy3k('bad escape %s' % escape,
330+
DeprecationWarning, stacklevel=8)
312331
return LITERAL, ord(escape[1])
313332
except ValueError:
314333
pass
@@ -714,6 +733,12 @@ def parse(str, flags=0, pattern=None):
714733
pattern.str = str
715734

716735
p = _parse_sub(source, pattern, 0)
736+
if (sys.py3kwarning and
737+
(p.pattern.flags & SRE_FLAG_LOCALE) and
738+
(p.pattern.flags & SRE_FLAG_UNICODE)):
739+
import warnings
740+
warnings.warnpy3k("LOCALE and UNICODE flags are incompatible",
741+
DeprecationWarning, stacklevel=5)
717742

718743
tail = source.get()
719744
if tail == ")":
@@ -801,7 +826,10 @@ def literal(literal, p=p, pappend=a):
801826
try:
802827
this = makechar(ESCAPES[this][1])
803828
except KeyError:
804-
pass
829+
if sys.py3kwarning and c in ASCIILETTERS:
830+
import warnings
831+
warnings.warnpy3k('bad escape %s' % this,
832+
DeprecationWarning, stacklevel=4)
805833
literal(this)
806834
else:
807835
literal(this)

Lib/test/test_re.py

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
verbose, run_unittest, import_module,
44
precisionbigmemtest, _2G, cpython_only,
55
captured_stdout, have_unicode, requires_unicode, u,
6-
check_warnings)
6+
check_warnings, check_py3k_warnings)
77
import locale
88
import re
99
from re import Scanner
@@ -66,11 +66,13 @@ def test_basic_re_sub(self):
6666
self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
6767
self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
6868

69-
self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
70-
'\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
71-
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
72-
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
73-
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
69+
self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
70+
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
71+
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
72+
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
73+
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
74+
with check_py3k_warnings():
75+
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
7476

7577
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
7678

@@ -223,11 +225,11 @@ def test_re_subn(self):
223225

224226
def test_re_split(self):
225227
self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
226-
self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
227-
self.assertEqual(re.split("(:*)", ":a:b::c"),
228+
self.assertEqual(re.split(":+", ":a:b::c"), ['', 'a', 'b', 'c'])
229+
self.assertEqual(re.split("(:+)", ":a:b::c"),
228230
['', ':', 'a', ':', 'b', '::', 'c'])
229-
self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
230-
self.assertEqual(re.split("(:)*", ":a:b::c"),
231+
self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
232+
self.assertEqual(re.split("(:)+", ":a:b::c"),
231233
['', ':', 'a', ':', 'b', ':', 'c'])
232234
self.assertEqual(re.split("([b:]+)", ":a:b::c"),
233235
['', ':', 'a', ':b::', 'c'])
@@ -237,13 +239,34 @@ def test_re_split(self):
237239
self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
238240
['', 'a', '', '', 'c'])
239241

242+
for sep, expected in [
243+
(':*', ['', 'a', 'b', 'c']),
244+
('(?::*)', ['', 'a', 'b', 'c']),
245+
('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
246+
('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
247+
]:
248+
with check_py3k_warnings(('', FutureWarning)):
249+
self.assertEqual(re.split(sep, ':a:b::c'), expected)
250+
251+
for sep, expected in [
252+
('', [':a:b::c']),
253+
(r'\b', [':a:b::c']),
254+
(r'(?=:)', [':a:b::c']),
255+
(r'(?<=:)', [':a:b::c']),
256+
]:
257+
with check_py3k_warnings():
258+
self.assertEqual(re.split(sep, ':a:b::c'), expected)
259+
240260
def test_qualified_re_split(self):
241261
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
242262
self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
243263
self.assertEqual(re.split("(:)", ":a:b::c", 2),
244264
['', ':', 'a', ':', 'b::c'])
245-
self.assertEqual(re.split("(:*)", ":a:b::c", 2),
265+
self.assertEqual(re.split("(:+)", ":a:b::c", 2),
246266
['', ':', 'a', ':', 'b::c'])
267+
with check_py3k_warnings(('', FutureWarning)):
268+
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
269+
['', ':', 'a', ':', 'b::c'])
247270

248271
def test_re_findall(self):
249272
self.assertEqual(re.findall(":+", "abc"), [])
@@ -404,6 +427,29 @@ def test_special_escapes(self):
404427
self.assertEqual(re.search(r"\d\D\w\W\s\S",
405428
"1aa! a", re.UNICODE).group(0), "1aa! a")
406429

430+
def test_other_escapes(self):
431+
self.assertRaises(re.error, re.compile, "\\")
432+
self.assertEqual(re.match(r"\(", '(').group(), '(')
433+
self.assertIsNone(re.match(r"\(", ')'))
434+
self.assertEqual(re.match(r"\\", '\\').group(), '\\')
435+
self.assertEqual(re.match(r"[\]]", ']').group(), ']')
436+
self.assertIsNone(re.match(r"[\]]", '['))
437+
self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
438+
self.assertIsNone(re.match(r"[a\-c]", 'b'))
439+
self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
440+
self.assertIsNone(re.match(r"[\^a]+", 'b'))
441+
re.purge() # for warnings
442+
for c in 'ceghijklmopquyzCEFGHIJKLMNOPQRTUVXY':
443+
warn = FutureWarning if c in 'Uu' else DeprecationWarning
444+
with check_py3k_warnings(('', warn)):
445+
self.assertEqual(re.match('\\%c$' % c, c).group(), c)
446+
self.assertIsNone(re.match('\\%c' % c, 'a'))
447+
for c in 'ceghijklmopquyzABCEFGHIJKLMNOPQRTUVXYZ':
448+
warn = FutureWarning if c in 'Uu' else DeprecationWarning
449+
with check_py3k_warnings(('', warn)):
450+
self.assertEqual(re.match('[\\%c]$' % c, c).group(), c)
451+
self.assertIsNone(re.match('[\\%c]' % c, 'a'))
452+
407453
def test_string_boundaries(self):
408454
# See http://bugs.python.org/issue10713
409455
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
@@ -931,6 +977,19 @@ def test_inline_flags(self):
931977
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
932978
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
933979

980+
# Incompatibilities
981+
re.purge()
982+
with check_py3k_warnings():
983+
re.compile('', re.LOCALE|re.UNICODE)
984+
with check_py3k_warnings():
985+
re.compile('(?L)', re.UNICODE)
986+
with check_py3k_warnings():
987+
re.compile('(?u)', re.LOCALE)
988+
with check_py3k_warnings():
989+
re.compile('(?Lu)')
990+
with check_py3k_warnings():
991+
re.compile('(?uL)')
992+
934993
def test_dollar_matches_twice(self):
935994
"$ matches the end of string, and just before the terminating \n"
936995
pattern = re.compile('$')
@@ -967,8 +1026,9 @@ def test_compile(self):
9671026
def test_bug_13899(self):
9681027
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
9691028
# nothing. Ditto B and Z.
970-
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
971-
['A', 'B', '\b', 'C', 'Z'])
1029+
with check_py3k_warnings():
1030+
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
1031+
['A', 'B', '\b', 'C', 'Z'])
9721032

9731033
@precisionbigmemtest(size=_2G, memuse=1)
9741034
def test_large_search(self, size):
@@ -1261,7 +1321,11 @@ def run_re_tests():
12611321

12621322
def test_main():
12631323
run_unittest(ReTests)
1264-
run_re_tests()
1324+
deprecations = [
1325+
('bad escape', DeprecationWarning),
1326+
]
1327+
with check_py3k_warnings(*deprecations):
1328+
run_re_tests()
12651329

12661330
if __name__ == "__main__":
12671331
test_main()

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ Extension Modules
4242
Library
4343
-------
4444

45+
- bpo-30363: Running Python with the -3 option now warns about regular
46+
expression syntax that is invalid or has different semantics in Python 3
47+
or whose behavior will change in future Python versions.
48+
4549
- bpo-30365: Running Python with the -3 option now emits deprecation warnings
4650
for getchildren() and getiterator() methods of the Element class in the
4751
xml.etree.cElementTree module and when passing the html argument to

Modules/_sre.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2267,6 +2267,20 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
22672267
if (!string)
22682268
return NULL;
22692269

2270+
if (Py_Py3kWarningFlag &&
2271+
(self->code[0] != SRE_OP_INFO || self->code[3] == 0))
2272+
{
2273+
if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
2274+
if (PyErr_WarnPy3k("split() requires a non-empty pattern match.",
2275+
1) < 0)
2276+
return NULL;
2277+
}
2278+
else if (PyErr_WarnEx(PyExc_FutureWarning,
2279+
"split() requires a non-empty pattern match.",
2280+
1) < 0)
2281+
return NULL;
2282+
}
2283+
22702284
string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
22712285
if (!string)
22722286
return NULL;

0 commit comments

Comments
 (0)