Skip to content

[2.7] bpo-30363: Backport warnings in the re module. #1577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Lib/_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ def pattern(self, format):
# format directives (%m, etc.).
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
format = regex_chars.sub(r"\\\1", format)
whitespace_replacement = re_compile('\s+')
format = whitespace_replacement.sub('\s+', format)
whitespace_replacement = re_compile(r'\s+')
format = whitespace_replacement.sub(r'\\s+', format)
while '%' in format:
directive_index = format.index('%')+1
processed_format = "%s%s%s" % (processed_format,
Expand Down
2 changes: 1 addition & 1 deletion Lib/sre_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def _compile_info(code, pattern, flags):
# this contains min/max pattern width, and an optional literal
# prefix or a character map
lo, hi = pattern.getwidth()
if lo == 0:
if not lo and hi:
return # not worth it
# look for a literal prefix
prefix = []
Expand Down
30 changes: 29 additions & 1 deletion Lib/sre_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

OCTDIGITS = set("01234567")
HEXDIGITS = set("0123456789abcdefABCDEF")
ASCIILETTERS = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

WHITESPACE = set(" \t\n\r\v\f")

Expand Down Expand Up @@ -260,6 +261,15 @@ def _class_escape(source, escape):
elif c in DIGITS:
raise error, "bogus escape: %s" % repr(escape)
if len(escape) == 2:
if sys.py3kwarning and c in ASCIILETTERS:
import warnings
if c in 'Uu':
warnings.warn('bad escape %s; Unicode escapes are '
'supported only since Python 3.3' % escape,
FutureWarning, stacklevel=8)
else:
warnings.warnpy3k('bad escape %s' % escape,
DeprecationWarning, stacklevel=8)
return LITERAL, ord(escape[1])
except ValueError:
pass
Expand Down Expand Up @@ -309,6 +319,15 @@ def _escape(source, escape, state):
return GROUPREF, group
raise ValueError
if len(escape) == 2:
if sys.py3kwarning and c in ASCIILETTERS:
import warnings
if c in 'Uu':
warnings.warn('bad escape %s; Unicode escapes are '
'supported only since Python 3.3' % escape,
FutureWarning, stacklevel=8)
else:
warnings.warnpy3k('bad escape %s' % escape,
DeprecationWarning, stacklevel=8)
return LITERAL, ord(escape[1])
except ValueError:
pass
Expand Down Expand Up @@ -714,6 +733,12 @@ def parse(str, flags=0, pattern=None):
pattern.str = str

p = _parse_sub(source, pattern, 0)
if (sys.py3kwarning and
(p.pattern.flags & SRE_FLAG_LOCALE) and
(p.pattern.flags & SRE_FLAG_UNICODE)):
import warnings
warnings.warnpy3k("LOCALE and UNICODE flags are incompatible",
DeprecationWarning, stacklevel=5)

tail = source.get()
if tail == ")":
Expand Down Expand Up @@ -801,7 +826,10 @@ def literal(literal, p=p, pappend=a):
try:
this = makechar(ESCAPES[this][1])
except KeyError:
pass
if sys.py3kwarning and c in ASCIILETTERS:
import warnings
warnings.warnpy3k('bad escape %s' % this,
DeprecationWarning, stacklevel=4)
literal(this)
else:
literal(this)
Expand Down
92 changes: 78 additions & 14 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
verbose, run_unittest, import_module,
precisionbigmemtest, _2G, cpython_only,
captured_stdout, have_unicode, requires_unicode, u,
check_warnings)
check_warnings, check_py3k_warnings)
import locale
import re
from re import Scanner
Expand Down Expand Up @@ -66,11 +66,13 @@ def test_basic_re_sub(self):
self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')

self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
'\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
with check_py3k_warnings():
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)

self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')

Expand Down Expand Up @@ -223,11 +225,11 @@ def test_re_subn(self):

def test_re_split(self):
self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
self.assertEqual(re.split("(:*)", ":a:b::c"),
self.assertEqual(re.split(":+", ":a:b::c"), ['', 'a', 'b', 'c'])
self.assertEqual(re.split("(:+)", ":a:b::c"),
['', ':', 'a', ':', 'b', '::', 'c'])
self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
self.assertEqual(re.split("(:)*", ":a:b::c"),
self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
self.assertEqual(re.split("(:)+", ":a:b::c"),
['', ':', 'a', ':', 'b', ':', 'c'])
self.assertEqual(re.split("([b:]+)", ":a:b::c"),
['', ':', 'a', ':b::', 'c'])
Expand All @@ -237,13 +239,34 @@ def test_re_split(self):
self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
['', 'a', '', '', 'c'])

for sep, expected in [
(':*', ['', 'a', 'b', 'c']),
('(?::*)', ['', 'a', 'b', 'c']),
('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
]:
with check_py3k_warnings(('', FutureWarning)):
self.assertEqual(re.split(sep, ':a:b::c'), expected)

for sep, expected in [
('', [':a:b::c']),
(r'\b', [':a:b::c']),
(r'(?=:)', [':a:b::c']),
(r'(?<=:)', [':a:b::c']),
]:
with check_py3k_warnings():
self.assertEqual(re.split(sep, ':a:b::c'), expected)

def test_qualified_re_split(self):
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", 2),
['', ':', 'a', ':', 'b::c'])
self.assertEqual(re.split("(:*)", ":a:b::c", 2),
self.assertEqual(re.split("(:+)", ":a:b::c", 2),
['', ':', 'a', ':', 'b::c'])
with check_py3k_warnings(('', FutureWarning)):
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
['', ':', 'a', ':', 'b::c'])

def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
Expand Down Expand Up @@ -404,6 +427,29 @@ def test_special_escapes(self):
self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.UNICODE).group(0), "1aa! a")

def test_other_escapes(self):
self.assertRaises(re.error, re.compile, "\\")
self.assertEqual(re.match(r"\(", '(').group(), '(')
self.assertIsNone(re.match(r"\(", ')'))
self.assertEqual(re.match(r"\\", '\\').group(), '\\')
self.assertEqual(re.match(r"[\]]", ']').group(), ']')
self.assertIsNone(re.match(r"[\]]", '['))
self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
self.assertIsNone(re.match(r"[a\-c]", 'b'))
self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
self.assertIsNone(re.match(r"[\^a]+", 'b'))
re.purge() # for warnings
for c in 'ceghijklmopquyzCEFGHIJKLMNOPQRTUVXY':
warn = FutureWarning if c in 'Uu' else DeprecationWarning
with check_py3k_warnings(('', warn)):
self.assertEqual(re.match('\\%c$' % c, c).group(), c)
self.assertIsNone(re.match('\\%c' % c, 'a'))
for c in 'ceghijklmopquyzABCEFGHIJKLMNOPQRTUVXYZ':
warn = FutureWarning if c in 'Uu' else DeprecationWarning
with check_py3k_warnings(('', warn)):
self.assertEqual(re.match('[\\%c]$' % c, c).group(), c)
self.assertIsNone(re.match('[\\%c]' % c, 'a'))

def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
Expand Down Expand Up @@ -931,6 +977,19 @@ def test_inline_flags(self):
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))

# Incompatibilities
re.purge()
with check_py3k_warnings():
re.compile('', re.LOCALE|re.UNICODE)
with check_py3k_warnings():
re.compile('(?L)', re.UNICODE)
with check_py3k_warnings():
re.compile('(?u)', re.LOCALE)
with check_py3k_warnings():
re.compile('(?Lu)')
with check_py3k_warnings():
re.compile('(?uL)')

def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$')
Expand Down Expand Up @@ -967,8 +1026,9 @@ def test_compile(self):
def test_bug_13899(self):
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
# nothing. Ditto B and Z.
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
['A', 'B', '\b', 'C', 'Z'])
with check_py3k_warnings():
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
['A', 'B', '\b', 'C', 'Z'])

@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
Expand Down Expand Up @@ -1261,7 +1321,11 @@ def run_re_tests():

def test_main():
run_unittest(ReTests)
run_re_tests()
deprecations = [
('bad escape', DeprecationWarning),
]
with check_py3k_warnings(*deprecations):
run_re_tests()

if __name__ == "__main__":
test_main()
4 changes: 4 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ Extension Modules
Library
-------

- bpo-30363: Running Python with the -3 option now warns about regular
expression syntax that is invalid or has different semantics in Python 3,
or whose behavior will change in future Python versions.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might document the change in https://docs.python.org/2/whatsnew/2.7.html#porting-to-python-2-7

Regarding "or will change the behavior in future Python versions": is it possible to write code that works on both Python 2 and 3 and doesn't emit a warning?

Copy link
Member Author

@serhiy-storchaka serhiy-storchaka May 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't change the behavior. Warnings are just raised for suspicious regular expressions in py3k compatibility mode.

It is easy to write code that works on both Python 2 and 3 and doesn't emit a warning. In the case of a bad escape, just remove the redundant backslash if the code is correct. But it is likely that the warning points to a bug (@jwilk has found a number of such bugs in third-party projects). If you use re.split() with a pattern that always matches an empty string (e.g. r'\b'), it never worked; this is a bug. If you use re.split() with a pattern that may match an empty string (e.g. r'\s*'), you should change it to a pattern that doesn't match an empty string (e.g. r'\s+') to avoid the warning.


- bpo-30365: Running Python with the -3 option now emits deprecation warnings
for getchildren() and getiterator() methods of the Element class in the
xml.etree.cElementTree module and when passing the html argument to
Expand Down
14 changes: 14 additions & 0 deletions Modules/_sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -2267,6 +2267,20 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
if (!string)
return NULL;

if (Py_Py3kWarningFlag &&
(self->code[0] != SRE_OP_INFO || self->code[3] == 0))
{
if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
if (PyErr_WarnPy3k("split() requires a non-empty pattern match.",
1) < 0)
return NULL;
}
else if (PyErr_WarnEx(PyExc_FutureWarning,
"split() requires a non-empty pattern match.",
1) < 0)
return NULL;
}

string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
if (!string)
return NULL;
Expand Down