Skip to content

bpo-30375: Correct the stacklevel of regex compiling warnings. #1595

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions Lib/sre_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,15 +414,16 @@ def _uniq(items):
newitems.append(item)
return newitems

def _parse_sub(source, state, verbose, nested=True):
def _parse_sub(source, state, verbose, nested):
# parse an alternation: a|b|c

items = []
itemsappend = items.append
sourcematch = source.match
start = source.tell()
while True:
itemsappend(_parse(source, state, verbose, not nested and not items))
itemsappend(_parse(source, state, verbose, nested + 1,
not nested and not items))
if not sourcematch("|"):
break

Expand Down Expand Up @@ -471,7 +472,7 @@ def _parse_sub(source, state, verbose, nested=True):
subpattern.append((BRANCH, (None, items)))
return subpattern

def _parse(source, state, verbose, first=False):
def _parse(source, state, verbose, nested, first=False):
# parse a simple pattern
subpattern = SubPattern(state)

Expand Down Expand Up @@ -708,7 +709,7 @@ def _parse(source, state, verbose, first=False):
lookbehindgroups = state.lookbehindgroups
if lookbehindgroups is None:
state.lookbehindgroups = state.groups
p = _parse_sub(source, state, verbose)
p = _parse_sub(source, state, verbose, nested + 1)
if dir < 0:
if lookbehindgroups is None:
state.lookbehindgroups = None
Expand Down Expand Up @@ -744,9 +745,9 @@ def _parse(source, state, verbose, first=False):
msg = "invalid group reference %d" % condgroup
raise source.error(msg, len(condname) + 1)
state.checklookbehindgroup(condgroup, source)
item_yes = _parse(source, state, verbose)
item_yes = _parse(source, state, verbose, nested + 1)
if source.match("|"):
item_no = _parse(source, state, verbose)
item_no = _parse(source, state, verbose, nested + 1)
if source.next == "|":
raise source.error("conditional backref with more than two branches")
else:
Expand All @@ -768,7 +769,7 @@ def _parse(source, state, verbose, first=False):
source.string[:20], # truncate long regexes
' (truncated)' if len(source.string) > 20 else '',
),
DeprecationWarning, stacklevel=7
DeprecationWarning, stacklevel=nested + 6
)
if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
raise Verbose
Expand All @@ -788,7 +789,7 @@ def _parse(source, state, verbose, first=False):
raise source.error(err.msg, len(name) + 1) from None
sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
not (del_flags & SRE_FLAG_VERBOSE))
p = _parse_sub(source, state, sub_verbose)
p = _parse_sub(source, state, sub_verbose, nested + 1)
if not source.match(")"):
raise source.error("missing ), unterminated subpattern",
source.tell() - start)
Expand Down Expand Up @@ -886,15 +887,15 @@ def parse(str, flags=0, pattern=None):
pattern.str = str

try:
p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False)
p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
except Verbose:
# the VERBOSE flag was switched on inside the pattern. to be
# on the safe side, we'll parse the whole thing again...
pattern = Pattern()
pattern.flags = flags | SRE_FLAG_VERBOSE
pattern.str = str
source.seek(0)
p = _parse_sub(source, pattern, True, False)
p = _parse_sub(source, pattern, True, 0)

p.pattern.flags = fix_flags(str, p.pattern.flags)

Expand Down
17 changes: 14 additions & 3 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,7 @@ def test_inline_flags(self):
str(warns.warnings[0].message),
'Flags not at the start of the expression %s' % p
)
self.assertEqual(warns.warnings[0].filename, __file__)

p = upper_char + '(?i)%s' % ('.?' * 100)
with self.assertWarns(DeprecationWarning) as warns:
Expand All @@ -1378,6 +1379,7 @@ def test_inline_flags(self):
str(warns.warnings[0].message),
'Flags not at the start of the expression %s (truncated)' % p[:20]
)
self.assertEqual(warns.warnings[0].filename, __file__)

with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
Expand All @@ -1389,14 +1391,23 @@ def test_inline_flags(self):
self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
with self.assertWarns(DeprecationWarning) as warns:
self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
with self.assertWarns(DeprecationWarning):
self.assertRegex(str(warns.warnings[0].message),
'Flags not at the start')
self.assertEqual(warns.warnings[0].filename, __file__)
with self.assertWarns(DeprecationWarning) as warns:
self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
lower_char))
with self.assertWarns(DeprecationWarning):
self.assertRegex(str(warns.warnings[0].message),
'Flags not at the start')
self.assertEqual(warns.warnings[0].filename, __file__)
with self.assertWarns(DeprecationWarning) as warns:
self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
lower_char))
self.assertRegex(str(warns.warnings[0].message),
'Flags not at the start')
self.assertEqual(warns.warnings[0].filename, __file__)


def test_dollar_matches_twice(self):
Expand Down
4 changes: 4 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,10 @@ Extension Modules
Library
-------

- bpo-30375: Warnings emitted when compiling a regular expression now always
point to the line in the user code. Previously they could point into the
internals of the re module if emitted from inside groups or conditionals.

- bpo-30329: imaplib and poplib now catch the Windows socket WSAEINVAL error
(code 10022) on shutdown(SHUT_RDWR): An invalid operation was attempted.
This error occurs sometimes on SSL connections.
Expand Down