Skip to content

Commit 73fb45d

Browse files
[3.6] bpo-30375: Correct the stacklevel of regex compiling warnings. (GH-1595) (#1604)
Warnings emitted when compiling a regular expression now always point to the line in the user code. Previously they could point into the internals of the re module if emitted from inside groups or conditionals. (cherry picked from commit c7ac728)
1 parent 75b8a54 commit 73fb45d

File tree

3 files changed

+31
-15
lines changed

3 files changed

+31
-15
lines changed

Lib/sre_parse.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -404,15 +404,16 @@ def _escape(source, escape, state):
404404
pass
405405
raise source.error("bad escape %s" % escape, len(escape))
406406

407-
def _parse_sub(source, state, verbose, nested=True):
407+
def _parse_sub(source, state, verbose, nested):
408408
# parse an alternation: a|b|c
409409

410410
items = []
411411
itemsappend = items.append
412412
sourcematch = source.match
413413
start = source.tell()
414414
while True:
415-
itemsappend(_parse(source, state, verbose, not nested and not items))
415+
itemsappend(_parse(source, state, verbose, nested + 1,
416+
not nested and not items))
416417
if not sourcematch("|"):
417418
break
418419

@@ -454,10 +455,10 @@ def _parse_sub(source, state, verbose, nested=True):
454455
subpattern.append((BRANCH, (None, items)))
455456
return subpattern
456457

457-
def _parse_sub_cond(source, state, condgroup, verbose):
458-
item_yes = _parse(source, state, verbose)
458+
def _parse_sub_cond(source, state, condgroup, verbose, nested):
459+
item_yes = _parse(source, state, verbose, nested + 1)
459460
if source.match("|"):
460-
item_no = _parse(source, state, verbose)
461+
item_no = _parse(source, state, verbose, nested + 1)
461462
if source.next == "|":
462463
raise source.error("conditional backref with more than two branches")
463464
else:
@@ -466,7 +467,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
466467
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
467468
return subpattern
468469

469-
def _parse(source, state, verbose, first=False):
470+
def _parse(source, state, verbose, nested, first=False):
470471
# parse a simple pattern
471472
subpattern = SubPattern(state)
472473

@@ -692,7 +693,7 @@ def _parse(source, state, verbose, first=False):
692693
lookbehindgroups = state.lookbehindgroups
693694
if lookbehindgroups is None:
694695
state.lookbehindgroups = state.groups
695-
p = _parse_sub(source, state, verbose)
696+
p = _parse_sub(source, state, verbose, nested + 1)
696697
if dir < 0:
697698
if lookbehindgroups is None:
698699
state.lookbehindgroups = None
@@ -739,7 +740,7 @@ def _parse(source, state, verbose, first=False):
739740
source.string[:20], # truncate long regexes
740741
' (truncated)' if len(source.string) > 20 else '',
741742
),
742-
DeprecationWarning, stacklevel=7
743+
DeprecationWarning, stacklevel=nested + 6
743744
)
744745
if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
745746
raise Verbose
@@ -757,11 +758,11 @@ def _parse(source, state, verbose, first=False):
757758
except error as err:
758759
raise source.error(err.msg, len(name) + 1) from None
759760
if condgroup:
760-
p = _parse_sub_cond(source, state, condgroup, verbose)
761+
p = _parse_sub_cond(source, state, condgroup, verbose, nested + 1)
761762
else:
762763
sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
763764
not (del_flags & SRE_FLAG_VERBOSE))
764-
p = _parse_sub(source, state, sub_verbose)
765+
p = _parse_sub(source, state, sub_verbose, nested + 1)
765766
if not source.match(")"):
766767
raise source.error("missing ), unterminated subpattern",
767768
source.tell() - start)
@@ -851,15 +852,15 @@ def parse(str, flags=0, pattern=None):
851852
pattern.str = str
852853

853854
try:
854-
p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False)
855+
p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
855856
except Verbose:
856857
# the VERBOSE flag was switched on inside the pattern. to be
857858
# on the safe side, we'll parse the whole thing again...
858859
pattern = Pattern()
859860
pattern.flags = flags | SRE_FLAG_VERBOSE
860861
pattern.str = str
861862
source.seek(0)
862-
p = _parse_sub(source, pattern, True, False)
863+
p = _parse_sub(source, pattern, True, 0)
863864

864865
p.pattern.flags = fix_flags(str, p.pattern.flags)
865866

Lib/test/test_re.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,7 @@ def test_inline_flags(self):
13481348
str(warns.warnings[0].message),
13491349
'Flags not at the start of the expression %s' % p
13501350
)
1351+
self.assertEqual(warns.warnings[0].filename, __file__)
13511352

13521353
p = upper_char + '(?i)%s' % ('.?' * 100)
13531354
with self.assertWarns(DeprecationWarning) as warns:
@@ -1356,6 +1357,7 @@ def test_inline_flags(self):
13561357
str(warns.warnings[0].message),
13571358
'Flags not at the start of the expression %s (truncated)' % p[:20]
13581359
)
1360+
self.assertEqual(warns.warnings[0].filename, __file__)
13591361

13601362
with self.assertWarns(DeprecationWarning):
13611363
self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
@@ -1367,14 +1369,23 @@ def test_inline_flags(self):
13671369
self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
13681370
with self.assertWarns(DeprecationWarning):
13691371
self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
1370-
with self.assertWarns(DeprecationWarning):
1372+
with self.assertWarns(DeprecationWarning) as warns:
13711373
self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
1372-
with self.assertWarns(DeprecationWarning):
1374+
self.assertRegex(str(warns.warnings[0].message),
1375+
'Flags not at the start')
1376+
self.assertEqual(warns.warnings[0].filename, __file__)
1377+
with self.assertWarns(DeprecationWarning) as warns:
13731378
self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
13741379
lower_char))
1375-
with self.assertWarns(DeprecationWarning):
1380+
self.assertRegex(str(warns.warnings[0].message),
1381+
'Flags not at the start')
1382+
self.assertEqual(warns.warnings[0].filename, __file__)
1383+
with self.assertWarns(DeprecationWarning) as warns:
13761384
self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
13771385
lower_char))
1386+
self.assertRegex(str(warns.warnings[0].message),
1387+
'Flags not at the start')
1388+
self.assertEqual(warns.warnings[0].filename, __file__)
13781389

13791390

13801391
def test_dollar_matches_twice(self):

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ Core and Builtins
3636
Library
3737
-------
3838

39+
- bpo-30375: Warnings emitted when compile a regular expression now always
40+
point to the line in the user code. Previously they could point into inners
41+
of the re module if emitted from inside of groups or conditionals.
42+
3943
- bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is
4044
running coroutine and the coroutine returned without any more ``await``.
4145

0 commit comments

Comments
 (0)