Skip to content

Commit 24b5ed2

Browse files
[3.5] bpo-30375: Correct the stacklevel of regex compiling warnings. (GH-1595) (#1605)
Warnings emitted when compiling a regular expression now always point to the line in the user code. Previously they could point into the internals of the re module if emitted from inside groups or conditionals. (cherry picked from commit c7ac728)
1 parent 72e5aa1 commit 24b5ed2

File tree

3 files changed

+26
-18
lines changed

3 files changed

+26
-18
lines changed

Lib/sre_parse.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def isname(name):
309309
return False
310310
return True
311311

312-
def _class_escape(source, escape):
312+
def _class_escape(source, escape, nested):
313313
# handle escape code inside character class
314314
code = ESCAPES.get(escape)
315315
if code:
@@ -353,13 +353,13 @@ def _class_escape(source, escape):
353353
if c in ASCIILETTERS:
354354
import warnings
355355
warnings.warn('bad escape %s' % escape,
356-
DeprecationWarning, stacklevel=8)
356+
DeprecationWarning, stacklevel=nested + 6)
357357
return LITERAL, ord(escape[1])
358358
except ValueError:
359359
pass
360360
raise source.error("bad escape %s" % escape, len(escape))
361361

362-
def _escape(source, escape, state):
362+
def _escape(source, escape, state, nested):
363363
# handle escape code in expression
364364
code = CATEGORIES.get(escape)
365365
if code:
@@ -420,21 +420,21 @@ def _escape(source, escape, state):
420420
if c in ASCIILETTERS:
421421
import warnings
422422
warnings.warn('bad escape %s' % escape,
423-
DeprecationWarning, stacklevel=8)
423+
DeprecationWarning, stacklevel=nested + 6)
424424
return LITERAL, ord(escape[1])
425425
except ValueError:
426426
pass
427427
raise source.error("bad escape %s" % escape, len(escape))
428428

429-
def _parse_sub(source, state, nested=True):
429+
def _parse_sub(source, state, nested):
430430
# parse an alternation: a|b|c
431431

432432
items = []
433433
itemsappend = items.append
434434
sourcematch = source.match
435435
start = source.tell()
436436
while True:
437-
itemsappend(_parse(source, state))
437+
itemsappend(_parse(source, state, nested + 1))
438438
if not sourcematch("|"):
439439
break
440440

@@ -476,10 +476,10 @@ def _parse_sub(source, state, nested=True):
476476
subpattern.append((BRANCH, (None, items)))
477477
return subpattern
478478

479-
def _parse_sub_cond(source, state, condgroup):
480-
item_yes = _parse(source, state)
479+
def _parse_sub_cond(source, state, condgroup, nested):
480+
item_yes = _parse(source, state, nested + 1)
481481
if source.match("|"):
482-
item_no = _parse(source, state)
482+
item_no = _parse(source, state, nested + 1)
483483
if source.next == "|":
484484
raise source.error("conditional backref with more than two branches")
485485
else:
@@ -488,7 +488,7 @@ def _parse_sub_cond(source, state, condgroup):
488488
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
489489
return subpattern
490490

491-
def _parse(source, state):
491+
def _parse(source, state, nested):
492492
# parse a simple pattern
493493
subpattern = SubPattern(state)
494494

@@ -521,7 +521,7 @@ def _parse(source, state):
521521
continue
522522

523523
if this[0] == "\\":
524-
code = _escape(source, this, state)
524+
code = _escape(source, this, state, nested + 1)
525525
subpatternappend(code)
526526

527527
elif this not in SPECIAL_CHARS:
@@ -546,7 +546,7 @@ def _parse(source, state):
546546
if this == "]" and set != start:
547547
break
548548
elif this[0] == "\\":
549-
code1 = _class_escape(source, this)
549+
code1 = _class_escape(source, this, nested + 1)
550550
else:
551551
code1 = LITERAL, _ord(this)
552552
if sourcematch("-"):
@@ -562,7 +562,7 @@ def _parse(source, state):
562562
setappend((LITERAL, _ord("-")))
563563
break
564564
if that[0] == "\\":
565-
code2 = _class_escape(source, that)
565+
code2 = _class_escape(source, that, nested + 1)
566566
else:
567567
code2 = LITERAL, _ord(that)
568568
if code1[0] != LITERAL or code2[0] != LITERAL:
@@ -713,7 +713,7 @@ def _parse(source, state):
713713
lookbehindgroups = state.lookbehindgroups
714714
if lookbehindgroups is None:
715715
state.lookbehindgroups = state.groups
716-
p = _parse_sub(source, state)
716+
p = _parse_sub(source, state, nested + 1)
717717
if dir < 0:
718718
if lookbehindgroups is None:
719719
state.lookbehindgroups = None
@@ -773,9 +773,9 @@ def _parse(source, state):
773773
except error as err:
774774
raise source.error(err.msg, len(name) + 1) from None
775775
if condgroup:
776-
p = _parse_sub_cond(source, state, condgroup)
776+
p = _parse_sub_cond(source, state, condgroup, nested + 1)
777777
else:
778-
p = _parse_sub(source, state)
778+
p = _parse_sub(source, state, nested + 1)
779779
if not source.match(")"):
780780
raise source.error("missing ), unterminated subpattern",
781781
source.tell() - start)

Lib/test/test_re.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,14 +638,18 @@ def test_other_escapes(self):
638638
re.purge() # for warnings
639639
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
640640
with self.subTest(c):
641-
with self.assertWarns(DeprecationWarning):
641+
with self.assertWarns(DeprecationWarning) as warns:
642642
self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
643643
self.assertIsNone(re.match('\\%c' % c, 'a'))
644+
self.assertRegex(str(warns.warnings[0].message), 'bad escape')
645+
self.assertEqual(warns.warnings[0].filename, __file__)
644646
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
645647
with self.subTest(c):
646-
with self.assertWarns(DeprecationWarning):
648+
with self.assertWarns(DeprecationWarning) as warns:
647649
self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
648650
self.assertIsNone(re.match('[\\%c]' % c, 'a'))
651+
self.assertRegex(str(warns.warnings[0].message), 'bad escape')
652+
self.assertEqual(warns.warnings[0].filename, __file__)
649653

650654
def test_string_boundaries(self):
651655
# See http://bugs.python.org/issue10713

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ Extension Modules
4949
Library
5050
-------
5151

52+
- bpo-30375: Warnings emitted when compiling a regular expression now always
53+
point to the line in the user code. Previously they could point into the internals
54+
of the re module if emitted from inside groups or conditionals.
55+
5256
- bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is
5357
running coroutine and the coroutine returned without any more ``await``.
5458

0 commit comments

Comments
 (0)