Skip to content

Commit bd48d27

Browse files
Issue #22493: Inline flags should now be used only at the start of the
regular expression. A deprecation warning is emitted if they are used in the middle of the regular expression.
1 parent 352601c commit bd48d27

File tree

12 files changed

+58
-34
lines changed

12 files changed

+58
-34
lines changed

Doc/library/re.rst

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -224,12 +224,8 @@ The special characters are:
224224
flags are described in :ref:`contents-of-module-re`.) This
225225
is useful if you wish to include the flags as part of the regular
226226
expression, instead of passing a *flag* argument to the
227-
:func:`re.compile` function.
228-
229-
Note that the ``(?x)`` flag changes how the expression is parsed. It should be
230-
used first in the expression string, or after one or more whitespace characters.
231-
If there are non-whitespace characters before the flag, the results are
232-
undefined.
227+
:func:`re.compile` function. Flags should be used first in the
228+
expression string.
233229

234230
``(?:...)``
235231
A non-capturing version of regular parentheses. Matches whatever regular

Doc/whatsnew/3.6.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,6 +1124,15 @@ Deprecated features
11241124
that will not be for several Python releases. (Contributed by Emanuel Barry
11251125
in :issue:`27364`.)
11261126

1127+
* Inline flags ``(?letters)`` should now be used only at the start of the
1128+
regular expression. Inline flags in the middle of the regular expression
1129+
affect global flags in Python's :mod:`re` module. This differs from
1130+
other regular expression engines that either apply flags to only part of
1131+
the regular expression or treat them as an error. To avoid confusion,
1132+
inline flags in the middle of the regular expression now emit a deprecation
1133+
warning. It will be an error in future Python releases.
1134+
(Contributed by Serhiy Storchaka in :issue:`22493`.)
1135+
11271136

11281137
Deprecated Python behavior
11291138
--------------------------

Lib/distutils/filelist.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
302302
else:
303303
return pattern
304304

305+
# ditch start and end characters
306+
start, _, end = glob_to_re('_').partition('_')
307+
305308
if pattern:
306309
pattern_re = glob_to_re(pattern)
310+
assert pattern_re.startswith(start) and pattern_re.endswith(end)
307311
else:
308312
pattern_re = ''
309313

310314
if prefix is not None:
311-
# ditch end of pattern character
312-
empty_pattern = glob_to_re('')
313-
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
315+
prefix_re = glob_to_re(prefix)
316+
assert prefix_re.startswith(start) and prefix_re.endswith(end)
317+
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
314318
sep = os.sep
315319
if os.sep == '\\':
316320
sep = r'\\'
317-
pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
321+
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
322+
pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
318323
else: # no prefix -- respect anchor flag
319324
if anchor:
320-
pattern_re = "^" + pattern_re
325+
pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
321326

322327
return re.compile(pattern_re)

Lib/distutils/tests/test_filelist.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@ def test_glob_to_re(self):
5151

5252
for glob, regex in (
5353
# simple cases
54-
('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
55-
('foo?', r'foo[^%(sep)s]\Z(?ms)'),
56-
('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
54+
('foo*', r'(?s:foo[^%(sep)s]*)\Z'),
55+
('foo?', r'(?s:foo[^%(sep)s])\Z'),
56+
('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'),
5757
# special cases
58-
(r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
59-
(r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
60-
('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
61-
(r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
58+
(r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'),
59+
(r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'),
60+
('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'),
61+
(r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')):
6262
regex = regex % {'sep': sep}
6363
self.assertEqual(glob_to_re(glob), regex)
6464

Lib/fnmatch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,4 @@ def translate(pat):
106106
res = '%s[%s]' % (res, stuff)
107107
else:
108108
res = res + re.escape(c)
109-
return res + r'\Z(?ms)'
109+
return r'(?s:%s)\Z' % res

Lib/http/cookies.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,6 @@ def OutputString(self, attrs=None):
458458
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
459459
_LegalValueChars = _LegalKeyChars + r'\[\]'
460460
_CookiePattern = re.compile(r"""
461-
(?x) # This is a verbose pattern
462461
\s* # Optional whitespace at start of cookie
463462
(?P<key> # Start of group 'key'
464463
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
@@ -475,7 +474,7 @@ def OutputString(self, attrs=None):
475474
)? # End of optional value group
476475
\s* # Any number of spaces.
477476
(\s+|;|$) # Ending either at space, semicolon, or EOS.
478-
""", re.ASCII) # May be removed if safe.
477+
""", re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
479478

480479

481480
# At long last, here is the cookie class. Using this class is almost just like

Lib/sre_parse.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,9 @@ def getuntil(self, terminator):
279279
break
280280
result += c
281281
return result
282+
@property
283+
def pos(self):
284+
return self.index - len(self.next or '')
282285
def tell(self):
283286
return self.index - len(self.next or '')
284287
def seek(self, index):
@@ -727,8 +730,13 @@ def _parse(source, state, verbose):
727730
state.checklookbehindgroup(condgroup, source)
728731
elif char in FLAGS or char == "-":
729732
# flags
733+
pos = source.pos
730734
flags = _parse_flags(source, state, char)
731735
if flags is None: # global flags
736+
if pos != 3: # "(?x"
737+
import warnings
738+
warnings.warn('Flags not at the start of the expression',
739+
DeprecationWarning, stacklevel=7)
732740
continue
733741
add_flags, del_flags = flags
734742
group = None

Lib/test/re_tests.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,8 @@
106106
('a.*b', 'acc\nccb', FAIL),
107107
('a.{4,5}b', 'acc\nccb', FAIL),
108108
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
109-
('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
110-
('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
109+
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
110+
('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
111111
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
112112
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
113113

@@ -563,7 +563,7 @@
563563
# Check odd placement of embedded pattern modifiers
564564

565565
# not an error under PCRE/PRE:
566-
('w(?i)', 'W', SUCCEED, 'found', 'W'),
566+
('(?i)w', 'W', SUCCEED, 'found', 'W'),
567567
# ('w(?i)', 'W', SYNTAX_ERROR),
568568

569569
# Comments using the x embedded pattern modifier
@@ -627,7 +627,7 @@
627627
# bug 114033: nothing to repeat
628628
(r'(x?)?', 'x', SUCCEED, 'found', 'x'),
629629
# bug 115040: rescan if flags are modified inside pattern
630-
(r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
630+
(r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
631631
# bug 115618: negative lookahead
632632
(r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
633633
# bug 116251: character class bug

Lib/test/test_fnmatch.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,14 @@ def test_bytes(self):
6262
class TranslateTestCase(unittest.TestCase):
6363

6464
def test_translate(self):
65-
self.assertEqual(translate('*'), r'.*\Z(?ms)')
66-
self.assertEqual(translate('?'), r'.\Z(?ms)')
67-
self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)')
68-
self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)')
69-
self.assertEqual(translate('[]]'), r'[]]\Z(?ms)')
70-
self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)')
71-
self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)')
72-
self.assertEqual(translate('[x'), r'\[x\Z(?ms)')
65+
self.assertEqual(translate('*'), r'(?s:.*)\Z')
66+
self.assertEqual(translate('?'), r'(?s:.)\Z')
67+
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
68+
self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
69+
self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
70+
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
71+
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
72+
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
7373

7474

7575
class FilterTestCase(unittest.TestCase):

Lib/test/test_pyclbr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def test_others(self):
158158
cm('cgi', ignore=('log',)) # set with = in module
159159
cm('pickle', ignore=('partial',))
160160
cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module
161-
cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property
161+
cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
162162
cm('pdb')
163163
cm('pydoc')
164164

Lib/test/test_re.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1279,6 +1279,9 @@ def test_inline_flags(self):
12791279
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
12801280
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
12811281

1282+
with self.assertWarns(DeprecationWarning):
1283+
self.assertTrue(re.match(upper_char + '(?i)', lower_char))
1284+
12821285
def test_dollar_matches_twice(self):
12831286
"$ matches the end of string, and just before the terminating \n"
12841287
pattern = re.compile('$')

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,10 @@ Core and Builtins
143143
Library
144144
-------
145145

146+
- Issue #22493: Inline flags should now be used only at the start of the
147+
regular expression. A deprecation warning is emitted if they are used in the
148+
middle of the regular expression.
149+
146150
- Issue #26885: xmlrpc now supports unmarshalling additional data types used
147151
by Apache XML-RPC implementation for numerics and None.
148152

0 commit comments

Comments
 (0)