Skip to content

Commit 5c3201e

Browse files
authored
bpo-47080: Use atomic groups to simplify fnmatch (GH-32029)
Use re's new atomic groups to greatly simplify the construction of worst-case linear-time patterns.
1 parent 345b390 commit 5c3201e

File tree

2 files changed

+7
-29
lines changed

2 files changed

+7
-29
lines changed

Lib/fnmatch.py

Lines changed: 5 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,6 @@
1616

1717
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
1818

19-
# Build a thread-safe incrementing counter to help create unique regexp group
20-
# names across calls.
21-
from itertools import count
22-
_nextgroupnum = count().__next__
23-
del count
24-
2519
def fnmatch(name, pat):
2620
"""Test whether FILENAME matches PATTERN.
2721
@@ -149,17 +143,10 @@ def translate(pat):
149143
# Now deal with STAR fixed STAR fixed ...
150144
# For an interior `STAR fixed` pairing, we want to do a minimal
151145
# .*? match followed by `fixed`, with no possibility of backtracking.
152-
# We can't spell that directly, but can trick it into working by matching
153-
# .*?fixed
154-
# in a lookahead assertion, save the matched part in a group, then
155-
# consume that group via a backreference. If the overall match fails,
156-
# the lookahead assertion won't try alternatives. So the translation is:
157-
# (?=(?P<name>.*?fixed))(?P=name)
158-
# Group names are created as needed: g0, g1, g2, ...
159-
# The numbers are obtained from _nextgroupnum() to ensure they're unique
160-
# across calls and across threads. This is because people rely on the
161-
# undocumented ability to join multiple translate() results together via
162-
# "|" to build large regexps matching "one of many" shell patterns.
146+
# Atomic groups ("(?>...)") allow us to spell that directly.
147+
# Note: people rely on the undocumented ability to join multiple
148+
# translate() results together via "|" to build large regexps matching
149+
# "one of many" shell patterns.
163150
while i < n:
164151
assert inp[i] is STAR
165152
i += 1
@@ -176,8 +163,7 @@ def translate(pat):
176163
add(".*")
177164
add(fixed)
178165
else:
179-
groupnum = _nextgroupnum()
180-
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
166+
add(f"(?>.*?{fixed})")
181167
assert i == n
182168
res = "".join(res)
183169
return fr'(?s:{res})\Z'

Lib/test/test_fnmatch.py

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -124,17 +124,9 @@ def test_translate(self):
124124
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
125125
# fancy translation to prevent exponential-time match failure
126126
t = translate('**a*a****a')
127-
digits = re.findall(r'\d+', t)
128-
self.assertEqual(len(digits), 4)
129-
self.assertEqual(digits[0], digits[1])
130-
self.assertEqual(digits[2], digits[3])
131-
g1 = f"g{digits[0]}" # e.g., group name "g4"
132-
g2 = f"g{digits[2]}" # e.g., group name "g5"
133-
self.assertEqual(t,
134-
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
127+
self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z')
135128
# and try pasting multiple translate results - it's an undocumented
136-
# feature that this works; all the pain of generating unique group
137-
# names across calls exists to support this
129+
# feature that this works
138130
r1 = translate('**a**a**a*')
139131
r2 = translate('**b**b**b*')
140132
r3 = translate('*c*c*c*')

0 commit comments

Comments
 (0)