9
9
The function translate(PATTERN) returns a regular expression
10
10
corresponding to PATTERN. (It does not compile it.)
11
11
"""
12
- try :
13
- import os
14
- except ImportError :
15
- import _dummy_os as os
12
+ import os
16
13
import posixpath
17
14
import re
18
15
import functools
19
16
20
17
__all__ = ["filter" , "fnmatch" , "fnmatchcase" , "translate" ]
21
18
19
+ # Build a thread-safe incrementing counter to help create unique regexp group
20
+ # names across calls.
21
+ from itertools import count
22
+ _nextgroupnum = count ().__next__
23
+ del count
24
+
22
25
def fnmatch (name , pat ):
23
26
"""Test whether FILENAME matches PATTERN.
24
27
@@ -49,7 +52,7 @@ def _compile_pattern(pat):
49
52
return re .compile (res ).match
50
53
51
54
def filter (names , pat ):
52
- """Return the subset of the list NAMES that match PAT."""
55
+ """Construct a list from those elements of the iterable NAMES that match PAT."""
53
56
result = []
54
57
pat = os .path .normcase (pat )
55
58
match = _compile_pattern (pat )
@@ -80,15 +83,19 @@ def translate(pat):
80
83
There is no way to quote meta-characters.
81
84
"""
82
85
86
+ STAR = object ()
87
+ res = []
88
+ add = res .append
83
89
i , n = 0 , len (pat )
84
- res = ''
85
90
while i < n :
86
91
c = pat [i ]
87
92
i = i + 1
88
93
if c == '*' :
89
- res = res + '.*'
94
+ # compress consecutive `*` into one
95
+ if (not res ) or res [- 1 ] is not STAR :
96
+ add (STAR )
90
97
elif c == '?' :
91
- res = res + '.'
98
+ add ( '.' )
92
99
elif c == '[' :
93
100
j = i
94
101
if j < n and pat [j ] == '!' :
@@ -98,10 +105,10 @@ def translate(pat):
98
105
while j < n and pat [j ] != ']' :
99
106
j = j + 1
100
107
if j >= n :
101
- res = res + '\\ ['
108
+ add ( '\\ [' )
102
109
else :
103
110
stuff = pat [i :j ]
104
- if '-- ' not in stuff :
111
+ if '-' not in stuff :
105
112
stuff = stuff .replace ('\\ ' , r'\\' )
106
113
else :
107
114
chunks = []
@@ -113,19 +120,80 @@ def translate(pat):
113
120
chunks .append (pat [i :k ])
114
121
i = k + 1
115
122
k = k + 3
116
- chunks .append (pat [i :j ])
123
+ chunk = pat [i :j ]
124
+ if chunk :
125
+ chunks .append (chunk )
126
+ else :
127
+ chunks [- 1 ] += '-'
128
+ # Remove empty ranges -- invalid in RE.
129
+ for k in range (len (chunks )- 1 , 0 , - 1 ):
130
+ if chunks [k - 1 ][- 1 ] > chunks [k ][0 ]:
131
+ chunks [k - 1 ] = chunks [k - 1 ][:- 1 ] + chunks [k ][1 :]
132
+ del chunks [k ]
117
133
# Escape backslashes and hyphens for set difference (--).
118
134
# Hyphens that create ranges shouldn't be escaped.
119
135
stuff = '-' .join (s .replace ('\\ ' , r'\\' ).replace ('-' , r'\-' )
120
136
for s in chunks )
121
137
# Escape set operations (&&, ~~ and ||).
122
138
stuff = re .sub (r'([&~|])' , r'\\\1' , stuff )
123
139
i = j + 1
124
- if stuff [0 ] == '!' :
125
- stuff = '^' + stuff [1 :]
126
- elif stuff [0 ] in ('^' , '[' ):
127
- stuff = '\\ ' + stuff
128
- res = '%s[%s]' % (res , stuff )
140
+ if not stuff :
141
+ # Empty range: never match.
142
+ add ('(?!)' )
143
+ elif stuff == '!' :
144
+ # Negated empty range: match any character.
145
+ add ('.' )
146
+ else :
147
+ if stuff [0 ] == '!' :
148
+ stuff = '^' + stuff [1 :]
149
+ elif stuff [0 ] in ('^' , '[' ):
150
+ stuff = '\\ ' + stuff
151
+ add (f'[{ stuff } ]' )
152
+ else :
153
+ add (re .escape (c ))
154
+ assert i == n
155
+
156
+ # Deal with STARs.
157
+ inp = res
158
+ res = []
159
+ add = res .append
160
+ i , n = 0 , len (inp )
161
+ # Fixed pieces at the start?
162
+ while i < n and inp [i ] is not STAR :
163
+ add (inp [i ])
164
+ i += 1
165
+ # Now deal with STAR fixed STAR fixed ...
166
+ # For an interior `STAR fixed` pairing, we want to do a minimal
167
+ # .*? match followed by `fixed`, with no possibility of backtracking.
168
+ # We can't spell that directly, but can trick it into working by matching
169
+ # .*?fixed
170
+ # in a lookahead assertion, save the matched part in a group, then
171
+ # consume that group via a backreference. If the overall match fails,
172
+ # the lookahead assertion won't try alternatives. So the translation is:
173
+ # (?=(?P<name>.*?fixed))(?P=name)
174
+ # Group names are created as needed: g0, g1, g2, ...
175
+ # The numbers are obtained from _nextgroupnum() to ensure they're unique
176
+ # across calls and across threads. This is because people rely on the
177
+ # undocumented ability to join multiple translate() results together via
178
+ # "|" to build large regexps matching "one of many" shell patterns.
179
+ while i < n :
180
+ assert inp [i ] is STAR
181
+ i += 1
182
+ if i == n :
183
+ add (".*" )
184
+ break
185
+ assert inp [i ] is not STAR
186
+ fixed = []
187
+ while i < n and inp [i ] is not STAR :
188
+ fixed .append (inp [i ])
189
+ i += 1
190
+ fixed = "" .join (fixed )
191
+ if i == n :
192
+ add (".*" )
193
+ add (fixed )
129
194
else :
130
- res = res + re .escape (c )
131
- return r'(?s:%s)\Z' % res
195
+ groupnum = _nextgroupnum ()
196
+ add (f"(?=(?P<g{ groupnum } >.*?{ fixed } ))(?P=g{ groupnum } )" )
197
+ assert i == n
198
+ res = "" .join (res )
199
+ return fr'(?s:{ res } )\Z'
0 commit comments