Skip to content

Commit 840bb5a

Browse files
authored
Merge pull request RustPython#4077 from youknowone/ntpath
Add nt._path_splitroot and update ntpath
2 parents 95fc027 + 3d4fe93 commit 840bb5a

File tree

7 files changed

+420
-69
lines changed

7 files changed

+420
-69
lines changed

Cargo.lock

Lines changed: 49 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/fnmatch.py

Lines changed: 86 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -9,16 +9,19 @@
99
The function translate(PATTERN) returns a regular expression
1010
corresponding to PATTERN. (It does not compile it.)
1111
"""
12-
try:
13-
import os
14-
except ImportError:
15-
import _dummy_os as os
12+
import os
1613
import posixpath
1714
import re
1815
import functools
1916

2017
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
2118

19+
# Build a thread-safe incrementing counter to help create unique regexp group
20+
# names across calls.
21+
from itertools import count
22+
_nextgroupnum = count().__next__
23+
del count
24+
2225
def fnmatch(name, pat):
2326
"""Test whether FILENAME matches PATTERN.
2427
@@ -49,7 +52,7 @@ def _compile_pattern(pat):
4952
return re.compile(res).match
5053

5154
def filter(names, pat):
52-
"""Return the subset of the list NAMES that match PAT."""
55+
"""Construct a list from those elements of the iterable NAMES that match PAT."""
5356
result = []
5457
pat = os.path.normcase(pat)
5558
match = _compile_pattern(pat)
@@ -80,15 +83,19 @@ def translate(pat):
8083
There is no way to quote meta-characters.
8184
"""
8285

86+
STAR = object()
87+
res = []
88+
add = res.append
8389
i, n = 0, len(pat)
84-
res = ''
8590
while i < n:
8691
c = pat[i]
8792
i = i+1
8893
if c == '*':
89-
res = res + '.*'
94+
# compress consecutive `*` into one
95+
if (not res) or res[-1] is not STAR:
96+
add(STAR)
9097
elif c == '?':
91-
res = res + '.'
98+
add('.')
9299
elif c == '[':
93100
j = i
94101
if j < n and pat[j] == '!':
@@ -98,10 +105,10 @@ def translate(pat):
98105
while j < n and pat[j] != ']':
99106
j = j+1
100107
if j >= n:
101-
res = res + '\\['
108+
add('\\[')
102109
else:
103110
stuff = pat[i:j]
104-
if '--' not in stuff:
111+
if '-' not in stuff:
105112
stuff = stuff.replace('\\', r'\\')
106113
else:
107114
chunks = []
@@ -113,19 +120,80 @@ def translate(pat):
113120
chunks.append(pat[i:k])
114121
i = k+1
115122
k = k+3
116-
chunks.append(pat[i:j])
123+
chunk = pat[i:j]
124+
if chunk:
125+
chunks.append(chunk)
126+
else:
127+
chunks[-1] += '-'
128+
# Remove empty ranges -- invalid in RE.
129+
for k in range(len(chunks)-1, 0, -1):
130+
if chunks[k-1][-1] > chunks[k][0]:
131+
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
132+
del chunks[k]
117133
# Escape backslashes and hyphens for set difference (--).
118134
# Hyphens that create ranges shouldn't be escaped.
119135
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
120136
for s in chunks)
121137
# Escape set operations (&&, ~~ and ||).
122138
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
123139
i = j+1
124-
if stuff[0] == '!':
125-
stuff = '^' + stuff[1:]
126-
elif stuff[0] in ('^', '['):
127-
stuff = '\\' + stuff
128-
res = '%s[%s]' % (res, stuff)
140+
if not stuff:
141+
# Empty range: never match.
142+
add('(?!)')
143+
elif stuff == '!':
144+
# Negated empty range: match any character.
145+
add('.')
146+
else:
147+
if stuff[0] == '!':
148+
stuff = '^' + stuff[1:]
149+
elif stuff[0] in ('^', '['):
150+
stuff = '\\' + stuff
151+
add(f'[{stuff}]')
152+
else:
153+
add(re.escape(c))
154+
assert i == n
155+
156+
# Deal with STARs.
157+
inp = res
158+
res = []
159+
add = res.append
160+
i, n = 0, len(inp)
161+
# Fixed pieces at the start?
162+
while i < n and inp[i] is not STAR:
163+
add(inp[i])
164+
i += 1
165+
# Now deal with STAR fixed STAR fixed ...
166+
# For an interior `STAR fixed` pairing, we want to do a minimal
167+
# .*? match followed by `fixed`, with no possibility of backtracking.
168+
# We can't spell that directly, but can trick it into working by matching
169+
# .*?fixed
170+
# in a lookahead assertion, save the matched part in a group, then
171+
# consume that group via a backreference. If the overall match fails,
172+
# the lookahead assertion won't try alternatives. So the translation is:
173+
# (?=(?P<name>.*?fixed))(?P=name)
174+
# Group names are created as needed: g0, g1, g2, ...
175+
# The numbers are obtained from _nextgroupnum() to ensure they're unique
176+
# across calls and across threads. This is because people rely on the
177+
# undocumented ability to join multiple translate() results together via
178+
# "|" to build large regexps matching "one of many" shell patterns.
179+
while i < n:
180+
assert inp[i] is STAR
181+
i += 1
182+
if i == n:
183+
add(".*")
184+
break
185+
assert inp[i] is not STAR
186+
fixed = []
187+
while i < n and inp[i] is not STAR:
188+
fixed.append(inp[i])
189+
i += 1
190+
fixed = "".join(fixed)
191+
if i == n:
192+
add(".*")
193+
add(fixed)
129194
else:
130-
res = res + re.escape(c)
131-
return r'(?s:%s)\Z' % res
195+
groupnum = _nextgroupnum()
196+
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
197+
assert i == n
198+
res = "".join(res)
199+
return fr'(?s:{res})\Z'

Lib/ntpath.py

Lines changed: 53 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import genericpath
2424
from genericpath import *
2525

26+
2627
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
2728
"basename","dirname","commonprefix","getsize","getmtime",
2829
"getatime","getctime", "islink","exists","lexists","isdir","isfile",
@@ -41,14 +42,39 @@ def _get_bothseps(path):
4142
# Other normalizations (such as optimizing '../' away) are not done
4243
# (this is done by normpath).
4344

44-
def normcase(s):
45-
"""Normalize case of pathname.
46-
47-
Makes all characters lowercase and all slashes into backslashes."""
48-
s = os.fspath(s)
49-
if isinstance(s, bytes):
50-
return s.replace(b'/', b'\\').lower()
51-
else:
45+
try:
46+
from _winapi import (
47+
LCMapStringEx as _LCMapStringEx,
48+
LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT,
49+
LCMAP_LOWERCASE as _LCMAP_LOWERCASE)
50+
51+
def normcase(s):
52+
"""Normalize case of pathname.
53+
54+
Makes all characters lowercase and all slashes into backslashes.
55+
"""
56+
s = os.fspath(s)
57+
if not s:
58+
return s
59+
if isinstance(s, bytes):
60+
encoding = sys.getfilesystemencoding()
61+
s = s.decode(encoding, 'surrogateescape').replace('/', '\\')
62+
s = _LCMapStringEx(_LOCALE_NAME_INVARIANT,
63+
_LCMAP_LOWERCASE, s)
64+
return s.encode(encoding, 'surrogateescape')
65+
else:
66+
return _LCMapStringEx(_LOCALE_NAME_INVARIANT,
67+
_LCMAP_LOWERCASE,
68+
s.replace('/', '\\'))
69+
except ImportError:
70+
def normcase(s):
71+
"""Normalize case of pathname.
72+
73+
Makes all characters lowercase and all slashes into backslashes.
74+
"""
75+
s = os.fspath(s)
76+
if isinstance(s, bytes):
77+
return os.fsencode(os.fsdecode(s).replace('/', '\\').lower())
5278
return s.replace('/', '\\').lower()
5379

5480

@@ -312,12 +338,25 @@ def expanduser(path):
312338
drive = ''
313339
userhome = join(drive, os.environ['HOMEPATH'])
314340

341+
if i != 1: #~user
342+
target_user = path[1:i]
343+
if isinstance(target_user, bytes):
344+
target_user = os.fsdecode(target_user)
345+
current_user = os.environ.get('USERNAME')
346+
347+
if target_user != current_user:
348+
# Try to guess user home directory. By default all user
349+
# profile directories are located in the same place and are
350+
# named by corresponding usernames. If userhome isn't a
351+
# normal profile directory, this guess is likely wrong,
352+
# so we bail out.
353+
if current_user != basename(userhome):
354+
return path
355+
userhome = join(dirname(userhome), target_user)
356+
315357
if isinstance(path, bytes):
316358
userhome = os.fsencode(userhome)
317359

318-
if i != 1: #~user
319-
userhome = join(dirname(userhome), path[1:i])
320-
321360
return userhome + path[i:]
322361

323362

@@ -622,7 +661,7 @@ def _getfinalpathname_nonstrict(path):
622661
tail = join(name, tail) if tail else name
623662
return tail
624663

625-
def realpath(path):
664+
def realpath(path, *, strict=False):
626665
path = normpath(path)
627666
if isinstance(path, bytes):
628667
prefix = b'\\\\?\\'
@@ -647,6 +686,8 @@ def realpath(path):
647686
path = _getfinalpathname(path)
648687
initial_winerror = 0
649688
except OSError as ex:
689+
if strict:
690+
raise
650691
initial_winerror = ex.winerror
651692
path = _getfinalpathname_nonstrict(path)
652693
# The path returned by _getfinalpathname will always start with \\?\ -

0 commit comments

Comments
 (0)