Skip to content

Commit 6872edd

Browse files
committed
gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089)
1 parent 8c96850 commit 6872edd

File tree

8 files changed

+340
-108
lines changed

8 files changed

+340
-108
lines changed

Include/internal/pycore_fileutils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t
264264
extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd);
265265
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
266266

267+
extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize);
268+
267269
// Macros to protect CRT calls against instant termination when passed an
268270
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
269271
// Usage:

Lib/ntpath.py

Lines changed: 68 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -176,56 +176,76 @@ def splitdrive(p):
176176
return drive, root + tail
177177

178178

179-
def splitroot(p):
180-
"""Split a pathname into drive, root and tail. The drive is defined
181-
exactly as in splitdrive(). On Windows, the root may be a single path
182-
separator or an empty string. The tail contains anything after the root.
183-
For example:
184-
185-
splitroot('//server/share/') == ('//server/share', '/', '')
186-
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
187-
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
188-
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
189-
"""
190-
p = os.fspath(p)
191-
if isinstance(p, bytes):
192-
sep = b'\\'
193-
altsep = b'/'
194-
colon = b':'
195-
unc_prefix = b'\\\\?\\UNC\\'
196-
empty = b''
197-
else:
198-
sep = '\\'
199-
altsep = '/'
200-
colon = ':'
201-
unc_prefix = '\\\\?\\UNC\\'
202-
empty = ''
203-
normp = p.replace(altsep, sep)
204-
if normp[:1] == sep:
205-
if normp[1:2] == sep:
206-
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
207-
# Device drives, e.g. \\.\device or \\?\device
208-
start = 8 if normp[:8].upper() == unc_prefix else 2
209-
index = normp.find(sep, start)
210-
if index == -1:
211-
return p, empty, empty
212-
index2 = normp.find(sep, index + 1)
213-
if index2 == -1:
214-
return p, empty, empty
215-
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
179+
try:
180+
from nt import _path_splitroot_ex
181+
except ImportError:
182+
def splitroot(p):
183+
"""Split a pathname into drive, root and tail. The drive is defined
184+
exactly as in splitdrive(). On Windows, the root may be a single path
185+
separator or an empty string. The tail contains anything after the root.
186+
For example:
187+
188+
splitroot('//server/share/') == ('//server/share', '/', '')
189+
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
190+
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
191+
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
192+
"""
193+
p = os.fspath(p)
194+
if isinstance(p, bytes):
195+
sep = b'\\'
196+
altsep = b'/'
197+
colon = b':'
198+
unc_prefix = b'\\\\?\\UNC\\'
199+
empty = b''
216200
else:
217-
# Relative path with root, e.g. \Windows
218-
return empty, p[:1], p[1:]
219-
elif normp[1:2] == colon:
220-
if normp[2:3] == sep:
221-
# Absolute drive-letter path, e.g. X:\Windows
222-
return p[:2], p[2:3], p[3:]
201+
sep = '\\'
202+
altsep = '/'
203+
colon = ':'
204+
unc_prefix = '\\\\?\\UNC\\'
205+
empty = ''
206+
normp = p.replace(altsep, sep)
207+
if normp[:1] == sep:
208+
if normp[1:2] == sep:
209+
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
210+
# Device drives, e.g. \\.\device or \\?\device
211+
start = 8 if normp[:8].upper() == unc_prefix else 2
212+
index = normp.find(sep, start)
213+
if index == -1:
214+
return p, empty, empty
215+
index2 = normp.find(sep, index + 1)
216+
if index2 == -1:
217+
return p, empty, empty
218+
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
219+
else:
220+
# Relative path with root, e.g. \Windows
221+
return empty, p[:1], p[1:]
222+
elif normp[1:2] == colon:
223+
if normp[2:3] == sep:
224+
# Absolute drive-letter path, e.g. X:\Windows
225+
return p[:2], p[2:3], p[3:]
226+
else:
227+
# Relative path with drive, e.g. X:Windows
228+
return p[:2], empty, p[2:]
223229
else:
224-
# Relative path with drive, e.g. X:Windows
225-
return p[:2], empty, p[2:]
226-
else:
227-
# Relative path, e.g. Windows
228-
return empty, empty, p
230+
# Relative path, e.g. Windows
231+
return empty, empty, p
232+
else:
233+
def splitroot(p):
234+
"""Split a pathname into drive, root and tail. The drive is defined
235+
exactly as in splitdrive(). On Windows, the root may be a single path
236+
separator or an empty string. The tail contains anything after the root.
237+
For example:
238+
239+
splitroot('//server/share/') == ('//server/share', '/', '')
240+
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
241+
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
242+
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
243+
"""
244+
p = os.fspath(p)
245+
if isinstance(p, bytes):
246+
drive, root, tail = _path_splitroot_ex(os.fsdecode(p))
247+
return os.fsencode(drive), os.fsencode(root), os.fsencode(tail)
248+
return _path_splitroot_ex(p)
229249

230250

231251
# Split a path in head (everything up to the last '/') and tail (the

Lib/posixpath.py

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -135,33 +135,53 @@ def splitdrive(p):
135135
return p[:0], p
136136

137137

138-
def splitroot(p):
139-
"""Split a pathname into drive, root and tail. On Posix, drive is always
140-
empty; the root may be empty, a single slash, or two slashes. The tail
141-
contains anything after the root. For example:
142-
143-
splitroot('foo/bar') == ('', '', 'foo/bar')
144-
splitroot('/foo/bar') == ('', '/', 'foo/bar')
145-
splitroot('//foo/bar') == ('', '//', 'foo/bar')
146-
splitroot('///foo/bar') == ('', '/', '//foo/bar')
147-
"""
148-
p = os.fspath(p)
149-
if isinstance(p, bytes):
150-
sep = b'/'
151-
empty = b''
152-
else:
153-
sep = '/'
154-
empty = ''
155-
if p[:1] != sep:
156-
# Relative path, e.g.: 'foo'
157-
return empty, empty, p
158-
elif p[1:2] != sep or p[2:3] == sep:
159-
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
160-
return empty, sep, p[1:]
161-
else:
162-
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
163-
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
164-
return empty, p[:2], p[2:]
138+
try:
139+
from posix import _path_splitroot_ex
140+
except ImportError:
141+
def splitroot(p):
142+
"""Split a pathname into drive, root and tail. On Posix, drive is always
143+
empty; the root may be empty, a single slash, or two slashes. The tail
144+
contains anything after the root. For example:
145+
146+
splitroot('foo/bar') == ('', '', 'foo/bar')
147+
splitroot('/foo/bar') == ('', '/', 'foo/bar')
148+
splitroot('//foo/bar') == ('', '//', 'foo/bar')
149+
splitroot('///foo/bar') == ('', '/', '//foo/bar')
150+
"""
151+
p = os.fspath(p)
152+
if isinstance(p, bytes):
153+
sep = b'/'
154+
empty = b''
155+
else:
156+
sep = '/'
157+
empty = ''
158+
if p[:1] != sep:
159+
# Relative path, e.g.: 'foo'
160+
return empty, empty, p
161+
elif p[1:2] != sep or p[2:3] == sep:
162+
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
163+
return empty, sep, p[1:]
164+
else:
165+
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
166+
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
167+
return empty, p[:2], p[2:]
168+
else:
169+
def splitroot(p):
170+
"""Split a pathname into drive, root and tail. On Posix, drive is always
171+
empty; the root may be empty, a single slash, or two slashes. The tail
172+
contains anything after the root. For example:
173+
174+
splitroot('foo/bar') == ('', '', 'foo/bar')
175+
splitroot('/foo/bar') == ('', '/', 'foo/bar')
176+
splitroot('//foo/bar') == ('', '//', 'foo/bar')
177+
splitroot('///foo/bar') == ('', '/', '//foo/bar')
178+
"""
179+
p = os.fspath(p)
180+
if isinstance(p, bytes):
181+
# Optimisation: the drive is always empty
182+
_, root, tail = _path_splitroot_ex(os.fsdecode(p))
183+
return b'', os.fsencode(root), os.fsencode(tail)
184+
return _path_splitroot_ex(p)
165185

166186

167187
# Return the tail (basename) part of a path, same as split(path)[1].

Lib/test/test_ntpath.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ def test_normpath(self):
354354
tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\')
355355
tester("ntpath.normpath('\\\\foo')", '\\\\foo')
356356
tester("ntpath.normpath('\\\\')", '\\\\')
357+
tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\')
357358

358359
def test_realpath_curdir(self):
359360
expected = ntpath.normpath(os.getcwd())
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Speed up :func:`os.path.splitroot` with a native implementation.

Modules/clinic/posixmodule.c.h

Lines changed: 62 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/posixmodule.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5286,6 +5286,49 @@ os__path_islink_impl(PyObject *module, PyObject *path)
52865286
#endif /* MS_WINDOWS */
52875287

52885288

5289+
/*[clinic input]
5290+
os._path_splitroot_ex
5291+
5292+
path: unicode
5293+
5294+
[clinic start generated code]*/
5295+
5296+
static PyObject *
5297+
os__path_splitroot_ex_impl(PyObject *module, PyObject *path)
5298+
/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/
5299+
{
5300+
Py_ssize_t len, drvsize, rootsize;
5301+
PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL;
5302+
5303+
wchar_t *buffer = PyUnicode_AsWideCharString(path, &len);
5304+
if (!buffer) {
5305+
goto exit;
5306+
}
5307+
5308+
_Py_skiproot(buffer, len, &drvsize, &rootsize);
5309+
drv = PyUnicode_FromWideChar(buffer, drvsize);
5310+
if (drv == NULL) {
5311+
goto exit;
5312+
}
5313+
root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize);
5314+
if (root == NULL) {
5315+
goto exit;
5316+
}
5317+
tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize],
5318+
len - drvsize - rootsize);
5319+
if (tail == NULL) {
5320+
goto exit;
5321+
}
5322+
result = Py_BuildValue("(OOO)", drv, root, tail);
5323+
exit:
5324+
PyMem_Free(buffer);
5325+
Py_XDECREF(drv);
5326+
Py_XDECREF(root);
5327+
Py_XDECREF(tail);
5328+
return result;
5329+
}
5330+
5331+
52895332
/*[clinic input]
52905333
os._path_normpath
52915334
@@ -16029,6 +16072,7 @@ static PyMethodDef posix_methods[] = {
1602916072
OS__GETFINALPATHNAME_METHODDEF
1603016073
OS__GETVOLUMEPATHNAME_METHODDEF
1603116074
OS__PATH_SPLITROOT_METHODDEF
16075+
OS__PATH_SPLITROOT_EX_METHODDEF
1603216076
OS__PATH_NORMPATH_METHODDEF
1603316077
OS_GETLOADAVG_METHODDEF
1603416078
OS_URANDOM_METHODDEF

0 commit comments

Comments
 (0)