Skip to content

Commit 15de493

Browse files
GH-107465: Add pathlib.Path.from_uri() classmethod. (#107640)
This method supports file URIs (including variants) as described in RFC 8089, such as URIs generated by `pathlib.Path.as_uri()` and `urllib.request.pathname2url()`. The method is added to `Path` rather than `PurePath` because it uses `os.fsdecode()`, and so its results vary from system to system. I intend to deprecate `PurePath.as_uri()` and move it to `Path` for the same reason. Co-authored-by: Adam Turner <[email protected]>
1 parent 06faa9a commit 15de493

File tree

5 files changed

+120
-5
lines changed

5 files changed

+120
-5
lines changed

Doc/library/pathlib.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -850,6 +850,42 @@ call fails (for example because the path doesn't exist).
850850
.. versionadded:: 3.5
851851

852852

853+
.. classmethod:: Path.from_uri(uri)
854+
855+
Return a new path object from parsing a 'file' URI conforming to
856+
:rfc:`8089`. For example::
857+
858+
>>> Path.from_uri('file:///etc/hosts')
859+
PosixPath('/etc/hosts')
860+
861+
On Windows, DOS device and UNC paths may be parsed from URIs::
862+
863+
>>> Path.from_uri('file:///c:/windows')
864+
WindowsPath('c:/windows')
865+
>>> Path.from_uri('file://server/share')
866+
WindowsPath('//server/share')
867+
868+
Several variant forms are supported::
869+
870+
>>> Path.from_uri('file:////server/share')
871+
WindowsPath('//server/share')
872+
>>> Path.from_uri('file://///server/share')
873+
WindowsPath('//server/share')
874+
>>> Path.from_uri('file:c:/windows')
875+
WindowsPath('c:/windows')
876+
>>> Path.from_uri('file:/c|/windows')
877+
WindowsPath('c:/windows')
878+
879+
:exc:`ValueError` is raised if the URI does not start with ``file:``, or
880+
the parsed path isn't absolute.
881+
882+
:func:`os.fsdecode` is used to decode percent-escaped byte sequences, and
883+
so file URIs are not portable across machines with different
884+
:ref:`filesystem encodings <filesystem-encoding>`.
885+
886+
.. versionadded:: 3.13
887+
888+
853889
.. method:: Path.stat(*, follow_symlinks=True)
854890

855891
Return a :class:`os.stat_result` object containing information about this path, like :func:`os.stat`.

Doc/whatsnew/3.13.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ pathlib
184184
:exc:`NotImplementedError` when a path operation isn't supported.
185185
(Contributed by Barney Gale in :gh:`89812`.)
186186

187+
* Add :meth:`pathlib.Path.from_uri`, a new constructor to create a :class:`pathlib.Path`
188+
object from a 'file' URI (``file:/``).
189+
(Contributed by Barney Gale in :gh:`107465`.)
190+
187191
* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
188192
(Contributed by Barney Gale in :gh:`73435`.)
189193

Lib/pathlib.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from _collections_abc import Sequence
1919
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
2020
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
21-
from urllib.parse import quote_from_bytes as urlquote_from_bytes
2221

2322
try:
2423
import pwd
@@ -452,7 +451,8 @@ def as_uri(self):
452451
# It's a posix path => 'file:///etc/hosts'
453452
prefix = 'file://'
454453
path = str(self)
455-
return prefix + urlquote_from_bytes(os.fsencode(path))
454+
from urllib.parse import quote_from_bytes
455+
return prefix + quote_from_bytes(os.fsencode(path))
456456

457457
@property
458458
def _str_normcase(self):
@@ -814,9 +814,10 @@ class _PathBase(PurePath):
814814
__bytes__ = None
815815
__fspath__ = None # virtual paths have no local file system representation
816816

817-
def _unsupported(self, method_name):
818-
msg = f"{type(self).__name__}.{method_name}() is unsupported"
819-
if isinstance(self, Path):
817+
@classmethod
818+
def _unsupported(cls, method_name):
819+
msg = f"{cls.__name__}.{method_name}() is unsupported"
820+
if issubclass(cls, Path):
820821
msg += " on this system"
821822
raise UnsupportedOperation(msg)
822823

@@ -1418,6 +1419,11 @@ def group(self):
14181419
"""
14191420
self._unsupported("group")
14201421

1422+
@classmethod
1423+
def from_uri(cls, uri):
1424+
"""Return a new path from the given 'file' URI."""
1425+
cls._unsupported("from_uri")
1426+
14211427
def as_uri(self):
14221428
"""Return the path as a URI."""
14231429
self._unsupported("as_uri")
@@ -1661,6 +1667,30 @@ def expanduser(self):
16611667

16621668
return self
16631669

1670+
@classmethod
1671+
def from_uri(cls, uri):
1672+
"""Return a new path from the given 'file' URI."""
1673+
if not uri.startswith('file:'):
1674+
raise ValueError(f"URI does not start with 'file:': {uri!r}")
1675+
path = uri[5:]
1676+
if path[:3] == '///':
1677+
# Remove empty authority
1678+
path = path[2:]
1679+
elif path[:12] == '//localhost/':
1680+
# Remove 'localhost' authority
1681+
path = path[11:]
1682+
if path[:3] == '///' or (path[:1] == '/' and path[2:3] in ':|'):
1683+
# Remove slash before DOS device/UNC path
1684+
path = path[1:]
1685+
if path[1:2] == '|':
1686+
# Replace bar with colon in DOS drive
1687+
path = path[:1] + ':' + path[2:]
1688+
from urllib.parse import unquote_to_bytes
1689+
path = cls(os.fsdecode(unquote_to_bytes(path)))
1690+
if not path.is_absolute():
1691+
raise ValueError(f"URI is not absolute: {uri!r}")
1692+
return path
1693+
16641694

16651695
class PosixPath(Path, PurePosixPath):
16661696
"""Path subclass for non-Windows systems.

Lib/test/test_pathlib.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import tempfile
1212
import unittest
1313
from unittest import mock
14+
from urllib.request import pathname2url
1415

1516
from test.support import import_helper
1617
from test.support import set_recursion_limit
@@ -3602,6 +3603,24 @@ def test_handling_bad_descriptor(self):
36023603
self.fail("Bad file descriptor not handled.")
36033604
raise
36043605

3606+
def test_from_uri(self):
3607+
P = self.cls
3608+
self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar'))
3609+
self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar'))
3610+
self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar'))
3611+
self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar'))
3612+
self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar'))
3613+
self.assertRaises(ValueError, P.from_uri, 'foo/bar')
3614+
self.assertRaises(ValueError, P.from_uri, '/foo/bar')
3615+
self.assertRaises(ValueError, P.from_uri, '//foo/bar')
3616+
self.assertRaises(ValueError, P.from_uri, 'file:foo/bar')
3617+
self.assertRaises(ValueError, P.from_uri, 'http://foo/bar')
3618+
3619+
def test_from_uri_pathname2url(self):
3620+
P = self.cls
3621+
self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar'))
3622+
self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar'))
3623+
36053624

36063625
@only_nt
36073626
class WindowsPathTest(PathTest):
@@ -3721,6 +3740,31 @@ def check():
37213740
env['HOME'] = 'C:\\Users\\eve'
37223741
check()
37233742

3743+
def test_from_uri(self):
3744+
P = self.cls
3745+
# DOS drive paths
3746+
self.assertEqual(P.from_uri('file:c:/path/to/file'), P('c:/path/to/file'))
3747+
self.assertEqual(P.from_uri('file:c|/path/to/file'), P('c:/path/to/file'))
3748+
self.assertEqual(P.from_uri('file:/c|/path/to/file'), P('c:/path/to/file'))
3749+
self.assertEqual(P.from_uri('file:///c|/path/to/file'), P('c:/path/to/file'))
3750+
# UNC paths
3751+
self.assertEqual(P.from_uri('file://server/path/to/file'), P('//server/path/to/file'))
3752+
self.assertEqual(P.from_uri('file:////server/path/to/file'), P('//server/path/to/file'))
3753+
self.assertEqual(P.from_uri('file://///server/path/to/file'), P('//server/path/to/file'))
3754+
# Localhost paths
3755+
self.assertEqual(P.from_uri('file://localhost/c:/path/to/file'), P('c:/path/to/file'))
3756+
self.assertEqual(P.from_uri('file://localhost/c|/path/to/file'), P('c:/path/to/file'))
3757+
# Invalid paths
3758+
self.assertRaises(ValueError, P.from_uri, 'foo/bar')
3759+
self.assertRaises(ValueError, P.from_uri, 'c:/foo/bar')
3760+
self.assertRaises(ValueError, P.from_uri, '//foo/bar')
3761+
self.assertRaises(ValueError, P.from_uri, 'file:foo/bar')
3762+
self.assertRaises(ValueError, P.from_uri, 'http://foo/bar')
3763+
3764+
def test_from_uri_pathname2url(self):
3765+
P = self.cls
3766+
self.assertEqual(P.from_uri('file:' + pathname2url(r'c:\path\to\file')), P('c:/path/to/file'))
3767+
self.assertEqual(P.from_uri('file:' + pathname2url(r'\\server\path\to\file')), P('//server/path/to/file'))
37243768

37253769

37263770
class PathSubclassTest(PathTest):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add :meth:`pathlib.Path.from_uri` classmethod.

0 commit comments

Comments
 (0)