Skip to content

Commit 76929fd

Browse files
authored
GH-110109: Add pathlib._PurePathBase (#110670)
Add private `pathlib._PurePathBase` class: a private superclass of both `PurePath` and `_PathBase`. Unlike `PurePath`, it does not define any of these special methods: `__fspath__`, `__bytes__`, `__reduce__`, `__hash__`, `__eq__`, `__lt__`, `__le__`, `__gt__`, `__ge__`. Its initializer and path joining methods accept only strings, not os.PathLike objects more broadly. This is important for supporting *virtual paths*: user subclasses of `_PathBase` that provide access to archive files, FTP servers, etc. In these classes, the above methods should be implemented by users only as appropriate, with due consideration for the hash/equality of any backing objects, such as file objects or sockets.
1 parent 5a0137c commit 76929fd

File tree

3 files changed

+115
-53
lines changed

3 files changed

+115
-53
lines changed

Lib/pathlib.py

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,13 @@ def __repr__(self):
198198
return "<{}.parents>".format(type(self._path).__name__)
199199

200200

201-
class PurePath:
202-
"""Base class for manipulating paths without I/O.
201+
class _PurePathBase:
202+
"""Base class for pure path objects.
203203
204-
PurePath represents a filesystem path and offers operations which
205-
don't imply any actual filesystem I/O. Depending on your system,
206-
instantiating a PurePath will return either a PurePosixPath or a
207-
PureWindowsPath object. You can also instantiate either of these classes
208-
directly, regardless of your system.
204+
This class *does not* provide several magic methods that are defined in
205+
its subclass PurePath. They are: __fspath__, __bytes__, __reduce__,
206+
__hash__, __eq__, __lt__, __le__, __gt__, __ge__. Its initializer and path
207+
joining methods accept only strings, not os.PathLike objects more broadly.
209208
"""
210209

211210
__slots__ = (
@@ -227,29 +226,17 @@ class PurePath:
227226
# for the first time. It's used to implement `_str_normcase`
228227
'_str',
229228

230-
# The `_str_normcase_cached` slot stores the string path with
231-
# normalized case. It is set when the `_str_normcase` property is
232-
# accessed for the first time. It's used to implement `__eq__()`
233-
# `__hash__()`, and `_parts_normcase`
234-
'_str_normcase_cached',
235-
236-
# The `_parts_normcase_cached` slot stores the case-normalized
237-
# string path after splitting on path separators. It's set when the
238-
# `_parts_normcase` property is accessed for the first time. It's used
239-
# to implement comparison methods like `__lt__()`.
240-
'_parts_normcase_cached',
241-
242-
# The `_hash` slot stores the hash of the case-normalized string
243-
# path. It's set when `__hash__()` is called for the first time.
244-
'_hash',
245-
246229
# The '_resolving' slot stores a boolean indicating whether the path
247230
# is being processed by `_PathBase.resolve()`. This prevents duplicate
248231
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
249232
'_resolving',
250233
)
251234
pathmod = os.path
252235

236+
def __init__(self, *paths):
237+
self._raw_paths = paths
238+
self._resolving = False
239+
253240
def with_segments(self, *pathsegments):
254241
"""Construct a new path object from any number of path-like objects.
255242
Subclasses may override this method to customize how new path objects
@@ -444,7 +431,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False):
444431
warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg,
445432
remove=(3, 14))
446433
other = self.with_segments(other, *_deprecated)
447-
elif not isinstance(other, PurePath):
434+
elif not isinstance(other, _PurePathBase):
448435
other = self.with_segments(other)
449436
for step, path in enumerate(chain([other], other.parents)):
450437
if path == self or path in self.parents:
@@ -468,7 +455,7 @@ def is_relative_to(self, other, /, *_deprecated):
468455
warnings._deprecated("pathlib.PurePath.is_relative_to(*args)",
469456
msg, remove=(3, 14))
470457
other = self.with_segments(other, *_deprecated)
471-
elif not isinstance(other, PurePath):
458+
elif not isinstance(other, _PurePathBase):
472459
other = self.with_segments(other)
473460
return other == self or other in self.parents
474461

@@ -487,7 +474,7 @@ def joinpath(self, *pathsegments):
487474
paths) or a totally different path (if one of the arguments is
488475
anchored).
489476
"""
490-
return self.with_segments(self, *pathsegments)
477+
return self.with_segments(*self._raw_paths, *pathsegments)
491478

492479
def __truediv__(self, key):
493480
try:
@@ -497,7 +484,7 @@ def __truediv__(self, key):
497484

498485
def __rtruediv__(self, key):
499486
try:
500-
return self.with_segments(key, self)
487+
return self.with_segments(key, *self._raw_paths)
501488
except TypeError:
502489
return NotImplemented
503490

@@ -555,7 +542,7 @@ def match(self, path_pattern, *, case_sensitive=None):
555542
"""
556543
Return True if this path matches the given pattern.
557544
"""
558-
if not isinstance(path_pattern, PurePath):
545+
if not isinstance(path_pattern, _PurePathBase):
559546
path_pattern = self.with_segments(path_pattern)
560547
if case_sensitive is None:
561548
case_sensitive = _is_case_sensitive(self.pathmod)
@@ -570,6 +557,35 @@ def match(self, path_pattern, *, case_sensitive=None):
570557
match = _compile_pattern(pattern_str, sep, case_sensitive)
571558
return match(str(self)) is not None
572559

560+
561+
class PurePath(_PurePathBase):
562+
"""Base class for manipulating paths without I/O.
563+
564+
PurePath represents a filesystem path and offers operations which
565+
don't imply any actual filesystem I/O. Depending on your system,
566+
instantiating a PurePath will return either a PurePosixPath or a
567+
PureWindowsPath object. You can also instantiate either of these classes
568+
directly, regardless of your system.
569+
"""
570+
571+
__slots__ = (
572+
# The `_str_normcase_cached` slot stores the string path with
573+
# normalized case. It is set when the `_str_normcase` property is
574+
# accessed for the first time. It's used to implement `__eq__()`,
575+
# `__hash__()`, and `_parts_normcase`
576+
'_str_normcase_cached',
577+
578+
# The `_parts_normcase_cached` slot stores the case-normalized
579+
# string path after splitting on path separators. It's set when the
580+
# `_parts_normcase` property is accessed for the first time. It's used
581+
# to implement comparison methods like `__lt__()`.
582+
'_parts_normcase_cached',
583+
584+
# The `_hash` slot stores the hash of the case-normalized string
585+
# path. It's set when `__hash__()` is called for the first time.
586+
'_hash',
587+
)
588+
573589
def __new__(cls, *args, **kwargs):
574590
"""Construct a PurePath from one or several strings and/or existing
575591
PurePath objects. The strings and path objects are combined so as
@@ -600,8 +616,7 @@ def __init__(self, *args):
600616
"object where __fspath__ returns a str, "
601617
f"not {type(path).__name__!r}")
602618
paths.append(path)
603-
self._raw_paths = paths
604-
self._resolving = False
619+
super().__init__(*paths)
605620

606621
def __reduce__(self):
607622
# Using the parts tuple helps share interned path parts
@@ -719,7 +734,7 @@ class PureWindowsPath(PurePath):
719734
# Filesystem-accessing classes
720735

721736

722-
class _PathBase(PurePath):
737+
class _PathBase(_PurePathBase):
723738
"""Base class for concrete path objects.
724739
725740
This class provides dummy implementations for many methods that derived
@@ -733,8 +748,6 @@ class _PathBase(PurePath):
733748
such as paths in archive files or on remote storage systems.
734749
"""
735750
__slots__ = ()
736-
__bytes__ = None
737-
__fspath__ = None # virtual paths have no local file system representation
738751

739752
@classmethod
740753
def _unsupported(cls, method_name):
@@ -1341,7 +1354,7 @@ def as_uri(self):
13411354
self._unsupported("as_uri")
13421355

13431356

1344-
class Path(_PathBase):
1357+
class Path(_PathBase, PurePath):
13451358
"""PurePath subclass that can make system calls.
13461359
13471360
Path represents a filesystem path but unlike PurePath, also offers
@@ -1351,8 +1364,6 @@ class Path(_PathBase):
13511364
but cannot instantiate a WindowsPath on a POSIX system or vice versa.
13521365
"""
13531366
__slots__ = ()
1354-
__bytes__ = PurePath.__bytes__
1355-
__fspath__ = PurePath.__fspath__
13561367
as_uri = PurePath.as_uri
13571368

13581369
def __init__(self, *args, **kwargs):

Lib/test/test_pathlib.py

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,35 @@ def test_is_notimplemented(self):
4949
# Tests for the pure classes.
5050
#
5151

52-
class PurePathTest(unittest.TestCase):
53-
cls = pathlib.PurePath
52+
53+
class PurePathBaseTest(unittest.TestCase):
54+
cls = pathlib._PurePathBase
55+
56+
def test_magic_methods(self):
57+
P = self.cls
58+
self.assertFalse(hasattr(P, '__fspath__'))
59+
self.assertFalse(hasattr(P, '__bytes__'))
60+
self.assertIs(P.__reduce__, object.__reduce__)
61+
self.assertIs(P.__hash__, object.__hash__)
62+
self.assertIs(P.__eq__, object.__eq__)
63+
self.assertIs(P.__lt__, object.__lt__)
64+
self.assertIs(P.__le__, object.__le__)
65+
self.assertIs(P.__gt__, object.__gt__)
66+
self.assertIs(P.__ge__, object.__ge__)
67+
68+
69+
class DummyPurePath(pathlib._PurePathBase):
70+
def __eq__(self, other):
71+
if not isinstance(other, DummyPurePath):
72+
return NotImplemented
73+
return str(self) == str(other)
74+
75+
def __hash__(self):
76+
return hash(str(self))
77+
78+
79+
class DummyPurePathTest(unittest.TestCase):
80+
cls = DummyPurePath
5481

5582
# Keys are canonical paths, values are list of tuples of arguments
5683
# supposed to produce equal paths.
@@ -82,12 +109,6 @@ def test_constructor_common(self):
82109
P('/a', 'b', 'c')
83110
P('a/b/c')
84111
P('/a/b/c')
85-
P(FakePath("a/b/c"))
86-
self.assertEqual(P(P('a')), P('a'))
87-
self.assertEqual(P(P('a'), 'b'), P('a/b'))
88-
self.assertEqual(P(P('a'), P('b')), P('a/b'))
89-
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
90-
self.assertEqual(P(P('./a:b')), P('./a:b'))
91112

92113
def test_concrete_class(self):
93114
if self.cls is pathlib.PurePath:
@@ -193,8 +214,6 @@ def test_join_common(self):
193214
self.assertIs(type(pp), type(p))
194215
pp = p.joinpath('c', 'd')
195216
self.assertEqual(pp, P('a/b/c/d'))
196-
pp = p.joinpath(P('c'))
197-
self.assertEqual(pp, P('a/b/c'))
198217
pp = p.joinpath('/c')
199218
self.assertEqual(pp, P('/c'))
200219

@@ -211,8 +230,6 @@ def test_div_common(self):
211230
self.assertEqual(pp, P('a/b/c/d'))
212231
pp = 'c' / p / 'd'
213232
self.assertEqual(pp, P('c/a/b/d'))
214-
pp = p / P('c')
215-
self.assertEqual(pp, P('a/b/c'))
216233
pp = p/ '/c'
217234
self.assertEqual(pp, P('/c'))
218235

@@ -678,6 +695,29 @@ def test_is_relative_to_common(self):
678695
self.assertFalse(p.is_relative_to(''))
679696
self.assertFalse(p.is_relative_to(P('a')))
680697

698+
699+
class PurePathTest(DummyPurePathTest):
700+
cls = pathlib.PurePath
701+
702+
def test_constructor_nested(self):
703+
P = self.cls
704+
P(FakePath("a/b/c"))
705+
self.assertEqual(P(P('a')), P('a'))
706+
self.assertEqual(P(P('a'), 'b'), P('a/b'))
707+
self.assertEqual(P(P('a'), P('b')), P('a/b'))
708+
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
709+
self.assertEqual(P(P('./a:b')), P('./a:b'))
710+
711+
def test_join_nested(self):
712+
P = self.cls
713+
p = P('a/b').joinpath(P('c'))
714+
self.assertEqual(p, P('a/b/c'))
715+
716+
def test_div_nested(self):
717+
P = self.cls
718+
p = P('a/b') / P('c')
719+
self.assertEqual(p, P('a/b/c'))
720+
681721
def test_pickling_common(self):
682722
P = self.cls
683723
p = P('/a/b')
@@ -1545,7 +1585,7 @@ class cls(pathlib.PurePath):
15451585
# Tests for the virtual classes.
15461586
#
15471587

1548-
class PathBaseTest(PurePathTest):
1588+
class PathBaseTest(PurePathBaseTest):
15491589
cls = pathlib._PathBase
15501590

15511591
def test_unsupported_operation(self):
@@ -1636,6 +1676,14 @@ class DummyPath(pathlib._PathBase):
16361676
_directories = {}
16371677
_symlinks = {}
16381678

1679+
def __eq__(self, other):
1680+
if not isinstance(other, DummyPath):
1681+
return NotImplemented
1682+
return str(self) == str(other)
1683+
1684+
def __hash__(self):
1685+
return hash(str(self))
1686+
16391687
def stat(self, *, follow_symlinks=True):
16401688
if follow_symlinks:
16411689
path = str(self.resolve())
@@ -1707,7 +1755,7 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
17071755
self.mkdir(mode, parents=False, exist_ok=exist_ok)
17081756

17091757

1710-
class DummyPathTest(unittest.TestCase):
1758+
class DummyPathTest(DummyPurePathTest):
17111759
"""Tests for PathBase methods that use stat(), open() and iterdir()."""
17121760

17131761
cls = DummyPath
@@ -2014,7 +2062,7 @@ def _check(path, glob, expected):
20142062

20152063
def test_rglob_common(self):
20162064
def _check(glob, expected):
2017-
self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
2065+
self.assertEqual(set(glob), {P(BASE, q) for q in expected})
20182066
P = self.cls
20192067
p = P(BASE)
20202068
it = p.rglob("fileA")
@@ -2198,7 +2246,7 @@ def test_glob_above_recursion_limit(self):
21982246
# directory_depth > recursion_limit
21992247
directory_depth = recursion_limit + 10
22002248
base = self.cls(BASE, 'deep')
2201-
path = self.cls(base, *(['d'] * directory_depth))
2249+
path = base.joinpath(*(['d'] * directory_depth))
22022250
path.mkdir(parents=True)
22032251

22042252
with set_recursion_limit(recursion_limit):
@@ -2741,7 +2789,7 @@ def test_walk_above_recursion_limit(self):
27412789
# directory_depth > recursion_limit
27422790
directory_depth = recursion_limit + 10
27432791
base = self.cls(BASE, 'deep')
2744-
path = self.cls(base, *(['d'] * directory_depth))
2792+
path = base.joinpath(*(['d'] * directory_depth))
27452793
path.mkdir(parents=True)
27462794

27472795
with set_recursion_limit(recursion_limit):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Add private ``pathlib._PurePathBase`` class: a base class for
2+
:class:`pathlib.PurePath` that omits certain magic methods. It may be made
3+
public (along with ``_PathBase``) in the future.

0 commit comments

Comments
 (0)