Skip to content

Commit e418fc3

Browse files
authored
GH-82805: Fix handling of single-dot file extensions in pathlib (#118952)
pathlib now treats "`.`" as a valid file extension (suffix). This brings it in line with `os.path.splitext()`. In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method that splits a path into a `(root, ext)` pair, like `os.path.splitext()`. This method is called by `PurePathBase.stem`, `suffix`, etc. In a future version of pathlib, we might make these base classes public, and so users will be able to define their own `splitext()` method to control file extension splitting. In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes` properties that don't use `splitext()`, which avoids computing the path base name twice.
1 parent 0c5ebe1 commit e418fc3

File tree

5 files changed

+101
-35
lines changed

5 files changed

+101
-35
lines changed

Doc/library/pathlib.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,10 @@ Pure paths provide the following methods and properties:
449449

450450
This is commonly called the file extension.
451451

452+
.. versionchanged:: 3.14
453+
454+
A single dot ("``.``") is considered a valid suffix.
455+
452456
.. attribute:: PurePath.suffixes
453457

454458
A list of the path's suffixes, often called file extensions::
@@ -460,6 +464,10 @@ Pure paths provide the following methods and properties:
460464
>>> PurePosixPath('my/library').suffixes
461465
[]
462466

467+
.. versionchanged:: 3.14
468+
469+
A single dot ("``.``") is considered a valid suffix.
470+
463471

464472
.. attribute:: PurePath.stem
465473

@@ -713,6 +721,11 @@ Pure paths provide the following methods and properties:
713721
>>> p.with_suffix('')
714722
PureWindowsPath('README')
715723

724+
.. versionchanged:: 3.14
725+
726+
A single dot ("``.``") is considered a valid suffix. In previous
727+
versions, :exc:`ValueError` was raised if a single dot was supplied.
728+
716729

717730
.. method:: PurePath.with_segments(*pathsegments)
718731

Lib/pathlib/_abc.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ def splitdrive(self, path):
6868
drive. Either part may be empty."""
6969
raise UnsupportedOperation(self._unsupported_msg('splitdrive()'))
7070

71+
def splitext(self, path):
72+
"""Split the path into a pair (root, ext), where *ext* is empty or
73+
begins with a period and contains at most one period,
74+
and *root* is everything before the extension."""
75+
raise UnsupportedOperation(self._unsupported_msg('splitext()'))
76+
7177
def normcase(self, path):
7278
"""Normalize the case of the path."""
7379
raise UnsupportedOperation(self._unsupported_msg('normcase()'))
@@ -151,12 +157,7 @@ def suffix(self):
151157
152158
This includes the leading period. For example: '.txt'
153159
"""
154-
name = self.name
155-
i = name.rfind('.')
156-
if 0 < i < len(name) - 1:
157-
return name[i:]
158-
else:
159-
return ''
160+
return self.parser.splitext(self.name)[1]
160161

161162
@property
162163
def suffixes(self):
@@ -165,21 +166,18 @@ def suffixes(self):
165166
166167
These include the leading periods. For example: ['.tar', '.gz']
167168
"""
168-
name = self.name
169-
if name.endswith('.'):
170-
return []
171-
name = name.lstrip('.')
172-
return ['.' + suffix for suffix in name.split('.')[1:]]
169+
split = self.parser.splitext
170+
stem, suffix = split(self.name)
171+
suffixes = []
172+
while suffix:
173+
suffixes.append(suffix)
174+
stem, suffix = split(stem)
175+
return suffixes[::-1]
173176

174177
@property
175178
def stem(self):
176179
"""The final path component, minus its last suffix."""
177-
name = self.name
178-
i = name.rfind('.')
179-
if 0 < i < len(name) - 1:
180-
return name[:i]
181-
else:
182-
return name
180+
return self.parser.splitext(self.name)[0]
183181

184182
def with_name(self, name):
185183
"""Return a new path with the file name changed."""
@@ -208,7 +206,7 @@ def with_suffix(self, suffix):
208206
if not stem:
209207
# If the stem is empty, we can't make the suffix non-empty.
210208
raise ValueError(f"{self!r} has an empty name")
211-
elif suffix and not (suffix.startswith('.') and len(suffix) > 1):
209+
elif suffix and not suffix.startswith('.'):
212210
raise ValueError(f"Invalid suffix {suffix!r}")
213211
else:
214212
return self.with_name(stem + suffix)

Lib/pathlib/_local.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,40 @@ def with_name(self, name):
361361
tail[-1] = name
362362
return self._from_parsed_parts(self.drive, self.root, tail)
363363

364+
@property
365+
def stem(self):
366+
"""The final path component, minus its last suffix."""
367+
name = self.name
368+
i = name.rfind('.')
369+
if i != -1:
370+
stem = name[:i]
371+
# Stem must contain at least one non-dot character.
372+
if stem.lstrip('.'):
373+
return stem
374+
return name
375+
376+
@property
377+
def suffix(self):
378+
"""
379+
The final component's last suffix, if any.
380+
381+
This includes the leading period. For example: '.txt'
382+
"""
383+
name = self.name.lstrip('.')
384+
i = name.rfind('.')
385+
if i != -1:
386+
return name[i:]
387+
return ''
388+
389+
@property
390+
def suffixes(self):
391+
"""
392+
A list of the final component's suffixes, if any.
393+
394+
These include the leading periods. For example: ['.tar', '.gz']
395+
"""
396+
return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]]
397+
364398
def relative_to(self, other, *, walk_up=False):
365399
"""Return the relative path to another path identified by the passed
366400
arguments. If the operation is not possible (because this is not

Lib/test/test_pathlib/test_pathlib_abc.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def test_unsupported_operation(self):
5050
self.assertRaises(e, m.join, 'foo')
5151
self.assertRaises(e, m.split, 'foo')
5252
self.assertRaises(e, m.splitdrive, 'foo')
53+
self.assertRaises(e, m.splitext, 'foo')
5354
self.assertRaises(e, m.normcase, 'foo')
5455
self.assertRaises(e, m.isabs, 'foo')
5556

@@ -789,8 +790,12 @@ def test_suffix_common(self):
789790
self.assertEqual(P('/a/.hg.rc').suffix, '.rc')
790791
self.assertEqual(P('a/b.tar.gz').suffix, '.gz')
791792
self.assertEqual(P('/a/b.tar.gz').suffix, '.gz')
792-
self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '')
793-
self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '')
793+
self.assertEqual(P('a/trailing.dot.').suffix, '.')
794+
self.assertEqual(P('/a/trailing.dot.').suffix, '.')
795+
self.assertEqual(P('a/..d.o.t..').suffix, '.')
796+
self.assertEqual(P('a/inn.er..dots').suffix, '.dots')
797+
self.assertEqual(P('photo').suffix, '')
798+
self.assertEqual(P('photo.jpg').suffix, '.jpg')
794799

795800
@needs_windows
796801
def test_suffix_windows(self):
@@ -807,8 +812,8 @@ def test_suffix_windows(self):
807812
self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc')
808813
self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz')
809814
self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz')
810-
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '')
811-
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '')
815+
self.assertEqual(P('c:a/trailing.dot.').suffix, '.')
816+
self.assertEqual(P('c:/a/trailing.dot.').suffix, '.')
812817
self.assertEqual(P('//My.py/Share.php').suffix, '')
813818
self.assertEqual(P('//My.py/Share.php/a/b').suffix, '')
814819

@@ -828,8 +833,12 @@ def test_suffixes_common(self):
828833
self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc'])
829834
self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz'])
830835
self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz'])
831-
self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, [])
832-
self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, [])
836+
self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.'])
837+
self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.'])
838+
self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.'])
839+
self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots'])
840+
self.assertEqual(P('photo').suffixes, [])
841+
self.assertEqual(P('photo.jpg').suffixes, ['.jpg'])
833842

834843
@needs_windows
835844
def test_suffixes_windows(self):
@@ -848,8 +857,8 @@ def test_suffixes_windows(self):
848857
self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz'])
849858
self.assertEqual(P('//My.py/Share.php').suffixes, [])
850859
self.assertEqual(P('//My.py/Share.php/a/b').suffixes, [])
851-
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, [])
852-
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, [])
860+
self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.'])
861+
self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.'])
853862

854863
def test_stem_empty(self):
855864
P = self.cls
@@ -865,8 +874,11 @@ def test_stem_common(self):
865874
self.assertEqual(P('a/.hgrc').stem, '.hgrc')
866875
self.assertEqual(P('a/.hg.rc').stem, '.hg')
867876
self.assertEqual(P('a/b.tar.gz').stem, 'b.tar')
868-
self.assertEqual(P('a/Some name. Ending with a dot.').stem,
869-
'Some name. Ending with a dot.')
877+
self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot')
878+
self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.')
879+
self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.')
880+
self.assertEqual(P('photo').stem, 'photo')
881+
self.assertEqual(P('photo.jpg').stem, 'photo')
870882

871883
@needs_windows
872884
def test_stem_windows(self):
@@ -880,8 +892,8 @@ def test_stem_windows(self):
880892
self.assertEqual(P('c:a/.hgrc').stem, '.hgrc')
881893
self.assertEqual(P('c:a/.hg.rc').stem, '.hg')
882894
self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar')
883-
self.assertEqual(P('c:a/Some name. Ending with a dot.').stem,
884-
'Some name. Ending with a dot.')
895+
self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot')
896+
885897
def test_with_name_common(self):
886898
P = self.cls
887899
self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml'))
@@ -929,16 +941,16 @@ def test_with_stem_common(self):
929941
self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py'))
930942
self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py'))
931943
self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz'))
932-
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d'))
933-
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d'))
944+
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.'))
945+
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.'))
934946

935947
@needs_windows
936948
def test_with_stem_windows(self):
937949
P = self.cls
938950
self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d'))
939951
self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d'))
940-
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d'))
941-
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d'))
952+
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.'))
953+
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.'))
942954
self.assertRaises(ValueError, P('c:').with_stem, 'd')
943955
self.assertRaises(ValueError, P('c:/').with_stem, 'd')
944956
self.assertRaises(ValueError, P('//My/Share').with_stem, 'd')
@@ -974,6 +986,11 @@ def test_with_suffix_common(self):
974986
# Stripping suffix.
975987
self.assertEqual(P('a/b.py').with_suffix(''), P('a/b'))
976988
self.assertEqual(P('/a/b').with_suffix(''), P('/a/b'))
989+
# Single dot
990+
self.assertEqual(P('a/b').with_suffix('.'), P('a/b.'))
991+
self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.'))
992+
self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.'))
993+
self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.'))
977994

978995
@needs_windows
979996
def test_with_suffix_windows(self):
@@ -1012,7 +1029,6 @@ def test_with_suffix_invalid(self):
10121029
# Invalid suffix.
10131030
self.assertRaises(ValueError, P('a/b').with_suffix, 'gz')
10141031
self.assertRaises(ValueError, P('a/b').with_suffix, '/')
1015-
self.assertRaises(ValueError, P('a/b').with_suffix, '.')
10161032
self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz')
10171033
self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d')
10181034
self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d')
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and
2+
related attributes and methods. For example, the
3+
:attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now
4+
``['.bar', '.']`` rather than ``[]``. This brings file extension splitting
5+
in line with :func:`os.path.splitext`.

0 commit comments

Comments
 (0)