Skip to content

bpo-27827: pathlib: identify a greater range of reserved filenames on Windows. #26698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,16 +124,25 @@ class _WindowsFlavour(_Flavour):
ext_namespace_prefix = '\\\\?\\'

reserved_names = (
{'CON', 'PRN', 'AUX', 'NUL'} |
{'COM%d' % i for i in range(1, 10)} |
{'LPT%d' % i for i in range(1, 10)}
{'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
{'COM%s' % c for c in '123456789\xb9\xb2\xb3'} |
{'LPT%s' % c for c in '123456789\xb9\xb2\xb3'}
)

# Interesting findings about extended paths:
# - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported
# but '\\?\c:/a' is not
# - extended paths are always absolute; "relative" extended paths will
# fail.
# * '\\?\c:\a' is an extended path, which bypasses normal Windows API
# path processing. Thus relative paths are not resolved and slash is not
# translated to backslash. It has the native NT path limit of 32767
# characters, but a bit less after resolving device symbolic links,
# such as '\??\C:' => '\Device\HarddiskVolume2'.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do those in-memory symlinks really detract from the limit?
I.e. is a new full path really formed,
or is just the remaining part passed to the FS driver?

# * '\\?\c:/a' looks for a device named 'C:/a' because slash is a
# regular name character in the object namespace.
# * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems.
# The only path separator at the filesystem level is backslash.
# * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and
# thus limited to MAX_PATH.
# * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH,
# even with the '\\?\' prefix.

def splitroot(self, part, sep=sep):
first = part[0:1]
Expand Down Expand Up @@ -195,15 +204,16 @@ def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):

def is_reserved(self, parts):
# NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL").
# We err on the side of caution and return True for paths which are
# not considered reserved by Windows.
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True for paths
# which are not considered reserved by Windows.
if not parts:
return False
if parts[0].startswith('\\\\'):
# UNC paths are never reserved
return False
return parts[-1].partition('.')[0].upper() in self.reserved_names
name = parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in self.reserved_names

def make_uri(self, path):
# Under Windows, file URIs use the UTF-8 encoding.
Expand Down
32 changes: 24 additions & 8 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1282,19 +1282,35 @@ def test_is_reserved(self):
self.assertIs(False, P('').is_reserved())
self.assertIs(False, P('/').is_reserved())
self.assertIs(False, P('/foo/bar').is_reserved())
# UNC paths are never reserved.
self.assertIs(False, P('//my/share/nul/con/aux').is_reserved())
# Case-insensitive DOS-device names are reserved.
self.assertIs(True, P('nul').is_reserved())
self.assertIs(True, P('aux').is_reserved())
self.assertIs(True, P('prn').is_reserved())
self.assertIs(True, P('con').is_reserved())
self.assertIs(True, P('NUL').is_reserved())
self.assertIs(True, P('conin$').is_reserved())
self.assertIs(True, P('conout$').is_reserved())
# COM/LPT + 1-9 or + superscript 1-3 are reserved.
self.assertIs(True, P('COM1').is_reserved())
self.assertIs(True, P('LPT9').is_reserved())
self.assertIs(True, P('com\xb9').is_reserved())
self.assertIs(True, P('com\xb2').is_reserved())
self.assertIs(True, P('lpt\xb3').is_reserved())
# DOS-device name matching ignores characters after a dot or
# a colon and also ignores trailing spaces.
self.assertIs(True, P('NUL.txt').is_reserved())
self.assertIs(True, P('com1').is_reserved())
self.assertIs(True, P('com9.bar').is_reserved())
self.assertIs(True, P('PRN ').is_reserved())
self.assertIs(True, P('AUX .txt').is_reserved())
self.assertIs(True, P('COM1:bar').is_reserved())
self.assertIs(True, P('LPT9 :bar').is_reserved())
# DOS-device names are only matched at the beginning
# of a path component.
self.assertIs(False, P('bar.com9').is_reserved())
self.assertIs(True, P('lpt1').is_reserved())
self.assertIs(True, P('lpt9.bar').is_reserved())
self.assertIs(False, P('bar.lpt9').is_reserved())
# Only the last component matters.
# Only the last path component matters.
self.assertIs(True, P('c:/baz/con/NUL').is_reserved())
self.assertIs(False, P('c:/NUL/con/baz').is_reserved())
# UNC paths are never reserved.
self.assertIs(False, P('//my/share/nul/con/aux').is_reserved())

class PurePathTest(_BasePurePathTest, unittest.TestCase):
cls = pathlib.PurePath
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:meth:`pathlib.PureWindowsPath.is_reserved` now identifies a greater range of
reserved filenames, including those with trailing spaces or colons.