Skip to content

bpo-25996: Added support of file descriptors in os.scandir() on Unix. #502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Doc/library/os.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2029,6 +2029,9 @@ features:
attributes of each :class:`os.DirEntry` will be ``bytes``; in all other
circumstances, they will be of type ``str``.

This function can also support :ref:`specifying a file descriptor
<path_fd>`; the file descriptor must refer to a directory.

The :func:`scandir` iterator supports the :term:`context manager` protocol
and has the following method:

Expand Down Expand Up @@ -2075,6 +2078,9 @@ features:

The function accepts a :term:`path-like object`.

.. versionchanged:: 3.7
Added support for :ref:`file descriptors <path_fd>` on Unix.


.. class:: DirEntry

Expand Down Expand Up @@ -2114,7 +2120,9 @@ features:
The entry's full path name: equivalent to ``os.path.join(scandir_path,
entry.name)`` where *scandir_path* is the :func:`scandir` *path*
argument. The path is only absolute if the :func:`scandir` *path*
argument was absolute.
argument was absolute. If the :func:`scandir` *path*
argument was a :ref:`file descriptor <path_fd>`, the :attr:`path`
attribute is the same as the :attr:`name` attribute.

The :attr:`path` attribute will be ``bytes`` if the :func:`scandir`
*path* argument is of type ``bytes`` and ``str`` otherwise. Use
Expand Down
7 changes: 7 additions & 0 deletions Doc/whatsnew/3.7.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ os
Added support for :class:`bytes` paths in :func:`~os.fwalk`. (Contributed by
Serhiy Storchaka in :issue:`28682`.)

Added support for :ref:`file descriptors <path_fd>` in :func:`~os.scandir`
on Unix. (Contributed by Serhiy Storchaka in :issue:`25996`.)

unittest.mock
-------------

Expand Down Expand Up @@ -143,6 +146,10 @@ Optimizations
:func:`~math.erfc` in the :mod:`math` module. (Contributed by Serhiy
Storchaka in :issue:`26121`.)

* The :func:`os.fwalk` function is now 2 times faster. This was achieved
by using the :func:`os.scandir` function.
(Contributed by Serhiy Storchaka in :issue:`25996`.)


Build and C API Changes
=======================
Expand Down
42 changes: 25 additions & 17 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def _add(str, fn):
_add("HAVE_FCHMOD", "chmod")
_add("HAVE_FCHOWN", "chown")
_add("HAVE_FDOPENDIR", "listdir")
_add("HAVE_FDOPENDIR", "scandir")
_add("HAVE_FEXECVE", "execve")
_set.add(stat) # fstat always works
_add("HAVE_FTRUNCATE", "truncate")
Expand Down Expand Up @@ -416,7 +417,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False):

__all__.append("walk")

if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:

def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None):
"""Directory tree generator.
Expand Down Expand Up @@ -455,7 +456,8 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=
top = fspath(top)
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
if not follow_symlinks:
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
topfd = open(top, O_RDONLY, dir_fd=dir_fd)
try:
if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
Expand All @@ -470,35 +472,41 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
# necessary, it can be adapted to only require O(1) FDs, see issue
# #13734.

names = listdir(topfd)
if isbytes:
names = map(fsencode, names)
dirs, nondirs = [], []
for name in names:
scandir_it = scandir(topfd)
dirs = []
nondirs = []
entries = None if topdown or follow_symlinks else []
for entry in scandir_it:
name = entry.name
if isbytes:
name = fsencode(name)
try:
# Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with
# walk() which reports symlinks to directories as directories.
# We do however check for symlinks before recursing into
# a subdirectory.
if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode):
if entry.is_dir():
dirs.append(name)
if entries is not None:
entries.append(entry)
else:
nondirs.append(name)
except OSError:
try:
# Add dangling symlinks, ignore disappeared files
if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False)
.st_mode):
if entry.is_symlink():
nondirs.append(name)
except OSError:
continue
pass

if topdown:
yield toppath, dirs, nondirs, topfd

for name in dirs:
for name in dirs if entries is None else zip(dirs, entries):
try:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks)
if not follow_symlinks:
if topdown:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
else:
assert entries is not None
name, entry = name
orig_st = entry.stat(follow_symlinks=False)
dirfd = open(name, O_RDONLY, dir_fd=topfd)
except OSError as err:
if onerror is not None:
Expand Down
31 changes: 30 additions & 1 deletion Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -3313,6 +3313,35 @@ def test_bytes(self):
self.assertEqual(entry.path,
os.fsencode(os.path.join(self.path, 'file.txt')))

@unittest.skipUnless(os.listdir in os.supports_fd,
'fd support for listdir required for this test.')
def test_fd(self):
self.assertIn(os.scandir, os.supports_fd)
self.create_file('file.txt')
expected_names = ['file.txt']
if support.can_symlink():
os.symlink('file.txt', os.path.join(self.path, 'link'))
expected_names.append('link')

fd = os.open(self.path, os.O_RDONLY)
try:
with os.scandir(fd) as it:
entries = list(it)
names = [entry.name for entry in entries]
self.assertEqual(sorted(names), expected_names)
self.assertEqual(names, os.listdir(fd))
for entry in entries:
self.assertEqual(entry.path, entry.name)
self.assertEqual(os.fspath(entry), entry.name)
self.assertEqual(entry.is_symlink(), entry.name == 'link')
if os.stat in os.supports_dir_fd:
st = os.stat(entry.name, dir_fd=fd)
self.assertEqual(entry.stat(), st)
st = os.stat(entry.name, dir_fd=fd, follow_symlinks=False)
self.assertEqual(entry.stat(follow_symlinks=False), st)
finally:
os.close(fd)

def test_empty_path(self):
self.assertRaises(FileNotFoundError, os.scandir, '')

Expand All @@ -3328,7 +3357,7 @@ def test_consume_iterator_twice(self):
self.assertEqual(len(entries2), 0, entries2)

def test_bad_path_type(self):
for obj in [1234, 1.234, {}, []]:
for obj in [1.234, {}, []]:
self.assertRaises(TypeError, os.scandir, obj)

def test_close(self):
Expand Down
3 changes: 3 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,9 @@ Extension Modules
Library
-------

- bpo-25996: Added support for file descriptors in os.scandir() on Unix.
os.fwalk() is now 2 times faster because it uses os.scandir().

- bpo-28699: Fixed a bug in pools in multiprocessing.pool where raising an
exception at the very start of an iterable could swallow the exception or
make the program hang. Patch by Davin Potts and Xiang Zhang.
Expand Down
4 changes: 2 additions & 2 deletions Modules/clinic/posixmodule.c.h
Original file line number Diff line number Diff line change
Expand Up @@ -5926,7 +5926,7 @@ os_scandir(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwname
PyObject *return_value = NULL;
static const char * const _keywords[] = {"path", NULL};
static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0};
path_t path = PATH_T_INITIALIZE("scandir", "path", 1, 0);
path_t path = PATH_T_INITIALIZE("scandir", "path", 1, PATH_HAVE_FDOPENDIR);

if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
path_converter, &path)) {
Expand Down Expand Up @@ -6493,4 +6493,4 @@ os_getrandom(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwna
#ifndef OS_GETRANDOM_METHODDEF
#define OS_GETRANDOM_METHODDEF
#endif /* !defined(OS_GETRANDOM_METHODDEF) */
/*[clinic end generated code: output=5a0be969e3f71660 input=a9049054013a1b77]*/
/*[clinic end generated code: output=5529857101c08b49 input=a9049054013a1b77]*/
105 changes: 83 additions & 22 deletions Modules/posixmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -11161,6 +11161,7 @@ typedef struct {
unsigned char d_type;
#endif
ino_t d_ino;
int dir_fd;
#endif
} DirEntry;

Expand Down Expand Up @@ -11210,19 +11211,31 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks)
PyObject *ub;

#ifdef MS_WINDOWS
if (PyUnicode_FSDecoder(self->path, &ub)) {
const wchar_t *path = PyUnicode_AsUnicode(ub);
if (!PyUnicode_FSDecoder(self->path, &ub))
return NULL;
const wchar_t *path = PyUnicode_AsUnicode(ub);
#else /* POSIX */
if (PyUnicode_FSConverter(self->path, &ub)) {
const char *path = PyBytes_AS_STRING(ub);
if (!PyUnicode_FSConverter(self->path, &ub))
return NULL;
const char *path = PyBytes_AS_STRING(ub);
if (self->dir_fd != DEFAULT_DIR_FD) {
#ifdef HAVE_FSTATAT
result = fstatat(self->dir_fd, path, &st,
follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW);
#else
PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat");
return NULL;
#endif /* HAVE_FSTATAT */
}
else
#endif
{
if (follow_symlinks)
result = STAT(path, &st);
else
result = LSTAT(path, &st);
Py_DECREF(ub);
} else
return NULL;
}
Py_DECREF(ub);

if (result != 0)
return path_object_error(self->path);
Expand Down Expand Up @@ -11633,20 +11646,36 @@ DirEntry_from_posix_info(path_t *path, const char *name, Py_ssize_t name_len,
entry->stat = NULL;
entry->lstat = NULL;

joined_path = join_path_filename(path->narrow, name, name_len);
if (!joined_path)
goto error;
if (path->fd != -1) {
entry->dir_fd = path->fd;
joined_path = NULL;
}
else {
entry->dir_fd = DEFAULT_DIR_FD;
joined_path = join_path_filename(path->narrow, name, name_len);
if (!joined_path)
goto error;
}

if (!path->narrow || !PyBytes_Check(path->object)) {
entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len);
entry->path = PyUnicode_DecodeFSDefault(joined_path);
if (joined_path)
entry->path = PyUnicode_DecodeFSDefault(joined_path);
}
else {
entry->name = PyBytes_FromStringAndSize(name, name_len);
entry->path = PyBytes_FromString(joined_path);
if (joined_path)
entry->path = PyBytes_FromString(joined_path);
}
PyMem_Free(joined_path);
if (!entry->name || !entry->path)
if (!entry->name)
goto error;

if (path->fd != -1) {
entry->path = entry->name;
Py_INCREF(entry->path);
}
else if (!entry->path)
goto error;

#ifdef HAVE_DIRENT_D_TYPE
Expand Down Expand Up @@ -11674,6 +11703,9 @@ typedef struct {
#else /* POSIX */
DIR *dirp;
#endif
#ifdef HAVE_FDOPENDIR
int fd;
#endif
} ScandirIterator;

#ifdef MS_WINDOWS
Expand Down Expand Up @@ -11758,6 +11790,10 @@ ScandirIterator_closedir(ScandirIterator *iterator)

iterator->dirp = NULL;
Py_BEGIN_ALLOW_THREADS
#ifdef HAVE_FDOPENDIR
if (iterator->path.fd != -1)
rewinddir(dirp);
#endif
closedir(dirp);
Py_END_ALLOW_THREADS
return;
Expand Down Expand Up @@ -11933,7 +11969,7 @@ static PyTypeObject ScandirIteratorType = {
/*[clinic input]
os.scandir

path : path_t(nullable=True) = None
path : path_t(nullable=True, allow_fd='PATH_HAVE_FDOPENDIR') = None

Return an iterator of DirEntry objects for given path.

Expand All @@ -11946,13 +11982,16 @@ If path is None, uses the path='.'.

static PyObject *
os_scandir_impl(PyObject *module, path_t *path)
/*[clinic end generated code: output=6eb2668b675ca89e input=e62b08b3cd41f604]*/
/*[clinic end generated code: output=6eb2668b675ca89e input=b139dc1c57f60846]*/
{
ScandirIterator *iterator;
#ifdef MS_WINDOWS
wchar_t *path_strW;
#else
const char *path_str;
#ifdef HAVE_FDOPENDIR
int fd = -1;
#endif
#endif

iterator = PyObject_New(ScandirIterator, &ScandirIteratorType);
Expand Down Expand Up @@ -11988,18 +12027,40 @@ os_scandir_impl(PyObject *module, path_t *path)
goto error;
}
#else /* POSIX */
if (iterator->path.narrow)
path_str = iterator->path.narrow;
errno = 0;
#ifdef HAVE_FDOPENDIR
if (path->fd != -1) {
/* closedir() closes the FD, so we duplicate it */
fd = _Py_dup(path->fd);
if (fd == -1)
goto error;

Py_BEGIN_ALLOW_THREADS
iterator->dirp = fdopendir(fd);
Py_END_ALLOW_THREADS
}
else
path_str = ".";
#endif
{
if (iterator->path.narrow)
path_str = iterator->path.narrow;
else
path_str = ".";

errno = 0;
Py_BEGIN_ALLOW_THREADS
iterator->dirp = opendir(path_str);
Py_END_ALLOW_THREADS
Py_BEGIN_ALLOW_THREADS
iterator->dirp = opendir(path_str);
Py_END_ALLOW_THREADS
}

if (!iterator->dirp) {
path_error(&iterator->path);
#ifdef HAVE_FDOPENDIR
if (fd != -1) {
Py_BEGIN_ALLOW_THREADS
close(fd);
Py_END_ALLOW_THREADS
}
#endif
goto error;
}
#endif
Expand Down