Skip to content

Commit ea720fe

Browse files
bpo-25996: Added support for file descriptors in os.scandir() on Unix. (#502)
os.fwalk() is now 2 times faster because it uses os.scandir().
1 parent 0a58f72 commit ea720fe

File tree

7 files changed

+159
-43
lines changed

7 files changed

+159
-43
lines changed

Doc/library/os.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2029,6 +2029,9 @@ features:
20292029
attributes of each :class:`os.DirEntry` will be ``bytes``; in all other
20302030
circumstances, they will be of type ``str``.
20312031

2032+
This function can also support :ref:`specifying a file descriptor
2033+
<path_fd>`; the file descriptor must refer to a directory.
2034+
20322035
The :func:`scandir` iterator supports the :term:`context manager` protocol
20332036
and has the following method:
20342037

@@ -2075,6 +2078,9 @@ features:
20752078

20762079
The function accepts a :term:`path-like object`.
20772080

2081+
.. versionchanged:: 3.7
2082+
Added support for :ref:`file descriptors <path_fd>` on Unix.
2083+
20782084

20792085
.. class:: DirEntry
20802086

@@ -2114,7 +2120,9 @@ features:
21142120
The entry's full path name: equivalent to ``os.path.join(scandir_path,
21152121
entry.name)`` where *scandir_path* is the :func:`scandir` *path*
21162122
argument. The path is only absolute if the :func:`scandir` *path*
2117-
argument was absolute.
2123+
argument was absolute. If the :func:`scandir` *path*
2124+
argument was a :ref:`file descriptor <path_fd>`, the :attr:`path`
2125+
attribute is the same as the :attr:`name` attribute.
21182126

21192127
The :attr:`path` attribute will be ``bytes`` if the :func:`scandir`
21202128
*path* argument is of type ``bytes`` and ``str`` otherwise. Use

Doc/whatsnew/3.7.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ os
108108
Added support for :class:`bytes` paths in :func:`~os.fwalk`. (Contributed by
109109
Serhiy Storchaka in :issue:`28682`.)
110110

111+
Added support for :ref:`file descriptors <path_fd>` in :func:`~os.scandir`
112+
on Unix. (Contributed by Serhiy Storchaka in :issue:`25996`.)
113+
111114
unittest.mock
112115
-------------
113116

@@ -148,6 +151,10 @@ Optimizations
148151
:func:`~math.erfc` in the :mod:`math` module. (Contributed by Serhiy
149152
Storchaka in :issue:`26121`.)
150153

154+
* The :func:`os.fwalk` function is now 2 times faster. This was achieved
155+
by using the :func:`os.scandir` function.
156+
(Contributed by Serhiy Storchaka in :issue:`25996`.)
157+
151158

152159
Build and C API Changes
153160
=======================

Lib/os.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ def _add(str, fn):
129129
_add("HAVE_FCHMOD", "chmod")
130130
_add("HAVE_FCHOWN", "chown")
131131
_add("HAVE_FDOPENDIR", "listdir")
132+
_add("HAVE_FDOPENDIR", "scandir")
132133
_add("HAVE_FEXECVE", "execve")
133134
_set.add(stat) # fstat always works
134135
_add("HAVE_FTRUNCATE", "truncate")
@@ -416,7 +417,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
416417

417418
__all__.append("walk")
418419

419-
if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
420+
if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
420421

421422
def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None):
422423
"""Directory tree generator.
@@ -455,7 +456,8 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=
455456
top = fspath(top)
456457
# Note: To guard against symlink races, we use the standard
457458
# lstat()/open()/fstat() trick.
458-
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
459+
if not follow_symlinks:
460+
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
459461
topfd = open(top, O_RDONLY, dir_fd=dir_fd)
460462
try:
461463
if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
@@ -470,35 +472,41 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
470472
# necessary, it can be adapted to only require O(1) FDs, see issue
471473
# #13734.
472474

473-
names = listdir(topfd)
474-
if isbytes:
475-
names = map(fsencode, names)
476-
dirs, nondirs = [], []
477-
for name in names:
475+
scandir_it = scandir(topfd)
476+
dirs = []
477+
nondirs = []
478+
entries = None if topdown or follow_symlinks else []
479+
for entry in scandir_it:
480+
name = entry.name
481+
if isbytes:
482+
name = fsencode(name)
478483
try:
479-
# Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with
480-
# walk() which reports symlinks to directories as directories.
481-
# We do however check for symlinks before recursing into
482-
# a subdirectory.
483-
if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode):
484+
if entry.is_dir():
484485
dirs.append(name)
486+
if entries is not None:
487+
entries.append(entry)
485488
else:
486489
nondirs.append(name)
487490
except OSError:
488491
try:
489492
# Add dangling symlinks, ignore disappeared files
490-
if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False)
491-
.st_mode):
493+
if entry.is_symlink():
492494
nondirs.append(name)
493495
except OSError:
494-
continue
496+
pass
495497

496498
if topdown:
497499
yield toppath, dirs, nondirs, topfd
498500

499-
for name in dirs:
501+
for name in dirs if entries is None else zip(dirs, entries):
500502
try:
501-
orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks)
503+
if not follow_symlinks:
504+
if topdown:
505+
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
506+
else:
507+
assert entries is not None
508+
name, entry = name
509+
orig_st = entry.stat(follow_symlinks=False)
502510
dirfd = open(name, O_RDONLY, dir_fd=topfd)
503511
except OSError as err:
504512
if onerror is not None:

Lib/test/test_os.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3313,6 +3313,35 @@ def test_bytes(self):
33133313
self.assertEqual(entry.path,
33143314
os.fsencode(os.path.join(self.path, 'file.txt')))
33153315

3316+
@unittest.skipUnless(os.listdir in os.supports_fd,
3317+
'fd support for listdir required for this test.')
3318+
def test_fd(self):
3319+
self.assertIn(os.scandir, os.supports_fd)
3320+
self.create_file('file.txt')
3321+
expected_names = ['file.txt']
3322+
if support.can_symlink():
3323+
os.symlink('file.txt', os.path.join(self.path, 'link'))
3324+
expected_names.append('link')
3325+
3326+
fd = os.open(self.path, os.O_RDONLY)
3327+
try:
3328+
with os.scandir(fd) as it:
3329+
entries = list(it)
3330+
names = [entry.name for entry in entries]
3331+
self.assertEqual(sorted(names), expected_names)
3332+
self.assertEqual(names, os.listdir(fd))
3333+
for entry in entries:
3334+
self.assertEqual(entry.path, entry.name)
3335+
self.assertEqual(os.fspath(entry), entry.name)
3336+
self.assertEqual(entry.is_symlink(), entry.name == 'link')
3337+
if os.stat in os.supports_dir_fd:
3338+
st = os.stat(entry.name, dir_fd=fd)
3339+
self.assertEqual(entry.stat(), st)
3340+
st = os.stat(entry.name, dir_fd=fd, follow_symlinks=False)
3341+
self.assertEqual(entry.stat(follow_symlinks=False), st)
3342+
finally:
3343+
os.close(fd)
3344+
33163345
def test_empty_path(self):
33173346
self.assertRaises(FileNotFoundError, os.scandir, '')
33183347

@@ -3328,7 +3357,7 @@ def test_consume_iterator_twice(self):
33283357
self.assertEqual(len(entries2), 0, entries2)
33293358

33303359
def test_bad_path_type(self):
3331-
for obj in [1234, 1.234, {}, []]:
3360+
for obj in [1.234, {}, []]:
33323361
self.assertRaises(TypeError, os.scandir, obj)
33333362

33343363
def test_close(self):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,9 @@ Extension Modules
294294
Library
295295
-------
296296

297+
- bpo-25996: Added support for file descriptors in os.scandir() on Unix.
298+
os.fwalk() is now 2 times faster because it uses os.scandir().
299+
297300
- bpo-28699: Fixed a bug in pools in multiprocessing.pool that raising an
298301
exception at the very first of an iterable may swallow the exception or
299302
make the program hang. Patch by Davin Potts and Xiang Zhang.

Modules/clinic/posixmodule.c.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5926,7 +5926,7 @@ os_scandir(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwname
59265926
PyObject *return_value = NULL;
59275927
static const char * const _keywords[] = {"path", NULL};
59285928
static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0};
5929-
path_t path = PATH_T_INITIALIZE("scandir", "path", 1, 0);
5929+
path_t path = PATH_T_INITIALIZE("scandir", "path", 1, PATH_HAVE_FDOPENDIR);
59305930

59315931
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
59325932
path_converter, &path)) {
@@ -6493,4 +6493,4 @@ os_getrandom(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwna
64936493
#ifndef OS_GETRANDOM_METHODDEF
64946494
#define OS_GETRANDOM_METHODDEF
64956495
#endif /* !defined(OS_GETRANDOM_METHODDEF) */
6496-
/*[clinic end generated code: output=5a0be969e3f71660 input=a9049054013a1b77]*/
6496+
/*[clinic end generated code: output=5529857101c08b49 input=a9049054013a1b77]*/

Modules/posixmodule.c

Lines changed: 83 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11161,6 +11161,7 @@ typedef struct {
1116111161
unsigned char d_type;
1116211162
#endif
1116311163
ino_t d_ino;
11164+
int dir_fd;
1116411165
#endif
1116511166
} DirEntry;
1116611167

@@ -11210,19 +11211,31 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks)
1121011211
PyObject *ub;
1121111212

1121211213
#ifdef MS_WINDOWS
11213-
if (PyUnicode_FSDecoder(self->path, &ub)) {
11214-
const wchar_t *path = PyUnicode_AsUnicode(ub);
11214+
if (!PyUnicode_FSDecoder(self->path, &ub))
11215+
return NULL;
11216+
const wchar_t *path = PyUnicode_AsUnicode(ub);
1121511217
#else /* POSIX */
11216-
if (PyUnicode_FSConverter(self->path, &ub)) {
11217-
const char *path = PyBytes_AS_STRING(ub);
11218+
if (!PyUnicode_FSConverter(self->path, &ub))
11219+
return NULL;
11220+
const char *path = PyBytes_AS_STRING(ub);
11221+
if (self->dir_fd != DEFAULT_DIR_FD) {
11222+
#ifdef HAVE_FSTATAT
11223+
result = fstatat(self->dir_fd, path, &st,
11224+
follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW);
11225+
#else
11226+
PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat");
11227+
return NULL;
11228+
#endif /* HAVE_FSTATAT */
11229+
}
11230+
else
1121811231
#endif
11232+
{
1121911233
if (follow_symlinks)
1122011234
result = STAT(path, &st);
1122111235
else
1122211236
result = LSTAT(path, &st);
11223-
Py_DECREF(ub);
11224-
} else
11225-
return NULL;
11237+
}
11238+
Py_DECREF(ub);
1122611239

1122711240
if (result != 0)
1122811241
return path_object_error(self->path);
@@ -11633,20 +11646,36 @@ DirEntry_from_posix_info(path_t *path, const char *name, Py_ssize_t name_len,
1163311646
entry->stat = NULL;
1163411647
entry->lstat = NULL;
1163511648

11636-
joined_path = join_path_filename(path->narrow, name, name_len);
11637-
if (!joined_path)
11638-
goto error;
11649+
if (path->fd != -1) {
11650+
entry->dir_fd = path->fd;
11651+
joined_path = NULL;
11652+
}
11653+
else {
11654+
entry->dir_fd = DEFAULT_DIR_FD;
11655+
joined_path = join_path_filename(path->narrow, name, name_len);
11656+
if (!joined_path)
11657+
goto error;
11658+
}
1163911659

1164011660
if (!path->narrow || !PyBytes_Check(path->object)) {
1164111661
entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len);
11642-
entry->path = PyUnicode_DecodeFSDefault(joined_path);
11662+
if (joined_path)
11663+
entry->path = PyUnicode_DecodeFSDefault(joined_path);
1164311664
}
1164411665
else {
1164511666
entry->name = PyBytes_FromStringAndSize(name, name_len);
11646-
entry->path = PyBytes_FromString(joined_path);
11667+
if (joined_path)
11668+
entry->path = PyBytes_FromString(joined_path);
1164711669
}
1164811670
PyMem_Free(joined_path);
11649-
if (!entry->name || !entry->path)
11671+
if (!entry->name)
11672+
goto error;
11673+
11674+
if (path->fd != -1) {
11675+
entry->path = entry->name;
11676+
Py_INCREF(entry->path);
11677+
}
11678+
else if (!entry->path)
1165011679
goto error;
1165111680

1165211681
#ifdef HAVE_DIRENT_D_TYPE
@@ -11674,6 +11703,9 @@ typedef struct {
1167411703
#else /* POSIX */
1167511704
DIR *dirp;
1167611705
#endif
11706+
#ifdef HAVE_FDOPENDIR
11707+
int fd;
11708+
#endif
1167711709
} ScandirIterator;
1167811710

1167911711
#ifdef MS_WINDOWS
@@ -11758,6 +11790,10 @@ ScandirIterator_closedir(ScandirIterator *iterator)
1175811790

1175911791
iterator->dirp = NULL;
1176011792
Py_BEGIN_ALLOW_THREADS
11793+
#ifdef HAVE_FDOPENDIR
11794+
if (iterator->path.fd != -1)
11795+
rewinddir(dirp);
11796+
#endif
1176111797
closedir(dirp);
1176211798
Py_END_ALLOW_THREADS
1176311799
return;
@@ -11933,7 +11969,7 @@ static PyTypeObject ScandirIteratorType = {
1193311969
/*[clinic input]
1193411970
os.scandir
1193511971
11936-
path : path_t(nullable=True) = None
11972+
path : path_t(nullable=True, allow_fd='PATH_HAVE_FDOPENDIR') = None
1193711973
1193811974
Return an iterator of DirEntry objects for given path.
1193911975
@@ -11946,13 +11982,16 @@ If path is None, uses the path='.'.
1194611982

1194711983
static PyObject *
1194811984
os_scandir_impl(PyObject *module, path_t *path)
11949-
/*[clinic end generated code: output=6eb2668b675ca89e input=e62b08b3cd41f604]*/
11985+
/*[clinic end generated code: output=6eb2668b675ca89e input=b139dc1c57f60846]*/
1195011986
{
1195111987
ScandirIterator *iterator;
1195211988
#ifdef MS_WINDOWS
1195311989
wchar_t *path_strW;
1195411990
#else
1195511991
const char *path_str;
11992+
#ifdef HAVE_FDOPENDIR
11993+
int fd = -1;
11994+
#endif
1195611995
#endif
1195711996

1195811997
iterator = PyObject_New(ScandirIterator, &ScandirIteratorType);
@@ -11988,18 +12027,40 @@ os_scandir_impl(PyObject *module, path_t *path)
1198812027
goto error;
1198912028
}
1199012029
#else /* POSIX */
11991-
if (iterator->path.narrow)
11992-
path_str = iterator->path.narrow;
12030+
errno = 0;
12031+
#ifdef HAVE_FDOPENDIR
12032+
if (path->fd != -1) {
12033+
/* closedir() closes the FD, so we duplicate it */
12034+
fd = _Py_dup(path->fd);
12035+
if (fd == -1)
12036+
goto error;
12037+
12038+
Py_BEGIN_ALLOW_THREADS
12039+
iterator->dirp = fdopendir(fd);
12040+
Py_END_ALLOW_THREADS
12041+
}
1199312042
else
11994-
path_str = ".";
12043+
#endif
12044+
{
12045+
if (iterator->path.narrow)
12046+
path_str = iterator->path.narrow;
12047+
else
12048+
path_str = ".";
1199512049

11996-
errno = 0;
11997-
Py_BEGIN_ALLOW_THREADS
11998-
iterator->dirp = opendir(path_str);
11999-
Py_END_ALLOW_THREADS
12050+
Py_BEGIN_ALLOW_THREADS
12051+
iterator->dirp = opendir(path_str);
12052+
Py_END_ALLOW_THREADS
12053+
}
1200012054

1200112055
if (!iterator->dirp) {
1200212056
path_error(&iterator->path);
12057+
#ifdef HAVE_FDOPENDIR
12058+
if (fd != -1) {
12059+
Py_BEGIN_ALLOW_THREADS
12060+
close(fd);
12061+
Py_END_ALLOW_THREADS
12062+
}
12063+
#endif
1200312064
goto error;
1200412065
}
1200512066
#endif

0 commit comments

Comments
 (0)