Skip to content

Commit 93ebc1f

Browse files
committed
Use dynamic memoryview() size based on file size
As per #7160 (comment), using memoryview() on Windows appears to be faster for small files too, provided the memoryview() length is equal to the file size.
1 parent be40ae1 commit 93ebc1f

File tree

5 files changed

+43
-43
lines changed

5 files changed

+43
-43
lines changed

Doc/library/shutil.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,8 +413,8 @@ On Linux, Solaris and other POSIX platforms where :func:`os.sendfile` supports
413413
copies between 2 regular file descriptors :func:`os.sendfile` is used.
414414

415415
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
416-
instead of 16 KiB) and if file size >= 128 MiB a :func:`memoryview`-based
417-
variant of :func:`shutil.copyfileobj` is used.
416+
instead of 16 KiB) and a :func:`memoryview`-based variant of
417+
:func:`shutil.copyfileobj` is used.
418418

419419
If the fast-copy operation fails and no data was written in the destination
420420
file then shutil will silently fall back on using less efficient

Doc/whatsnew/3.8.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ Optimizations
100100
"fast-copy" means that the copying operation occurs within the kernel,
101101
avoiding the use of userspace buffers in Python as in
102102
"``outfd.write(infd.read())``".
103-
On Windows :func:`shutil.copyfile` default buffer size was increased
104-
from 16 KiB to 1 MiB and a faster implementation of :func:`shutil.copyfileobj`
105-
is used for files bigger than 128 MiB.
103+
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
104+
instead of 16 KiB) and a :func:`memoryview`-based variant of
105+
:func:`shutil.copyfileobj` is used.
106106
The speedup for copying a 512 MiB file within the same partition is about
107107
+26% on Linux, +50% on macOS and +40% on Windows. Also, far fewer CPU cycles
108108
are consumed.

Lib/shutil.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,11 @@ def _fastcopy_sendfile(fsrc, fdst):
169169
break # EOF
170170
offset += sent
171171

172-
def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
173-
"""readinto()/memoryview() based variant of copyfileobj()."""
172+
def _copyfileobj_readinto(fsrc, fdst, length=COPY_BUFSIZE):
173+
"""readinto()/memoryview() based variant of copyfileobj().
174+
*fsrc* must support readinto() method and both files must be
175+
open in binary mode.
176+
"""
174177
# Localize variable access to minimize overhead.
175178
fsrc_readinto = fsrc.readinto
176179
fdst_write = fdst.write
@@ -250,12 +253,10 @@ def copyfile(src, dst, *, follow_symlinks=True):
250253
return dst
251254
except _GiveupOnFastCopy:
252255
pass
253-
# Windows: for files >= 128 MiB in size we observe a
254-
# considerable speedup by using a readinto()/memoryview()
255-
# variant of copyfileobj(), see:
256-
# https://github.com/python/cpython/pull/7160#discussion_r195162475
257-
elif _WINDOWS and file_size >= 128 * 1024 * 1024:
258-
_copybinfileobj(fsrc, fdst)
256+
# Windows, see:
257+
# https://github.com/python/cpython/pull/7160#discussion_r195405230
258+
elif _WINDOWS and file_size > 0:
259+
_copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE))
259260
return dst
260261

261262
copyfileobj(fsrc, fdst)

Lib/test/test_shutil.py

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1936,35 +1936,34 @@ def test_file_offset(self):
19361936
self.assertEqual(dst.tell(), self.FILESIZE)
19371937

19381938
@unittest.skipIf(os.name != 'nt', "Windows only")
1939-
def test_alternate_win_impl(self):
1940-
# On Windows copyfile() uses copyfileobj() for files < 128 MiB,
1941-
# else an alternate memoryview()-based implementation.
1942-
def os_stat_mocked(path):
1943-
# Make shutil believe src file is 128 MiB.
1944-
if path == TESTFN:
1945-
mock = unittest.mock.Mock()
1946-
mock.st_size = 128 * 1024 * 1024
1947-
mock.st_mode = os.lstat(path).st_mode
1948-
return mock
1949-
else:
1950-
return os.lstat(path)
1951-
1952-
# Make sure it's not called.
1953-
with unittest.mock.patch("shutil._copybinfileobj") as m:
1939+
def test_win_impl(self):
1940+
# Make sure alternate Windows implementation is called.
1941+
with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
19541942
shutil.copyfile(TESTFN, TESTFN2)
1955-
assert not m.called
1956-
# Make sure it's called.
1957-
with unittest.mock.patch('shutil.os.stat', create=True,
1958-
side_effect=os_stat_mocked) as m1:
1959-
with unittest.mock.patch("shutil._copybinfileobj") as m2:
1960-
shutil.copyfile(TESTFN, TESTFN2)
1961-
assert m1.called
1962-
assert m2.called
1963-
# Test it.
1964-
with unittest.mock.patch('shutil.os.stat', create=True,
1965-
side_effect=os_stat_mocked) as m1:
1966-
shutil.copyfile(TESTFN, TESTFN2)
1967-
self.assert_files_eq(TESTFN, TESTFN2)
1943+
assert m.called
1944+
1945+
# File size is 2 MiB but max buf size should be 1 MiB.
1946+
self.assertEqual(m.call_args[0][2], 1 * 1024 * 1024)
1947+
1948+
# If file size < 1 MiB memoryview() length must be equal to
1949+
# the actual file size.
1950+
with tempfile.NamedTemporaryFile(delete=False) as f:
1951+
f.write(b'foo')
1952+
fname = f.name
1953+
self.addCleanup(support.unlink, fname)
1954+
with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
1955+
shutil.copyfile(fname, TESTFN2)
1956+
self.assertEqual(m.call_args[0][2], 3)
1957+
1958+
# Empty files should not rely on readinto() variant.
1959+
with tempfile.NamedTemporaryFile(delete=False) as f:
1960+
pass
1961+
fname = f.name
1962+
self.addCleanup(support.unlink, fname)
1963+
with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
1964+
shutil.copyfile(fname, TESTFN2)
1965+
assert not m.called
1966+
self.assert_files_eq(fname, TESTFN2)
19681967

19691968

19701969
class _ZeroCopyFileTest(object):

Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
fast-copy syscalls on Linux, Solaris and macOS in order to copy the file
44
more efficiently.
55
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
6-
instead of 16 KiB) and if file size >= 128 MiB a :func:`memoryview`-based
7-
variant of :func:`shutil.copyfileobj` is used.
6+
instead of 16 KiB) and a :func:`memoryview`-based variant of
7+
:func:`shutil.copyfileobj` is used.
88
The speedup for copying a 512MiB file is about +26% on Linux, +50% on macOS and
99
+40% on Windows. Also, far fewer CPU cycles are consumed.
1010
(Contributed by Giampaolo Rodola' in :issue:`25427`.)

0 commit comments

Comments
 (0)