Skip to content

Commit 8a3f88e

Browse files
committed
Reintroduce memoryview() variant of copyfileobj()
...and use it from copyfile() only if WINDOWS and file size >= 128 MiB
1 parent 06d3e7a commit 8a3f88e

File tree

2 files changed

+53
-6
lines changed

2 files changed

+53
-6
lines changed

Lib/shutil.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@
4343
except ImportError:
4444
getgrnam = None
4545

46+
_WINDOWS = os.name == 'nt'
4647
posix = nt = None
4748
if os.name == 'posix':
4849
import posix
49-
elif os.name == 'nt':
50+
elif _WINDOWS:
5051
import nt
5152

52-
COPY_BUFSIZE = 1024 * 1024 if os.name == 'nt' else 16 * 1024
53+
COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 16 * 1024
5354
_HAS_SENDFILE = posix and hasattr(os, "sendfile")
5455
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
5556

@@ -168,6 +169,25 @@ def _fastcopy_sendfile(fsrc, fdst):
168169
break # EOF
169170
offset += sent
170171

172+
def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """Copy 2 regular file objects open in binary mode.

    *fsrc* must provide readinto() and *fdst* must provide write().
    A single bytearray of *length* bytes is allocated up front and
    reused, through a memoryview, for every chunk that is transferred.

    This is used on Windows only for files >= 128 MiB as it appears to
    give a considerable boost.
    """
    # Localize variable access to minimize overhead.
    fsrc_readinto = fsrc.readinto
    fdst_write = fdst.write
    with memoryview(bytearray(length)) as mv:
        while True:
            n = fsrc_readinto(mv)
            if not n:
                # Nothing was read into the buffer: stop copying.
                break
            elif n < length:
                # Short read: only the first n bytes of the buffer are
                # fresh, so write just that slice (a view, not a copy).
                with mv[:n] as smv:
                    fdst_write(smv)
            else:
                # The buffer was filled completely; write it as is.
                fdst_write(mv)
190+
171191
def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
172192
"""copy data from file-like object fsrc to file-like object fdst"""
173193
# Localize variable access to minimize overhead.
@@ -192,7 +212,7 @@ def _samefile(src, dst):
192212
os.path.normcase(os.path.abspath(dst)))
193213

194214
def copyfile(src, dst, *, follow_symlinks=True):
195-
"""Copy data from src to dst.
215+
"""Copy data from src to dst in the most efficient way possible.
196216
197217
If follow_symlinks is not set and src is a symbolic link, a new
198218
symlink will be created instead of copying the file it points to.
@@ -201,7 +221,8 @@ def copyfile(src, dst, *, follow_symlinks=True):
201221
if _samefile(src, dst):
202222
raise SameFileError("{!r} and {!r} are the same file".format(src, dst))
203223

204-
for fn in [src, dst]:
224+
fsize = 0
225+
for i, fn in enumerate([src, dst]):
205226
try:
206227
st = os.stat(fn)
207228
except OSError:
@@ -211,6 +232,8 @@ def copyfile(src, dst, *, follow_symlinks=True):
211232
# XXX What about other special files? (sockets, devices...)
212233
if stat.S_ISFIFO(st.st_mode):
213234
raise SpecialFileError("`%s` is a named pipe" % fn)
235+
if _WINDOWS and i == 0:
236+
fsize = st.st_size
214237

215238
if not follow_symlinks and os.path.islink(src):
216239
os.symlink(os.readlink(src), dst)
@@ -230,7 +253,14 @@ def copyfile(src, dst, *, follow_symlinks=True):
230253
except _GiveupOnFastCopy:
231254
pass
232255

233-
copyfileobj(fsrc, fdst)
256+
if _WINDOWS and fsize >= 128 * 1024 * 1024:
257+
# Use alternate memoryview() based implementation on Windows
258+
# for files >= 128 MiB. It appears this gives a considerable
259+
# speedup, see:
260+
# https://github.com/python/cpython/pull/7160#discussion_r195162475
261+
_copybinfileobj(fsrc, fdst)
262+
else:
263+
copyfileobj(fsrc, fdst)
234264

235265
return dst
236266

@@ -1124,7 +1154,7 @@ def disk_usage(path):
11241154
used = (st.f_blocks - st.f_bfree) * st.f_frsize
11251155
return _ntuple_diskusage(total, used, free)
11261156

1127-
elif os.name == 'nt':
1157+
elif _WINDOWS:
11281158

11291159
__all__.append('disk_usage')
11301160
_ntuple_diskusage = collections.namedtuple('usage', 'total used free')

Lib/test/test_shutil.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,6 +1935,23 @@ def test_file_offset(self):
19351935
self.assertEqual(src.tell(), self.FILESIZE)
19361936
self.assertEqual(dst.tell(), self.FILESIZE)
19371937

1938+
@unittest.skipIf(os.name != 'nt', "Windows only")
def test_alternate_win_impl(self):
    """Check which copy routine copyfile() selects on Windows."""
    # On Windows copyfile() uses copyfileobj() for files < 128 MiB,
    # else an alternate memoryview()-based implementation.
    with unittest.mock.patch("shutil._copybinfileobj") as m:
        shutil.copyfile(TESTFN, TESTFN2)
    # Use unittest assertions rather than bare ``assert`` so the checks
    # are not stripped when the test suite runs under ``python -O``.
    self.assertFalse(m.called)

    fname = TESTFN + '-win'
    self.addCleanup(support.unlink, fname)
    # Exactly 128 MiB, i.e. the smallest size that takes the alternate path.
    write_test_file(fname, 128 * 1024 * 1024)
    with unittest.mock.patch("shutil._copybinfileobj") as m:
        shutil.copyfile(fname, TESTFN2)
    self.assertTrue(m.called)
    # Copy again without the mock and verify the real implementation
    # produces an identical file.
    shutil.copyfile(fname, TESTFN2)
    self.assert_files_eq(fname, TESTFN2)
1954+
19381955

19391956
class _ZeroCopyFileTest(object):
19401957
"""Tests common to all zero-copy APIs."""

0 commit comments

Comments
 (0)