bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) #7160
@@ -10,6 +10,7 @@
 import fnmatch
 import collections
 import errno
+import io
 
 try:
     import zlib
@@ -42,6 +43,16 @@
 except ImportError:
     getgrnam = None
 
+posix = nt = None
+if os.name == 'posix':
+    import posix
+elif os.name == 'nt':
+    import nt
+
+COPY_BUFSIZE = 1024 * 1024 if os.name == 'nt' else 16 * 1024
+_HAS_SENDFILE = posix and hasattr(os, "sendfile")
+_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile")  # OSX
+
 __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
            "copytree", "move", "rmtree", "Error", "SpecialFileError",
            "ExecError", "make_archive", "get_archive_formats",
@@ -72,14 +83,124 @@ class RegistryError(Exception):
     """Raised when a registry operation with the archiving
     and unpacking registries fails"""
 
+class _GiveupOnFastCopy(Exception):
+    """Raised as a signal to fallback on using raw read()/write()
+    file copy when fast-copy functions fail to do so.
+    """
+def _fastcopy_osx(fsrc, fdst, flags):
+    """Copy a regular file content or metadata by using high-performance
+    fcopyfile(3) syscall (OSX).
+    """
+    try:
+        infd = fsrc.fileno()
+        outfd = fdst.fileno()
+    except Exception as err:
+        raise _GiveupOnFastCopy(err)  # not a regular file
+
+    try:
+        posix._fcopyfile(infd, outfd, flags)
+    except OSError as err:
+        err.filename = fsrc.name
+        err.filename2 = fdst.name
+        if err.errno in {errno.EINVAL, errno.ENOTSUP}:
+            raise _GiveupOnFastCopy(err)
+        else:
+            raise err from None
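An aside for orientation, not part of the diff: the helper above is driven by copyfile() further down, which passes the private posix._COPYFILE_DATA flag and falls back to a plain buffered copy when the helper gives up. A minimal sketch of that call pattern (the file names are placeholders):

with open('src.bin', 'rb') as fsrc, open('dst.bin', 'wb') as fdst:
    try:
        # Copy file data only (no metadata), as copyfile() does below.
        _fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA)
    except _GiveupOnFastCopy:
        # Fall back to the buffered read()/write() loop.
        _copybinfileobj(fsrc, fdst)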
+def _fastcopy_sendfile(fsrc, fdst):
+    """Copy data from one regular mmap-like fd to another by using
+    high-performance sendfile(2) syscall.
+    This should work on Linux >= 2.6.33 and Solaris only.
+    """
+    # Note: copyfileobj() is left alone in order to not introduce any
+    # unexpected breakage. Possible risks by using zero-copy calls
+    # in copyfileobj() are:
+    # - fdst cannot be open in "a"(ppend) mode
+    # - fsrc and fdst may be open in "t"(ext) mode
+    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
+    #   GzipFile (which decompresses data), HTTPResponse (which decodes
+    #   chunks).
+    # - possibly others (e.g. encrypted fs/partition?)
+    global _HAS_SENDFILE
+    try:
+        infd = fsrc.fileno()
+        outfd = fdst.fileno()
+    except Exception as err:
+        raise _GiveupOnFastCopy(err)  # not a regular file
+
+    # Hopefully the whole file will be copied in a single call.
+    # sendfile() is called in a loop 'till EOF is reached (0 return)
+    # so a bufsize smaller or bigger than the actual file size
+    # should not make any difference, also in case the file content
+    # changes while being copied.
+    try:
+        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MB
+    except Exception:
+        blocksize = 2 ** 27  # 128MB
+
+    offset = 0
+    while True:
+        try:
+            sent = os.sendfile(outfd, infd, offset, blocksize)
+        except OSError as err:
+            # ...in order to have a more informative exception.
+            err.filename = fsrc.name
+            err.filename2 = fdst.name
+
+            if err.errno == errno.ENOTSOCK:
+                # sendfile() on this platform (probably Linux < 2.6.33)
+                # does not support copies between regular files (only
+                # sockets).
+                _HAS_SENDFILE = False
+                raise _GiveupOnFastCopy(err)
+
+            if err.errno == errno.ENOSPC:  # filesystem is full
+                raise err from None
+
+            # Give up on first call and if no data was copied.
+            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
+                raise _GiveupOnFastCopy(err)
+
+            raise err
+        else:
+            if sent == 0:
+                break  # EOF
+            offset += sent
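As a stand-alone illustration of the underlying API (assuming Linux >= 2.6.33, regular files, and placeholder file names), the core of the loop above reduces to:

import os

def sendfile_copy(src, dst):
    # Minimal zero-copy loop using os.sendfile(out_fd, in_fd, offset, count).
    # No fallback or error handling, unlike _fastcopy_sendfile() above.
    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        infd, outfd = fsrc.fileno(), fdst.fileno()
        offset = 0
        while True:
            sent = os.sendfile(outfd, infd, offset, 2 ** 23)  # 8 MiB chunks
            if sent == 0:
                break  # EOF
            offset += sent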
+def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
+    """Copy 2 regular file objects open in binary mode."""
+    # Localize variable access to minimize overhead.
+    fsrc_readinto = fsrc.readinto
+    fdst_write = fdst.write
+    with memoryview(bytearray(length)) as mv:
+        while True:
+            n = fsrc_readinto(mv)
+            if not n:
+                break
+            elif n < length:
+                fdst_write(mv[:n])
+            else:
+                fdst_write(mv)
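Why slice the memoryview for the final short read? Slicing a memoryview produces another view over the same buffer rather than copying the bytes, so the last partial block can be written without allocating a new bytes object. A quick illustration of that property:

import io

buf = bytearray(8)
mv = memoryview(buf)
n = io.BytesIO(b'abc').readinto(mv)  # short read: n == 3
part = mv[:n]                        # another view, not a copy
assert part.obj is buf               # still backed by the same buffer
assert bytes(part) == b'abc'
part.release()
mv.release()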
+def _is_binary_files_pair(fsrc, fdst):
+    return hasattr(fsrc, 'readinto') and \
+        isinstance(fsrc, io.BytesIO) or 'b' in getattr(fsrc, 'mode', '') and \
+        isinstance(fdst, io.BytesIO) or 'b' in getattr(fdst, 'mode', '')

Which objects provide readinto() […]?

I think catching […]

I benchmarked […]

I don't know how you're testing, but the performance difference with […]

This is how I'm testing it:

$ python -c "import os; f = open('f1', 'wb'); f.write(os.urandom(8 * 1024 * 1024))"
$ time ./python -m timeit -s 'import shutil; p1 = "f1"; p2 = "f2"' 'shutil.copyfile(p1, p2)'

I wrote a batch script to figure out timings on Windows more easily and this is the result (first value is original […]). I think the […] On the other hand, the same test on Linux shows there is no relevant difference for 512 MB files and a performance degradation for smaller ones.

This command is wrong: […] The files need to be opened for each pass of the loop, not the setup. That explains the unexpected results. I corrected it to open the files in the loop statement instead of the setup and tested a broad range of file sizes. In the table below, all times are best of five for the given number of loops, and normalized overall to make the 64 KiB result in the RI_S column equal to 100 time units. I discuss the RI_S case in more detail below. […]

Originally I had tested at 128 MiB with a custom test script to focus on the effects of cached vs non-cached I/O. I assumed the results would be similar for other cases. As shown in the RI_1M column, that's basically true for files larger than 1 megabyte. But there's a significant performance degradation for smaller files. In the RI_S case, I address this by calling […]. RI_S also experiments with calling […]. If I have time, I may run another experiment using […]. Below is the code for RI_S: […]

Interesting. Thanks for the very detailed benchmark. I updated the other branch which now dynamically sets […]

#7681 was merged
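As an aside, a self-contained sketch of the kind of micro-benchmark discussed in this thread, opening the files inside the timed statement rather than in the setup (the file names and loop count are arbitrary):

import os
import timeit

# Create an 8 MiB test file, as in the shell one-liner quoted earlier.
with open('f1', 'wb') as f:
    f.write(os.urandom(8 * 1024 * 1024))

stmt = (
    "with open('f1', 'rb') as fsrc, open('f2', 'wb') as fdst:\n"
    "    shutil.copyfileobj(fsrc, fdst)"
)
print(timeit.timeit(stmt, setup='import shutil', number=100))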
 
-def copyfileobj(fsrc, fdst, length=16*1024):
+def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
     """copy data from file-like object fsrc to file-like object fdst"""
-    while 1:
-        buf = fsrc.read(length)
-        if not buf:
-            break
-        fdst.write(buf)
+    if _is_binary_files_pair(fsrc, fdst):
+        _copybinfileobj(fsrc, fdst, length=length)
+    else:
+        # Localize variable access to minimize overhead.
+        fsrc_read = fsrc.read
+        fdst_write = fdst.write
+        while 1:
+            buf = fsrc_read(length)
+            if not buf:
+                break
+            fdst_write(buf)
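To make the new dispatch concrete, an illustrative example (assuming this patch is applied): a pair of BytesIO objects, or file objects whose mode contains 'b', is routed to _copybinfileobj(), while text-mode file objects keep the historical read()/write() loop:

import io
import shutil

src = io.BytesIO(b'x' * 1024)
dst = io.BytesIO()
shutil.copyfileobj(src, dst)  # binary pair: handled by _copybinfileobj()
assert dst.getvalue() == b'x' * 1024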
 
 def _samefile(src, dst):
     # Macintosh, Unix.
@@ -117,9 +238,23 @@ def copyfile(src, dst, *, follow_symlinks=True):
     if not follow_symlinks and os.path.islink(src):
         os.symlink(os.readlink(src), dst)
     else:
-        with open(src, 'rb') as fsrc:
-            with open(dst, 'wb') as fdst:
-                copyfileobj(fsrc, fdst)
+        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
+            if _HAS_SENDFILE:
+                try:
+                    _fastcopy_sendfile(fsrc, fdst)
+                    return dst
+                except _GiveupOnFastCopy:
+                    pass
+
+            if _HAS_FCOPYFILE:
+                try:
+                    _fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA)
+                    return dst
+                except _GiveupOnFastCopy:
+                    pass
+
+            _copybinfileobj(fsrc, fdst)
+
     return dst
 
 def copymode(src, dst, *, follow_symlinks=True):
@@ -244,13 +379,12 @@ def copy(src, dst, *, follow_symlinks=True):
 
 def copy2(src, dst, *, follow_symlinks=True):
     """Copy data and all stat info ("cp -p src dst"). Return the file's
-    destination."
+    destination.
 
     The destination may be a directory.
 
     If follow_symlinks is false, symlinks won't be followed. This
     resembles GNU's "cp -P src dst".
-
     """
     if os.path.isdir(dst):
         dst = os.path.join(dst, os.path.basename(src))
@@ -1015,7 +1149,6 @@ def disk_usage(path):
 
 elif os.name == 'nt':
-    import nt
     __all__.append('disk_usage')
     _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
In my test case I used another with block to release this mv[:n] view, rather than depend on implicit deallocation to release it; for example, see the sketch after this comment. This bytearray is internal, but is there any issue with memory usage in garbage-collected versions of Python (e.g. Jython, IronPython) if the views on the buffer (1 MiB in Windows) aren't released explicitly? If not you can remove the first with block as well.
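A sketch of the shape being suggested, reconstructed here since the original snippet was not captured; the inner with block releases the sliced view as soon as the write completes:

def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """Copy 2 regular file objects open in binary mode."""
    with memoryview(bytearray(length)) as mv:
        while True:
            n = fsrc.readinto(mv)
            if not n:
                break
            elif n < length:
                # Release the sliced view explicitly rather than relying
                # on implicit deallocation.
                with mv[:n] as smv:
                    fdst.write(smv)
            else:
                fdst.write(mv)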
Uhm... yes, given the big bufsize I think it makes sense to also immediately release the sliced memoryview.
@eryksun If I recall correctly, memory views inadvertently keeping large memory buffers alive on GC based implementations was a key driver in adding context management support to memoryview in the first place, so that's definitely a concern worth keeping in mind for this kind of code.
https://bugs.python.org/issue9757