|
10 | 10 | import fnmatch
|
11 | 11 | import collections
|
12 | 12 | import errno
|
| 13 | +import io |
13 | 14 |
|
14 | 15 | try:
|
15 | 16 | import zlib
|
|
42 | 43 | except ImportError:
|
43 | 44 | getgrnam = None
|
44 | 45 |
|
# Platform-specific low-level modules.  Each name stays None on platforms
# where the corresponding module is not imported.
posix = None
nt = None
if os.name == 'posix':
    import posix
elif os.name == 'nt':
    import nt

# Default chunk size for userspace copies: 1 MiB on Windows, 16 KiB elsewhere.
COPY_BUFSIZE = 16 * 1024 if os.name != 'nt' else 1024 * 1024
# Truthy only on POSIX platforms where os.sendfile() exists.
_HAS_SENDFILE = posix and hasattr(os, "sendfile")
# Truthy only when the posix module provides _fcopyfile().
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile")  # OSX
| 55 | + |
45 | 56 | __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
|
46 | 57 | "copytree", "move", "rmtree", "Error", "SpecialFileError",
|
47 | 58 | "ExecError", "make_archive", "get_archive_formats",
|
@@ -72,14 +83,124 @@ class RegistryError(Exception):
|
72 | 83 | """Raised when a registry operation with the archiving
|
73 | 84 | and unpacking registries fails"""
|
74 | 85 |
|
class _GiveupOnFastCopy(Exception):
    """Signal that a fast-copy function could not perform the copy.

    It tells the caller to fall back to a plain read()/write() file copy.
    """
| 90 | + |
def _fastcopy_osx(fsrc, fdst, flags):
    """Copy file content or metadata with the fcopyfile(3) syscall (OSX).

    *fsrc* and *fdst* are open file objects; *flags* is handed to
    posix._fcopyfile() unchanged.

    Raise _GiveupOnFastCopy when a file descriptor cannot be obtained from
    either object, or when the call fails with EINVAL or ENOTSUP.  Any
    other OSError is re-raised with its ``filename`` and ``filename2``
    attributes set to the names of the two file objects.
    """
    try:
        src_fd, dst_fd = fsrc.fileno(), fdst.fileno()
    except Exception as exc:
        # Not a regular file.
        raise _GiveupOnFastCopy(exc)

    try:
        posix._fcopyfile(src_fd, dst_fd, flags)
    except OSError as exc:
        exc.filename = fsrc.name
        exc.filename2 = fdst.name
        if exc.errno not in {errno.EINVAL, errno.ENOTSUP}:
            raise exc from None
        raise _GiveupOnFastCopy(exc)
| 110 | + |
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 and Solaris only.

    *fsrc* and *fdst* are open file objects.  Raise _GiveupOnFastCopy when
    a file descriptor cannot be obtained, when sendfile() reports ENOTSOCK
    (in which case _HAS_SENDFILE is also switched off), or when the very
    first call fails before any data was written.  Other OSErrors are
    re-raised with ``filename`` and ``filename2`` set.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _HAS_SENDFILE
    import sys  # local import: only needed for the 32-bit clamp below

    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop 'till EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MiB
    except Exception:
        blocksize = 2 ** 27  # 128MiB
    # On 32-bit builds a source larger than 2 GiB would produce a count
    # that does not fit the platform's signed size type, so cap the
    # per-call count at 1 GiB there; the loop below copies the rest.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).
                _HAS_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
| 170 | + |
def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """Copy 2 regular file objects open in binary mode.

    Data is read from *fsrc* with readinto() into one reusable buffer of
    *length* bytes and written to *fdst* with write(), until readinto()
    reports that nothing more was read.
    """
    # Localize variable access to minimize overhead.
    fsrc_readinto = fsrc.readinto
    fdst_write = fdst.write
    with memoryview(bytearray(length)) as mv:
        while True:
            n = fsrc_readinto(mv)
            if not n:
                break
            elif n < length:
                # Short read: write only the filled prefix.  The slice is
                # released explicitly instead of being left to the garbage
                # collector, so it does not keep the buffer exported.
                with mv[:n] as smv:
                    fdst_write(smv)
            else:
                fdst_write(mv)
| 185 | + |
def _is_binary_files_pair(fsrc, fdst):
    """Return True if both objects look like binary files usable with
    readinto()/write().

    *fsrc* must provide readinto().  Each object counts as binary when it
    is an io.BytesIO instance or when its ``mode`` attribute is a string
    containing 'b'.  A missing or non-string ``mode`` (some file-like
    objects expose an integer there) is treated as "not binary" instead
    of raising.
    """
    def _is_binary(fobj):
        if isinstance(fobj, io.BytesIO):
            return True
        mode = getattr(fobj, 'mode', '')
        return isinstance(mode, str) and 'b' in mode

    # All three conditions must hold; the checks are grouped explicitly so
    # that a binary destination alone can never make the pair qualify.
    return hasattr(fsrc, 'readinto') and _is_binary(fsrc) and _is_binary(fdst)
75 | 190 |
|
76 |
| -def copyfileobj(fsrc, fdst, length=16*1024): |
def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """copy data from file-like object fsrc to file-like object fdst

    Data is moved in chunks of *length*.  When both objects are detected
    as binary files and *length* is a positive integer, a reusable
    buffer filled through readinto() is used; otherwise the copy is a
    plain read()/write() loop, with *length* passed straight to
    fsrc.read().
    """
    # The buffered path allocates bytearray(length), which rejects negative
    # or non-integer sizes.  Such values stay on the read() loop, where
    # io streams treat e.g. read(-1) as "read everything".
    if (isinstance(length, int) and length > 0
            and _is_binary_files_pair(fsrc, fdst)):
        _copybinfileobj(fsrc, fdst, length=length)
        return

    # Localize variable access to minimize overhead.
    fsrc_read = fsrc.read
    fdst_write = fdst.write
    while True:
        buf = fsrc_read(length)
        if not buf:
            break
        fdst_write(buf)
83 | 204 |
|
84 | 205 | def _samefile(src, dst):
|
85 | 206 | # Macintosh, Unix.
|
@@ -117,9 +238,23 @@ def copyfile(src, dst, *, follow_symlinks=True):
|
117 | 238 | if not follow_symlinks and os.path.islink(src):
|
118 | 239 | os.symlink(os.readlink(src), dst)
|
119 | 240 | else:
|
120 |
| - with open(src, 'rb') as fsrc: |
121 |
| - with open(dst, 'wb') as fdst: |
122 |
| - copyfileobj(fsrc, fdst) |
| 241 | + with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: |
| 242 | + if _HAS_SENDFILE: |
| 243 | + try: |
| 244 | + _fastcopy_sendfile(fsrc, fdst) |
| 245 | + return dst |
| 246 | + except _GiveupOnFastCopy: |
| 247 | + pass |
| 248 | + |
| 249 | + if _HAS_FCOPYFILE: |
| 250 | + try: |
| 251 | + _fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA) |
| 252 | + return dst |
| 253 | + except _GiveupOnFastCopy: |
| 254 | + pass |
| 255 | + |
| 256 | + _copybinfileobj(fsrc, fdst) |
| 257 | + |
123 | 258 | return dst
|
124 | 259 |
|
125 | 260 | def copymode(src, dst, *, follow_symlinks=True):
|
@@ -244,13 +379,12 @@ def copy(src, dst, *, follow_symlinks=True):
|
244 | 379 |
|
245 | 380 | def copy2(src, dst, *, follow_symlinks=True):
|
246 | 381 | """Copy data and all stat info ("cp -p src dst"). Return the file's
|
247 |
| - destination." |
| 382 | + destination. |
248 | 383 |
|
249 | 384 | The destination may be a directory.
|
250 | 385 |
|
251 | 386 | If follow_symlinks is false, symlinks won't be followed. This
|
252 | 387 | resembles GNU's "cp -P src dst".
|
253 |
| -
|
254 | 388 | """
|
255 | 389 | if os.path.isdir(dst):
|
256 | 390 | dst = os.path.join(dst, os.path.basename(src))
|
@@ -1015,7 +1149,6 @@ def disk_usage(path):
|
1015 | 1149 |
|
1016 | 1150 | elif os.name == 'nt':
|
1017 | 1151 |
|
1018 |
| - import nt |
1019 | 1152 | __all__.append('disk_usage')
|
1020 | 1153 | _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
|
1021 | 1154 |
|
|
0 commit comments