43
43
except ImportError :
44
44
getgrnam = None
45
45
46
+ _WINDOWS = os .name == 'nt'
46
47
posix = nt = None
47
48
if os .name == 'posix' :
48
49
import posix
49
- elif os . name == 'nt' :
50
+ elif _WINDOWS :
50
51
import nt
51
52
52
- COPY_BUFSIZE = 1024 * 1024 if os . name == 'nt' else 16 * 1024
53
+ COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 16 * 1024
53
54
_HAS_SENDFILE = posix and hasattr (os , "sendfile" )
54
55
_HAS_FCOPYFILE = posix and hasattr (posix , "_fcopyfile" ) # macOS
55
56
@@ -168,6 +169,25 @@ def _fastcopy_sendfile(fsrc, fdst):
168
169
break # EOF
169
170
offset += sent
170
171
172
def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """Copy the contents of binary file object fsrc to file object fdst.

    Data is read with fsrc.readinto() into one reusable buffer of
    *length* bytes and handed to fdst.write() through a memoryview, so
    the same buffer is reused for every chunk.  fsrc must provide
    readinto() and fdst must accept buffer objects in write().

    This is used on Windows only for files >= 128 MiB as it appears to
    give a considerable boost.
    """
    # Localize variable access to minimize overhead.
    fsrc_readinto = fsrc.readinto
    fdst_write = fdst.write
    with memoryview(bytearray(length)) as mv:
        while True:
            n = fsrc_readinto(mv)
            if not n:
                # Nothing was read: stop copying.
                break
            elif n < length:
                # Short read: write only the bytes that were filled in.
                # The slice is a view on the same buffer and is released
                # as soon as it has been written.
                with mv[:n] as smv:
                    fdst_write(smv)
            else:
                fdst_write(mv)
190
+
171
191
def copyfileobj (fsrc , fdst , length = COPY_BUFSIZE ):
172
192
"""copy data from file-like object fsrc to file-like object fdst"""
173
193
# Localize variable access to minimize overhead.
@@ -192,7 +212,7 @@ def _samefile(src, dst):
192
212
os .path .normcase (os .path .abspath (dst )))
193
213
194
214
def copyfile (src , dst , * , follow_symlinks = True ):
195
- """Copy data from src to dst.
215
+ """Copy data from src to dst in the most efficient way possible .
196
216
197
217
If follow_symlinks is not set and src is a symbolic link, a new
198
218
symlink will be created instead of copying the file it points to.
@@ -201,7 +221,8 @@ def copyfile(src, dst, *, follow_symlinks=True):
201
221
if _samefile (src , dst ):
202
222
raise SameFileError ("{!r} and {!r} are the same file" .format (src , dst ))
203
223
204
- for fn in [src , dst ]:
224
+ fsize = 0
225
+ for i , fn in enumerate ([src , dst ]):
205
226
try :
206
227
st = os .stat (fn )
207
228
except OSError :
@@ -211,6 +232,8 @@ def copyfile(src, dst, *, follow_symlinks=True):
211
232
# XXX What about other special files? (sockets, devices...)
212
233
if stat .S_ISFIFO (st .st_mode ):
213
234
raise SpecialFileError ("`%s` is a named pipe" % fn )
235
+ if _WINDOWS and i == 0 :
236
+ fsize = st .st_size
214
237
215
238
if not follow_symlinks and os .path .islink (src ):
216
239
os .symlink (os .readlink (src ), dst )
@@ -230,7 +253,14 @@ def copyfile(src, dst, *, follow_symlinks=True):
230
253
except _GiveupOnFastCopy :
231
254
pass
232
255
233
- copyfileobj (fsrc , fdst )
256
+ if _WINDOWS and fsize >= 128 * 1024 * 1024 :
257
+ # Use alternate memoryview() based implementation on Windows
258
+ # for files >= 128 MiB. It appears this gives a considerable
259
+ # speedup, see:
260
+ # https://github.com/python/cpython/pull/7160#discussion_r195162475
261
+ _copybinfileobj (fsrc , fdst )
262
+ else :
263
+ copyfileobj (fsrc , fdst )
234
264
235
265
return dst
236
266
@@ -1124,7 +1154,7 @@ def disk_usage(path):
1124
1154
used = (st .f_blocks - st .f_bfree ) * st .f_frsize
1125
1155
return _ntuple_diskusage (total , used , free )
1126
1156
1127
- elif os . name == 'nt' :
1157
+ elif _WINDOWS :
1128
1158
1129
1159
__all__ .append ('disk_usage' )
1130
1160
_ntuple_diskusage = collections .namedtuple ('usage' , 'total used free' )
0 commit comments