Skip to content

Commit fe251fb

Browse files
committed
main/streams/streams: use copy_file_range() on Linux
copy_file_range() is a Linux-specific system call which allows efficient copying between two file descriptors, eliminating the need to transfer data from the kernel to userspace and back. For networking file systems like NFS and Ceph, it even eliminates copying data to the client, and local filesystems like Btrfs and XFS can create shared extents.
1 parent ebd922d commit fe251fb

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

configure.ac

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ PHP_CHECK_FUNC(socketpair, socket, network)
364364
PHP_CHECK_FUNC(htonl, socket, network)
365365
PHP_CHECK_FUNC(gethostname, nsl, network)
366366
PHP_CHECK_FUNC(gethostbyaddr, nsl, network)
367+
PHP_CHECK_FUNC(copy_file_range)
367368
PHP_CHECK_FUNC(dlopen, dl, root)
368369
PHP_CHECK_FUNC(dlsym, dl, root)
369370
if test "$ac_cv_func_dlopen" = "yes"; then

main/streams/streams.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,91 @@ PHPAPI zend_result _php_stream_copy_to_stream_ex(php_stream *src, php_stream *de
15551555
return SUCCESS;
15561556
}
15571557

1558+
#ifdef HAVE_COPY_FILE_RANGE
1559+
if (php_stream_is(src, PHP_STREAM_IS_STDIO) &&
1560+
php_stream_is(dest, PHP_STREAM_IS_STDIO) &&
1561+
src->writepos == src->readpos &&
1562+
php_stream_can_cast(src, PHP_STREAM_AS_FD) == SUCCESS &&
1563+
php_stream_can_cast(dest, PHP_STREAM_AS_FD) == SUCCESS) {
1564+
/* both php_stream instances are backed by a file
1565+
descriptor, are not filtered and the read buffer is
1566+
empty: we can use copy_file_range() */
1567+
1568+
int src_fd, dest_fd;
1569+
1570+
php_stream_cast(src, PHP_STREAM_AS_FD, (void*)&src_fd, 0);
1571+
php_stream_cast(dest, PHP_STREAM_AS_FD, (void*)&dest_fd, 0);
1572+
1573+
/* clamp to SSIZE_MAX to avoid EOVERFLOW */
1574+
const size_t cfr_max = MIN(maxlen, (size_t)SSIZE_MAX);
1575+
1576+
/* copy_file_range() is a Linux-specific system call
1577+
which allows efficient copying between two file
1578+
descriptors, eliminating the need to transfer data
1579+
from the kernel to userspace and back. For
1580+
networking file systems like NFS and Ceph, it even
1581+
eliminates copying data to the client, and local
1582+
filesystems like Btrfs and XFS can create shared
1583+
extents. */
1584+
1585+
#ifdef __FreeBSD__
1586+
// TODO: experimental kludge, see https://github.com/php/php-src/pull/8413#issuecomment-1105486376
1587+
const unsigned flags = 0x01000000;
1588+
#else
1589+
const unsigned flags = 0;
1590+
#endif
1591+
1592+
ssize_t result = copy_file_range(src_fd, NULL,
1593+
dest_fd, NULL,
1594+
cfr_max, flags);
1595+
if (result > 0) {
1596+
size_t nbytes = (size_t)result;
1597+
haveread += nbytes;
1598+
1599+
src->position += nbytes;
1600+
dest->position += nbytes;
1601+
1602+
if ((maxlen != PHP_STREAM_COPY_ALL && nbytes == maxlen) ||
1603+
php_stream_eof(src)) {
1604+
/* the whole request was satisfied or
1605+
end-of-file reached - done */
1606+
*len = haveread;
1607+
return SUCCESS;
1608+
}
1609+
1610+
/* there may be more data; continue copying
1611+
using the fallback code below */
1612+
} else if (result == 0) {
1613+
/* end of file */
1614+
*len = haveread;
1615+
return SUCCESS;
1616+
} else if (result < 0) {
1617+
switch (errno) {
1618+
case EINVAL:
1619+
/* some formal error, e.g. overlapping
1620+
file ranges */
1621+
break;
1622+
1623+
case EXDEV:
1624+
/* source and destination are on different filesystems (an error before Linux 5.3) */
1625+
break;
1626+
1627+
case ENOSYS:
1628+
/* not implemented by this Linux kernel */
1629+
break;
1630+
1631+
default:
1632+
/* unexpected I/O error - give up, no
1633+
fallback */
1634+
*len = haveread;
1635+
return FAILURE;
1636+
}
1637+
1638+
/* fall back to classic copying */
1639+
}
1640+
}
1641+
#endif
1642+
15581643
if (maxlen == PHP_STREAM_COPY_ALL) {
15591644
maxlen = 0;
15601645
}

0 commit comments

Comments
 (0)