Skip to content

Improvements to _php_stream_copy_to_stream_ex() #10603

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ext/zend_test/php_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ ZEND_BEGIN_MODULE_GLOBALS(zend_test)
int replace_zend_execute_ex;
int register_passes;
bool print_stderr_mshutdown;
zend_long limit_copy_file_range;
zend_long limit_copy_file_range_length;
zend_long limit_copy_file_range_times;
zend_long amount_of_times_called_copy_file_range;
zend_test_fiber *active_fiber;
zend_long quantity_value;
zend_string *str_test;
Expand Down
13 changes: 10 additions & 3 deletions ext/zend_test/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,8 @@ PHP_INI_BEGIN()
STD_PHP_INI_BOOLEAN("zend_test.register_passes", "0", PHP_INI_SYSTEM, OnUpdateBool, register_passes, zend_zend_test_globals, zend_test_globals)
STD_PHP_INI_BOOLEAN("zend_test.print_stderr_mshutdown", "0", PHP_INI_SYSTEM, OnUpdateBool, print_stderr_mshutdown, zend_zend_test_globals, zend_test_globals)
#ifdef HAVE_COPY_FILE_RANGE
STD_PHP_INI_ENTRY("zend_test.limit_copy_file_range", "-1", PHP_INI_ALL, OnUpdateLong, limit_copy_file_range, zend_zend_test_globals, zend_test_globals)
STD_PHP_INI_ENTRY("zend_test.limit_copy_file_range_length", "-1", PHP_INI_ALL, OnUpdateLong, limit_copy_file_range_length, zend_zend_test_globals, zend_test_globals)
STD_PHP_INI_ENTRY("zend_test.limit_copy_file_range_times", "-1", PHP_INI_ALL, OnUpdateLong, limit_copy_file_range_times, zend_zend_test_globals, zend_test_globals)
#endif
STD_PHP_INI_ENTRY("zend_test.quantity_value", "0", PHP_INI_ALL, OnUpdateLong, quantity_value, zend_zend_test_globals, zend_test_globals)
STD_PHP_INI_ENTRY("zend_test.str_test", "", PHP_INI_ALL, OnUpdateStr, str_test, zend_zend_test_globals, zend_test_globals)
Expand Down Expand Up @@ -962,9 +963,15 @@ PHP_ZEND_TEST_API void bug_gh9090_void_int_char_var(int i, char *fmt, ...) {
*/
PHP_ZEND_TEST_API ssize_t copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, size_t len, unsigned int flags)
{
if (ZT_G(limit_copy_file_range_times) >= Z_L(0)) {
if (++ZT_G(amount_of_times_called_copy_file_range) > ZT_G(limit_copy_file_range_times)) {
errno = EIO;
return -1;
}
}
ssize_t (*original_copy_file_range)(int, off64_t *, int, off64_t *, size_t, unsigned int) = dlsym(RTLD_NEXT, "copy_file_range");
if (ZT_G(limit_copy_file_range) >= Z_L(0)) {
len = ZT_G(limit_copy_file_range);
if (ZT_G(limit_copy_file_range_length) >= Z_L(0)) {
len = MIN(len, ZT_G(limit_copy_file_range_length));
}
return original_copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
}
Expand Down
2 changes: 1 addition & 1 deletion ext/zend_test/tests/gh10370_1.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ if (PHP_OS != 'Linux') {
}
?>
--INI--
zend_test.limit_copy_file_range=3584
zend_test.limit_copy_file_range_length=3584
--FILE--
<?php
/* Note: the value 3584 is chosen so that the mmap in _php_stream_copy_to_stream_ex() will mmap
Expand Down
2 changes: 1 addition & 1 deletion ext/zend_test/tests/gh10370_2.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if (PHP_OS != 'Linux') {
}
?>
--INI--
zend_test.limit_copy_file_range=4096
zend_test.limit_copy_file_range_length=4096
--FILE--
<?php
/* Note: the value 4096 is chosen so that the mmap in _php_stream_copy_to_stream_ex() will mmap
Expand Down
2 changes: 1 addition & 1 deletion ext/zend_test/tests/gh10370_3.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if (PHP_OS != 'Linux') {
}
?>
--INI--
zend_test.limit_copy_file_range=3584
zend_test.limit_copy_file_range_length=3584
--FILE--
<?php
/* Note: the value 3584 is chosen so that the mmap in _php_stream_copy_to_stream_ex() will mmap
Expand Down
2 changes: 1 addition & 1 deletion ext/zend_test/tests/gh10370_4.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if (PHP_OS != 'Linux') {
}
?>
--INI--
zend_test.limit_copy_file_range=4096
zend_test.limit_copy_file_range_length=4096
--FILE--
<?php
/* Note: the value 4096 is chosen so that the mmap in _php_stream_copy_to_stream_ex() will mmap
Expand Down
37 changes: 37 additions & 0 deletions ext/zend_test/tests/gh10370_5.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
--TEST--
GH-10370: File corruption in _php_stream_copy_to_stream_ex when using copy_file_range - partial copy using stream_copy_to_stream, limit times
--EXTENSIONS--
zend_test
--SKIPIF--
<?php
if (PHP_OS != 'Linux') {
die('skip For Linux only');
}
?>
--INI--
zend_test.limit_copy_file_range_length=3584
zend_test.limit_copy_file_range_times=2
--FILE--
<?php
/* Note: the value 3584 is chosen so that the mmap in _php_stream_copy_to_stream_ex() will mmap
* at an offset of a multiple of 4096, which is the standard page size in most Linux systems. */
mkdir(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_005');

$input = fopen(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370.tar', 'r');
$output = fopen(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_005' . DIRECTORY_SEPARATOR . 'testfile', 'w');

var_dump(stream_copy_to_stream($input, $output, 10240, 0x200));

fclose($input);
fclose($output);

var_dump(sha1_file(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_005' . DIRECTORY_SEPARATOR . 'testfile'));
?>
--EXPECT--
int(10240)
string(40) "a723ae4ec7eababff73ca961a771b794be6388d2"
--CLEAN--
<?php
@unlink(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_005' . DIRECTORY_SEPARATOR . 'testfile');
@rmdir(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_005');
?>
37 changes: 37 additions & 0 deletions ext/zend_test/tests/gh10370_6.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
--TEST--
GH-10370: File corruption in _php_stream_copy_to_stream_ex when using copy_file_range - unlimited copy using stream_copy_to_stream, limit times
--EXTENSIONS--
zend_test
--SKIPIF--
<?php
if (PHP_OS != 'Linux') {
die('skip For Linux only');
}
?>
--INI--
zend_test.limit_copy_file_range_length=4096
zend_test.limit_copy_file_range_times=2
--FILE--
<?php
/* Note: the value 4096 is chosen so that the mmap in _php_stream_copy_to_stream_ex() will mmap
* at an offset of a multiple of 4096, which is the standard page size in most Linux systems. */

$input = fopen(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370.tar', 'r');
$output = fopen(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_006_out.tar', 'w');

var_dump(stream_copy_to_stream($input, $output));

fclose($input);
fclose($output);

var_dump(sha1_file(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370.tar'));
var_dump(sha1_file(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_006_out.tar'));
?>
--EXPECT--
int(11776)
string(40) "edcad8cd6c276f5e318c826ad77a5604d6a6e93d"
string(40) "edcad8cd6c276f5e318c826ad77a5604d6a6e93d"
--CLEAN--
<?php
@unlink(__DIR__ . DIRECTORY_SEPARATOR . 'gh10370_006_out.tar');
?>
116 changes: 68 additions & 48 deletions main/streams/streams.c
Original file line number Diff line number Diff line change
Expand Up @@ -1563,73 +1563,93 @@ PHPAPI zend_result _php_stream_copy_to_stream_ex(php_stream *src, php_stream *de
src->writepos == src->readpos) {
/* both php_stream instances are backed by a file descriptor, are not filtered and the
* read buffer is empty: we can use copy_file_range() */
int src_fd, dest_fd, dest_open_flags = 0;
int src_fd, dest_fd, dest_open_flags;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might have been some warning in of the tool about potentially uninitialised memory but not 100% sure.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point. Should be cheap anyway, so I'll restore this.


/* get dest open flags to check if the stream is open in append mode */
php_stream_parse_fopen_modes(dest->mode, &dest_open_flags);
/* Some filesystems will cause failures if the max length is greater than the file length.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this just a minor use case? Does it make sense to optimize for that and penalise (even though it might be minor penalisation but it's still extra syscall) majority of use cases?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the stat would be pretty cheap in comparison to the file IO, but you might be right about this. I'll think about it a bit more

* We therefore use a stat call to get the actual file size. Since stat calls are cached
* this shouldn't have much impact on performance.
* In case the cached result is less than the actual size, the code will just copy more
* in the next iteration. In case it is too large, it will at worst fall into the EIO error
* case and therefore fall back to the mmap or regular copy. In case of races the reasoning
* is the same as for a wrong cached result. Therefore, this won't cause problems. */
php_stream_statbuf statbuf;

/* copy_file_range does not work with O_APPEND */
if (php_stream_cast(src, PHP_STREAM_AS_FD, (void*)&src_fd, 0) == SUCCESS &&
php_stream_cast(dest, PHP_STREAM_AS_FD, (void*)&dest_fd, 0) == SUCCESS &&
/* get dest open flags to check if the stream is open in append mode */
php_stream_parse_fopen_modes(dest->mode, &dest_open_flags) == SUCCESS &&
!(dest_open_flags & O_APPEND)) {

/* clamp to INT_MAX to avoid EOVERFLOW */
const size_t cfr_max = MIN(maxlen, (size_t)SSIZE_MAX);

!(dest_open_flags & O_APPEND) &&
php_stream_stat(src, &statbuf) == SUCCESS &&
/* Some files (such as /proc files) report a size of zero, avoid problems */
statbuf.sb.st_size > 0) {
/* copy_file_range() is a Linux-specific system call which allows efficient copying
* between two file descriptors, eliminating the need to transfer data from the kernel
* to userspace and back. For networking file systems like NFS and Ceph, it even
* eliminates copying data to the client, and local filesystems like Btrfs and XFS can
* create shared extents. */
ssize_t result = copy_file_range(src_fd, NULL, dest_fd, NULL, cfr_max, 0);
if (result > 0) {
size_t nbytes = (size_t)result;
haveread += nbytes;
ssize_t result;
size_t must_read = MIN(maxlen, statbuf.sb.st_size);
do {
/* clamp to SSIZE_MAX to avoid EOVERFLOW */
const size_t cfr_max = MIN(must_read - haveread, (size_t)SSIZE_MAX);

src->position += nbytes;
dest->position += nbytes;
result = copy_file_range(src_fd, NULL, dest_fd, NULL, cfr_max, 0);

if ((maxlen != PHP_STREAM_COPY_ALL && nbytes == maxlen) || php_stream_eof(src)) {
/* the whole request was satisfied or end-of-file reached - done */
if (result > 0) {
size_t nbytes = (size_t)result;
haveread += nbytes;

src->position += nbytes;
dest->position += nbytes;

/* We must check maxlen here instead of must_read, because must_read may be
* a stale value and must therefore only be used as a hint. */
if ((maxlen != PHP_STREAM_COPY_ALL && haveread == maxlen) || php_stream_eof(src)) {
/* the whole request was satisfied or end-of-file reached - done */
*len = haveread;
return SUCCESS;
}

/* copy_file_range may return early without copying all data.
* There may be more data to copy; continue copying using file_copy_range, and
* if a failure would occur, it will fall back to the code below. */
} else if (result == 0) {
/* end of file */
*len = haveread;
return SUCCESS;
}
} while (result >= 0);

/* there may be more data; continue copying using the fallback code below */
} else if (result == 0) {
/* end of file */
*len = haveread;
return SUCCESS;
} else if (result < 0) {
switch (errno) {
case EINVAL:
/* some formal error, e.g. overlapping file ranges */
break;

case EXDEV:
/* pre Linux 5.3 error */
break;

case ENOSYS:
/* not implemented by this Linux kernel */
break;

case EIO:
/* Some filesystems will cause failures if the max length is greater than the file length
* in certain circumstances and configuration. In those cases the errno is EIO and we will
* fall back to other methods. We cannot use stat to determine the file length upfront because
* that is prone to races and outdated caching. */
break;

default:
/* unexpected I/O error - give up, no fallback */
*len = haveread;
return FAILURE;
}
ZEND_ASSERT(result < 0);

/* fall back to classic copying */
switch (errno) {
case EINVAL:
/* some formal error, e.g. overlapping file ranges */
break;

case EXDEV:
/* pre Linux 5.3 error */
break;

case ENOSYS:
/* not implemented by this Linux kernel */
break;

case EIO:
/* Some filesystems will cause failures if the max length is greater than the file length
* in certain circumstances and configuration. In those cases the errno is EIO and we will
* fall back to other methods. We cannot use stat to determine the file length upfront because
* that is prone to races and outdated caching. */
break;

default:
/* unexpected I/O error - give up, no fallback */
*len = haveread;
return FAILURE;
}

/* fall back to classic copying */
}
}
#endif // HAVE_COPY_FILE_RANGE
Expand Down