Skip to content

[libc++] use copy_file_range for fs::copy #109211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 139 additions & 33 deletions libcxx/src/filesystem/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <filesystem>
#include <iterator>
#include <string_view>
#include <system_error>
#include <type_traits>
#include <vector>

Expand All @@ -32,22 +33,35 @@
# include <dirent.h>
# include <sys/stat.h>
# include <sys/statvfs.h>
# include <sys/types.h>
# include <unistd.h>
#endif
#include <fcntl.h> /* values for fchmodat */
#include <time.h>

// copy_file_range is available since Linux 4.5 and FreeBSD 13, but on Linux the libc wrapper is only provided by glibc and musl
#if (defined(__linux__) && (defined(__GLIBC__) || _LIBCPP_HAS_MUSL_LIBC)) || defined(__FreeBSD__)
# define _LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE
#endif
#if __has_include(<sys/sendfile.h>)
# include <sys/sendfile.h>
# define _LIBCPP_FILESYSTEM_USE_SENDFILE
#elif defined(__APPLE__) || __has_include(<copyfile.h>)
# include <copyfile.h>
# define _LIBCPP_FILESYSTEM_USE_COPYFILE
#else
# include <fstream>
# define _LIBCPP_FILESYSTEM_USE_FSTREAM
#endif

// sendfile and copy_file_range need to fall back
// to the fstream implementation for special files
#if (defined(_LIBCPP_FILESYSTEM_USE_SENDFILE) || defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) || \
defined(_LIBCPP_FILESYSTEM_USE_FSTREAM)) && \
_LIBCPP_HAS_LOCALIZATION
# include <fstream>
# define _LIBCPP_FILESYSTEM_NEED_FSTREAM
#endif

#if defined(__ELF__) && defined(_LIBCPP_LINK_RT_LIB)
# pragma comment(lib, "rt")
#endif
Expand Down Expand Up @@ -178,9 +192,83 @@ void __copy(const path& from, const path& to, copy_options options, error_code*
namespace detail {
namespace {

#if defined(_LIBCPP_FILESYSTEM_NEED_FSTREAM)
// Userspace fallback: copies everything readable from read_fd into write_fd
// through a pair of binary file streams opened on the raw descriptors.
//
// Returns true and clears ec on success. Returns false with ec set to the
// captured errno when a stream cannot be opened on its descriptor, or to
// errc::io_error when either stream reports failure after the copy.
bool copy_file_impl_fstream(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
  ifstream source;
  source.__open(read_fd.fd, ios::binary);
  if (!source.is_open()) {
    // Relies on __open leaving errno intact when it fails.
    ec = capture_errno();
    return false;
  }
  // NOTE(review): presumably the stream now owns the descriptor, so the
  // FileDescriptor wrapper must forget it -- confirm against FileDescriptor.
  read_fd.fd = -1;

  ofstream sink;
  sink.__open(write_fd.fd, ios::binary);
  if (!sink.is_open()) {
    ec = capture_errno();
    return false;
  }
  write_fd.fd = -1;

  // Only attempt the byte-wise copy when both streams are in a usable state.
  if (source.good() && sink.good()) {
    istreambuf_iterator<char> first(source);
    istreambuf_iterator<char> last;
    ostreambuf_iterator<char> dest(sink);
    copy(first, last, dest);
  }

  const bool streams_ok = !(sink.fail() || source.fail());
  if (streams_ok) {
    ec.clear();
  } else {
    ec = make_error_code(errc::io_error);
  }
  return streams_ok;
}
#endif

#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE)
// Copies the contents of read_fd into write_fd using copy_file_range(2).
//
// Returns true and clears ec on success. Returns false with ec set on failure:
//  - EINVAL (generic_category) when the source reports a size of zero, or when
//    the syscall copies nothing at all, so the caller can try another strategy;
//  - the captured errno when the syscall itself fails.
//
// Explicit offsets are passed to the syscall so the descriptors' own file
// positions stay untouched.
bool copy_file_impl_copy_file_range(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
  size_t count = read_fd.get_stat().st_size;
  // a zero-length file is either empty, or not copyable by this syscall
  // return early to avoid the syscall cost
  if (count == 0) {
    ec = {EINVAL, generic_category()};
    return false;
  }
  // do not modify the fd positions as copy_file_impl_sendfile may be called after a partial copy
  //
  // On Linux the libc wrapper takes 64-bit offset pointers, which differ from
  // off_t on 32-bit targets built without _FILE_OFFSET_BITS=64.
#  if defined(__linux__)
  loff_t off_in  = 0;
  loff_t off_out = 0;
#  else
  off_t off_in  = 0;
  off_t off_out = 0;
#  endif
  do {
    const ssize_t res = ::copy_file_range(read_fd.fd, &off_in, write_fd.fd, &off_out, count, 0);
    if (res == -1) {
      ec = capture_errno();
      return false;
    }
    if (res == 0) {
      // A return of 0 means no bytes were copied (the input offset is at or
      // past end of file). Without this check `count` would never shrink and
      // the loop would spin forever.
      if (off_in == 0) {
        // Nothing was copied at all even though st_size was non-zero: report
        // this like an unsupported file so the caller can fall back.
        ec = {EINVAL, generic_category()};
        return false;
      }
      // The source ended earlier than st_size promised (e.g. it was truncated
      // while copying); stop at end of file like a read/write loop would.
      break;
    }
    count -= res;
  } while (count > 0);

  ec.clear();

  return true;
}
#endif

#if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
bool copy_file_impl_sendfile(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
size_t count = read_fd.get_stat().st_size;
// a zero-length file is either empty, or not copyable by this syscall
// return early to avoid the syscall cost
// however, we can't afford this luxury in the no-locale build,
// as we can't utilize the fstream impl to copy empty files
# if _LIBCPP_HAS_LOCALIZATION
if (count == 0) {
ec = {EINVAL, generic_category()};
return false;
}
# endif
do {
ssize_t res;
if ((res = ::sendfile(write_fd.fd, read_fd.fd, nullptr, count)) == -1) {
Expand All @@ -194,6 +282,54 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod

return true;
}
#endif

#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) || defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
// Copies read_fd into write_fd by trying, in order and as far as each one is
// compiled in: copy_file_range, then sendfile, then the fstream-based copy.
// A strategy is skipped in favour of the next one only when it fails with an
// errno that signals "not usable for these files"; any other error is returned
// to the caller through ec immediately.
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
#  if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE)
  if (copy_file_impl_copy_file_range(read_fd, write_fd, ec))
    return true;
  {
    // Errors after which another copy strategy is still worth trying:
    //   EINVAL:     src and dst are the same file (this is not cheaply
    //               detectable from userspace), or copy_file_range is
    //               unsupported for this file type by the underlying filesystem
    //   ENOTSUP:    undocumented, can arise with old kernels and NFS
    //   EOPNOTSUPP: filesystem does not implement copy_file_range
    //   ETXTBSY:    src or dst is an active swapfile (nonsensical, but allowed
    //               with normal copying)
    //   EXDEV:      src and dst are on different filesystems that do not
    //               support cross-fs copy_file_range
    //   ENOENT:     undocumented, can arise with CIFS
    //   ENOSYS:     unsupported by kernel or blocked by seccomp
    const int err            = ec.value();
    const bool can_fall_back = err == EINVAL || err == ENOTSUP || err == EOPNOTSUPP || err == ETXTBSY ||
                               err == EXDEV || err == ENOENT || err == ENOSYS;
    if (!can_fall_back)
      return false;
  }
  ec.clear();
#  endif

#  if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
  if (copy_file_impl_sendfile(read_fd, write_fd, ec))
    return true;
  // Only EINVAL (unsupported file type) lets us move on to the next strategy.
  const bool sendfile_unsupported = ec.value() == EINVAL;
  if (!sendfile_unsupported)
    return false;
  ec.clear();
#  endif

#  if !defined(_LIBCPP_FILESYSTEM_NEED_FSTREAM)
  // iostreams are unavailable in the no-locale build, so there is nothing left
  // to try once the kernel-side copies have failed.
  ec.assign(EINVAL, std::system_category());
  return false;
#  else
  return copy_file_impl_fstream(read_fd, write_fd, ec);
#  endif
}
#elif defined(_LIBCPP_FILESYSTEM_USE_COPYFILE)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
struct CopyFileState {
Expand All @@ -217,37 +353,7 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod
}
#elif defined(_LIBCPP_FILESYSTEM_USE_FSTREAM)
// Variant used when the fstream backend is the selected copy implementation:
// the stream-based copy lives in copy_file_impl_fstream, so simply forward to
// it instead of keeping a second, identical copy of that logic here.
// Returns whatever the helper returns and leaves ec as the helper set it.
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
  return copy_file_impl_fstream(read_fd, write_fd, ec);
}
#else
# error "Unknown implementation for copy_file_impl"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14
// REQUIRES: linux
// UNSUPPORTED: no-filesystem
// XFAIL: no-localization
// UNSUPPORTED: availability-filesystem-missing

// <filesystem>

// bool copy_file(const path& from, const path& to);
// bool copy_file(const path& from, const path& to, error_code& ec) noexcept;
// bool copy_file(const path& from, const path& to, copy_options options);
// bool copy_file(const path& from, const path& to, copy_options options,
// error_code& ec) noexcept;

#include <cassert>
#include <filesystem>
#include <system_error>

#include "test_macros.h"
#include "filesystem_test_helper.h"

namespace fs = std::filesystem;

// Linux has various virtual filesystems such as /proc and /sys
// where files may report no length (st_size == 0) but still contain data.
// This is because the to-be-read data is usually generated on demand by the reading syscall.
// Such files cannot be copied with kernel-side copies like copy_file_range or sendfile,
// and must instead be copied via a traditional userspace read + write loop.
// Verifies that copy_file can copy a procfs file whose reported size is zero
// but which still yields data when read.
int main(int, char** argv) {
  const fs::path procfile{"/proc/self/comm"};
  // Precondition of the scenario under test: the file claims to be empty.
  assert(file_size(procfile) == 0);

  scoped_test_env env;
  std::error_code ec = GetTestEC();

  const fs::path dest = env.make_env_path("dest");

  assert(copy_file(procfile, dest, ec));
  assert(!ec);

  // /proc/self/comm contains the name of the executable followed by a newline.
  // The kernel truncates that name to TASK_COMM_LEN - 1 (15) bytes, so clamp
  // the expected length instead of assuming the full filename is present.
  const std::string::size_type max_comm_len = 15;
  const std::string::size_type name_len     = fs::path(argv[0]).filename().string().size();
  const std::string::size_type comm_len     = name_len < max_comm_len ? name_len : max_comm_len;
  assert(file_size(dest) == comm_len + 1);

  return 0;
}
Loading