Skip to content

[libcxx] Cache file attributes during directory iteration. #93316

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions libcxx/include/__filesystem/directory_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,9 @@ class directory_entry {
_IterNonSymlink,
_RefreshSymlink,
_RefreshSymlinkUnresolved,
_RefreshNonSymlink
_RefreshNonSymlink,
_IterCachedSymlink,
_IterCachedNonSymlink
};

struct __cached_data {
Expand Down Expand Up @@ -240,6 +242,29 @@ class directory_entry {
return __data;
}

// Builds the cache record for an entry whose type, size, permissions and last
// write time were all supplied by the directory iterator.
//
// The permissions are stored in the symlink slot when the entry itself is a
// symlink and in the non-symlink slot otherwise. The cache tag is
// _IterCachedSymlink for symlinks, _Empty for file_type::none, and
// _IterCachedNonSymlink for every other type.
_LIBCPP_HIDE_FROM_ABI static __cached_data
__create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
  __cached_data __data;
  __data.__type_       = __ft;
  __data.__size_       = __size;
  __data.__write_time_ = __write_time;

  // The permission slot and the cache tag depend on the same question, so
  // decide both in one branch.
  if (__ft == file_type::symlink) {
    __data.__sym_perms_  = __perm;
    __data.__cache_type_ = _IterCachedSymlink;
  } else {
    __data.__non_sym_perms_ = __perm;
    __data.__cache_type_    = (__ft == file_type::none) ? _Empty : _IterCachedNonSymlink;
  }
  return __data;
}

_LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) {
__p_ = std::move(__p);
__data_ = __dt;
Expand Down Expand Up @@ -281,29 +306,34 @@ class directory_entry {
case _Empty:
return __symlink_status(__p_, __ec).type();
case _IterSymlink:
case _IterCachedSymlink:
case _RefreshSymlink:
case _RefreshSymlinkUnresolved:
if (__ec)
__ec->clear();
return file_type::symlink;
case _IterCachedNonSymlink:
case _IterNonSymlink:
case _RefreshNonSymlink:
case _RefreshNonSymlink: {
file_status __st(__data_.__type_);
if (__ec && !filesystem::exists(__st))
*__ec = make_error_code(errc::no_such_file_or_directory);
else if (__ec)
__ec->clear();
return __data_.__type_;
}
}
__libcpp_unreachable();
}

_LIBCPP_HIDE_FROM_ABI file_type __get_ft(error_code* __ec = nullptr) const {
switch (__data_.__cache_type_) {
case _Empty:
case _IterSymlink:
case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return __status(__p_, __ec).type();
case _IterCachedNonSymlink:
case _IterNonSymlink:
case _RefreshNonSymlink:
case _RefreshSymlink: {
Expand All @@ -323,8 +353,10 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return __status(__p_, __ec);
case _IterCachedNonSymlink:
case _RefreshNonSymlink:
case _RefreshSymlink:
return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
Expand All @@ -338,8 +370,10 @@ class directory_entry {
case _IterNonSymlink:
case _IterSymlink:
return __symlink_status(__p_, __ec);
case _IterCachedNonSymlink:
case _RefreshNonSymlink:
return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
case _IterCachedSymlink:
case _RefreshSymlink:
case _RefreshSymlinkUnresolved:
return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
Expand All @@ -352,8 +386,10 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return filesystem::__file_size(__p_, __ec);
case _IterCachedNonSymlink:
case _RefreshSymlink:
case _RefreshNonSymlink: {
error_code __m_ec;
Expand All @@ -374,6 +410,8 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
case _IterCachedNonSymlink:
case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return filesystem::__hard_link_count(__p_, __ec);
case _RefreshSymlink:
Expand All @@ -392,8 +430,10 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return filesystem::__last_write_time(__p_, __ec);
case _IterCachedNonSymlink:
case _RefreshSymlink:
case _RefreshNonSymlink: {
error_code __m_ec;
Expand Down
12 changes: 6 additions & 6 deletions libcxx/src/filesystem/directory_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,13 @@ class __dir_stream {
// Publishes the find-data record currently held in __data_ as the stream's
// current entry.
//
// Returns false without touching __entry_ for the "." and ".." names, and
// true after the entry has been assigned otherwise.
bool assign() {
  if (!wcscmp(__data_.cFileName, L".") || !wcscmp(__data_.cFileName, L".."))
    return false;
  // Hand the type, size, permissions and last write time taken from the find
  // data to the entry together with its path.
  __entry_.__assign_iter_entry(
      __root_ / __data_.cFileName,
      directory_entry::__create_iter_cached_result(
          detail::get_file_type(__data_),
          detail::get_file_size(__data_),
          detail::get_file_perm(__data_),
          detail::get_write_time(__data_)));
  return true;
}

Expand Down
15 changes: 11 additions & 4 deletions libcxx/src/filesystem/file_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,18 @@ inline uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
return (static_cast<uint64_t>(data.nFileSizeHigh) << 32) + data.nFileSizeLow;
}
// Converts the last-write FILETIME stored in a find-data record to a
// file_time_type.
//
// The FILETIME is first turned into a timespec; if that timespec is not
// representable as a file_time_type, file_time_type::min() is returned
// instead of a wrapped or truncated value.
inline file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
  using detail::fs_time;
  const FILETIME& time = data.ftLastWriteTime;
  auto ts              = filetime_to_timespec(time);
  if (!fs_time::is_representable(ts))
    return file_time_type::min();
  return fs_time::convert_from_timespec(ts);
}
// Derives permission bits from the attributes of a find-data record: read and
// execute (0555) are always granted, and the write bits (0222) are added
// unless FILE_ATTRIBUTE_READONLY is set. The result is masked with
// perms::mask.
inline perms get_file_perm(const WIN32_FIND_DATAW& data) {
  const bool read_only   = (data.dwFileAttributes & FILE_ATTRIBUTE_READONLY) != 0;
  const unsigned st_mode = read_only ? 0555u : 0777u;
  return static_cast<perms>(st_mode) & perms::mask;
}

#endif // !_LIBCPP_WIN32API
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// REQUIRES: can-create-symlinks
// UNSUPPORTED: c++03, c++11, c++14
// UNSUPPORTED: no-filesystem
// UNSUPPORTED: availability-filesystem-missing

// <filesystem>

// recursive_directory_iterator

#include <cassert>
#include <cstdint>
#include <filesystem>
#include <set>
#include <system_error>
#include <type_traits>
#include <vector>

#include "test_macros.h"
#include "filesystem_test_helper.h"
namespace fs = std::filesystem;

#if defined(_WIN32)
// Assigns one explicit last-write time to every entry found by recursively
// iterating `dir`.
//
// Windows can postpone updating the last write time of a file, and of a
// directory in particular, because a directory's last write time depends on
// its children. See
// https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
// Calling last_write_time with our own value forces the entries to be updated.
static void set_last_write_time_in_iteration(const fs::path& dir) {
  std::error_code ec;
  fs::recursive_directory_iterator walker(dir, ec);
  assert(!ec);

  const fs::recursive_directory_iterator done{};
  const fs::file_time_type stamp = fs::file_time_type::clock::now();
  while (walker != done) {
    const fs::path target = *walker;
    fs::last_write_time(target, stamp, ec);
    assert(!ec);
    ++walker;
  }

  assert(walker == done);
}

// Pairs a directory_entry with the property values it reported at the moment
// it was recorded, so both can be compared later.
struct directory_entry_and_values {
// Copy of the entry as produced by the iterator.
fs::directory_entry entry;

// Values read from the entry when it was recorded (see
// get_directory_entries_for).
fs::file_status symlink_status;
fs::file_status status;
// Recorded as 0 when the entry is not a regular file.
std::uintmax_t file_size;
fs::file_time_type last_write_time;
};

// Recursively iterates `dir` and records, for each entry, a copy of the entry
// together with the symlink status, status, file size (0 when the entry is
// not a regular file) and last write time it reports at that moment.
//
// `dir_contents` is the set of paths the iteration is expected to produce;
// iteration stops once each of them has been seen exactly once.
static std::vector<directory_entry_and_values>
get_directory_entries_for(const fs::path& dir, const std::set<fs::path>& dir_contents) {
  const fs::recursive_directory_iterator end_it{};

  std::error_code ec;
  fs::recursive_directory_iterator it(dir, ec);
  assert(!ec);

  std::vector<directory_entry_and_values> dir_entries;
  // Every loop iteration removes exactly one expected path, so the final size
  // is known up front.
  dir_entries.reserve(dir_contents.size());
  std::set<fs::path> unseen_entries = dir_contents;
  while (!unseen_entries.empty()) {
    assert(it != end_it);
    const fs::directory_entry& entry = *it;

    // Keep the erase outside assert(): the loop condition depends on it, so it
    // must still run when assertions are compiled out.
    const auto erased = unseen_entries.erase(entry.path());
    assert(erased == 1);
    (void)erased;

    // Positional aggregate initialization, in member declaration order.
    // Designated initializers are a C++20 feature and this test also runs as
    // C++17.
    dir_entries.push_back(directory_entry_and_values{
        entry,
        entry.symlink_status(),
        entry.status(),
        entry.is_regular_file() ? entry.file_size() : 0,
        entry.last_write_time()});

    fs::recursive_directory_iterator& it_ref = it.increment(ec);
    assert(!ec);
    assert(&it_ref == &it);
  }
  return dir_entries;
}
#endif // _WIN32

// Checks that a directory_entry obtained during recursive iteration reports
// the same properties before and after refresh() is called on a copy of it.
// On Windows the directory_entry is expected to have cached these properties
// during iteration.
static void test_cache_and_refresh_in_iteration() {
  static_test_env static_env;
  const fs::path test_dir = static_env.Dir;
#if defined(_WIN32)
  set_last_write_time_in_iteration(test_dir);
#endif
  const std::set<fs::path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
  const fs::recursive_directory_iterator end_it{};

  std::error_code ec;
  fs::recursive_directory_iterator it(test_dir, ec);
  assert(!ec);

  std::set<fs::path> unseen_entries = dir_contents;
  while (!unseen_entries.empty()) {
    assert(it != end_it);
    const fs::directory_entry& entry = *it;

    // Keep the erase outside assert(): the loop condition depends on it, so it
    // must still run when assertions are compiled out.
    const auto erased = unseen_entries.erase(entry.path());
    assert(erased == 1);
    (void)erased;

    // Values as reported by the entry handed out by the iterator.
    fs::file_status symlink_status     = entry.symlink_status();
    fs::file_status status             = entry.status();
    std::uintmax_t file_size           = entry.is_regular_file() ? entry.file_size() : 0;
    fs::file_time_type last_write_time = entry.last_write_time();

    // Values reported by a copy of the entry after an explicit refresh().
    fs::directory_entry mutable_entry = *it;
    mutable_entry.refresh();
    fs::file_status upd_symlink_status     = mutable_entry.symlink_status();
    fs::file_status upd_status             = mutable_entry.status();
    std::uintmax_t upd_file_size           = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
    fs::file_time_type upd_last_write_time = mutable_entry.last_write_time();

    // One property per assert so a failure names the property that differs.
    assert(upd_symlink_status.type() == symlink_status.type());
    assert(upd_symlink_status.permissions() == symlink_status.permissions());
    assert(upd_status.type() == status.type());
    assert(upd_status.permissions() == status.permissions());
    assert(upd_file_size == file_size);
    assert(upd_last_write_time == last_write_time);

    fs::recursive_directory_iterator& it_ref = it.increment(ec);
    assert(!ec);
    assert(&it_ref == &it);
  }
}

#if defined(_WIN32)
// In case of Windows expects that the directory_entry caches the properties
// during iteration and the properties don't change after deleting folders
// and files.
static void test_cached_values_in_iteration() {
std::vector<directory_entry_and_values> dir_entries;
{
static_test_env static_env;
const fs::path testDir = static_env.Dir;
set_last_write_time_in_iteration(testDir);
const std::set<fs::path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
dir_entries = get_directory_entries_for(testDir, dir_contents);
}
// Testing folder should be deleted after destoying static_test_env.

for (const auto& dir_entry : dir_entries) {
// During iteration Windows provides information only about symlink itself
// not about file/folder which symlink points to.
if (dir_entry.entry.is_symlink()) {
// Check that symlink is not using cached value about existing file.
assert(!dir_entry.entry.exists());
} else {
// Check that entry uses cached value about existing file.
assert(dir_entry.entry.exists());
}
fs::file_status symlink_status = dir_entry.entry.symlink_status();
assert(dir_entry.symlink_status.type() == symlink_status.type() &&
dir_entry.symlink_status.permissions() == symlink_status.permissions());

if (!dir_entry.entry.is_symlink()) {
fs::file_status status = dir_entry.entry.status();
assert(dir_entry.status.type() == status.type() && dir_entry.status.permissions() == status.permissions());

std::uintmax_t file_size = dir_entry.entry.is_regular_file() ? dir_entry.entry.file_size() : 0;
assert(dir_entry.file_size == file_size);

fs::file_time_type last_write_time = dir_entry.entry.last_write_time();
assert(dir_entry.last_write_time == last_write_time);
}
}
}
#endif // _WIN32

// Runs the before/after-refresh comparison on every platform and, on Windows
// only, the check that values cached during iteration are still reported
// after the test tree has been deleted.
int main(int, char**) {
test_cache_and_refresh_in_iteration();
#if defined(_WIN32)
test_cached_values_in_iteration();
#endif

return 0;
}
Loading