Skip to content

Commit b1b9b7b

Browse files
authored
[libc++] Cache file attributes during directory iteration (#93316)
This patch adds caching of file attributes during directory iteration on Windows. This improves the performance when working with files being iterated on in a directory.
1 parent 09b231c commit b1b9b7b

File tree

4 files changed

+246
-12
lines changed

4 files changed

+246
-12
lines changed

libcxx/include/__filesystem/directory_entry.h

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,9 @@ class directory_entry {
202202
_IterNonSymlink,
203203
_RefreshSymlink,
204204
_RefreshSymlinkUnresolved,
205-
_RefreshNonSymlink
205+
_RefreshNonSymlink,
206+
_IterCachedSymlink,
207+
_IterCachedNonSymlink
206208
};
207209

208210
struct __cached_data {
@@ -241,6 +243,29 @@ class directory_entry {
241243
return __data;
242244
}
243245

246+
_LIBCPP_HIDE_FROM_ABI static __cached_data
247+
__create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
248+
__cached_data __data;
249+
__data.__type_ = __ft;
250+
__data.__size_ = __size;
251+
__data.__write_time_ = __write_time;
252+
if (__ft == file_type::symlink)
253+
__data.__sym_perms_ = __perm;
254+
else
255+
__data.__non_sym_perms_ = __perm;
256+
__data.__cache_type_ = [&]() {
257+
switch (__ft) {
258+
case file_type::none:
259+
return _Empty;
260+
case file_type::symlink:
261+
return _IterCachedSymlink;
262+
default:
263+
return _IterCachedNonSymlink;
264+
}
265+
}();
266+
return __data;
267+
}
268+
244269
_LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) {
245270
__p_ = std::move(__p);
246271
__data_ = __dt;
@@ -282,29 +307,34 @@ class directory_entry {
282307
case _Empty:
283308
return __symlink_status(__p_, __ec).type();
284309
case _IterSymlink:
310+
case _IterCachedSymlink:
285311
case _RefreshSymlink:
286312
case _RefreshSymlinkUnresolved:
287313
if (__ec)
288314
__ec->clear();
289315
return file_type::symlink;
316+
case _IterCachedNonSymlink:
290317
case _IterNonSymlink:
291-
case _RefreshNonSymlink:
318+
case _RefreshNonSymlink: {
292319
file_status __st(__data_.__type_);
293320
if (__ec && !filesystem::exists(__st))
294321
*__ec = make_error_code(errc::no_such_file_or_directory);
295322
else if (__ec)
296323
__ec->clear();
297324
return __data_.__type_;
298325
}
326+
}
299327
__libcpp_unreachable();
300328
}
301329

302330
_LIBCPP_HIDE_FROM_ABI file_type __get_ft(error_code* __ec = nullptr) const {
303331
switch (__data_.__cache_type_) {
304332
case _Empty:
305333
case _IterSymlink:
334+
case _IterCachedSymlink:
306335
case _RefreshSymlinkUnresolved:
307336
return __status(__p_, __ec).type();
337+
case _IterCachedNonSymlink:
308338
case _IterNonSymlink:
309339
case _RefreshNonSymlink:
310340
case _RefreshSymlink: {
@@ -324,8 +354,10 @@ class directory_entry {
324354
case _Empty:
325355
case _IterNonSymlink:
326356
case _IterSymlink:
357+
case _IterCachedSymlink:
327358
case _RefreshSymlinkUnresolved:
328359
return __status(__p_, __ec);
360+
case _IterCachedNonSymlink:
329361
case _RefreshNonSymlink:
330362
case _RefreshSymlink:
331363
return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
@@ -339,8 +371,10 @@ class directory_entry {
339371
case _IterNonSymlink:
340372
case _IterSymlink:
341373
return __symlink_status(__p_, __ec);
374+
case _IterCachedNonSymlink:
342375
case _RefreshNonSymlink:
343376
return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
377+
case _IterCachedSymlink:
344378
case _RefreshSymlink:
345379
case _RefreshSymlinkUnresolved:
346380
return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
@@ -353,8 +387,10 @@ class directory_entry {
353387
case _Empty:
354388
case _IterNonSymlink:
355389
case _IterSymlink:
390+
case _IterCachedSymlink:
356391
case _RefreshSymlinkUnresolved:
357392
return filesystem::__file_size(__p_, __ec);
393+
case _IterCachedNonSymlink:
358394
case _RefreshSymlink:
359395
case _RefreshNonSymlink: {
360396
error_code __m_ec;
@@ -375,6 +411,8 @@ class directory_entry {
375411
case _Empty:
376412
case _IterNonSymlink:
377413
case _IterSymlink:
414+
case _IterCachedNonSymlink:
415+
case _IterCachedSymlink:
378416
case _RefreshSymlinkUnresolved:
379417
return filesystem::__hard_link_count(__p_, __ec);
380418
case _RefreshSymlink:
@@ -393,8 +431,10 @@ class directory_entry {
393431
case _Empty:
394432
case _IterNonSymlink:
395433
case _IterSymlink:
434+
case _IterCachedSymlink:
396435
case _RefreshSymlinkUnresolved:
397436
return filesystem::__last_write_time(__p_, __ec);
437+
case _IterCachedNonSymlink:
398438
case _RefreshSymlink:
399439
case _RefreshNonSymlink: {
400440
error_code __m_ec;

libcxx/src/filesystem/directory_iterator.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,13 @@ class __dir_stream {
7777
bool assign() {
7878
if (!wcscmp(__data_.cFileName, L".") || !wcscmp(__data_.cFileName, L".."))
7979
return false;
80-
// FIXME: Cache more of this
81-
// directory_entry::__cached_data cdata;
82-
// cdata.__type_ = get_file_type(__data_);
83-
// cdata.__size_ = get_file_size(__data_);
84-
// cdata.__write_time_ = get_write_time(__data_);
8580
__entry_.__assign_iter_entry(
86-
__root_ / __data_.cFileName, directory_entry::__create_iter_result(detail::get_file_type(__data_)));
81+
__root_ / __data_.cFileName,
82+
directory_entry::__create_iter_cached_result(
83+
detail::get_file_type(__data_),
84+
detail::get_file_size(__data_),
85+
detail::get_file_perm(__data_),
86+
detail::get_write_time(__data_)));
8787
return true;
8888
}
8989

libcxx/src/filesystem/file_descriptor.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,18 @@ inline uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
9797
return (static_cast<uint64_t>(data.nFileSizeHigh) << 32) + data.nFileSizeLow;
9898
}
9999
inline file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
100-
ULARGE_INTEGER tmp;
100+
using detail::fs_time;
101101
const FILETIME& time = data.ftLastWriteTime;
102-
tmp.u.LowPart = time.dwLowDateTime;
103-
tmp.u.HighPart = time.dwHighDateTime;
104-
return file_time_type(file_time_type::duration(tmp.QuadPart));
102+
auto ts = filetime_to_timespec(time);
103+
if (!fs_time::is_representable(ts))
104+
return file_time_type::min();
105+
return fs_time::convert_from_timespec(ts);
106+
}
107+
inline perms get_file_perm(const WIN32_FIND_DATAW& data) {
108+
unsigned st_mode = 0555; // Read-only
109+
if (!(data.dwFileAttributes & FILE_ATTRIBUTE_READONLY))
110+
st_mode |= 0222; // Write
111+
return static_cast<perms>(st_mode) & perms::mask;
105112
}
106113

107114
#endif // !_LIBCPP_WIN32API
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: can-create-symlinks
10+
// UNSUPPORTED: c++03, c++11, c++14
11+
// UNSUPPORTED: no-filesystem
12+
// UNSUPPORTED: availability-filesystem-missing
13+
14+
// <filesystem>
15+
16+
// recursive_directory_iterator
17+
18+
#include <filesystem>
19+
#include <type_traits>
20+
#include <set>
21+
#include <cassert>
22+
23+
#include "test_macros.h"
24+
#include "filesystem_test_helper.h"
25+
namespace fs = std::filesystem;
26+
27+
#if defined(_WIN32)
28+
static void set_last_write_time_in_iteration(const fs::path& dir) {
29+
// Windows can postpone updating the last write time of a file, especially for a
30+
// directory, because a directory's last write time depends on its children.
31+
// See
32+
// https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
33+
// To force the file entries to be updated, call "last_write_time" with an explicit value.
34+
const fs::recursive_directory_iterator end_it{};
35+
36+
std::error_code ec;
37+
fs::recursive_directory_iterator it(dir, ec);
38+
assert(!ec);
39+
40+
fs::file_time_type now_time = fs::file_time_type::clock::now();
41+
for (; it != end_it; ++it) {
42+
const fs::path entry = *it;
43+
fs::last_write_time(entry, now_time, ec);
44+
assert(!ec);
45+
}
46+
47+
assert(it == end_it);
48+
}
49+
50+
struct directory_entry_and_values {
51+
fs::directory_entry entry;
52+
53+
fs::file_status symlink_status;
54+
fs::file_status status;
55+
std::uintmax_t file_size;
56+
fs::file_time_type last_write_time;
57+
};
58+
59+
std::vector<directory_entry_and_values>
60+
get_directory_entries_for(const fs::path& dir, const std::set<fs::path>& dir_contents) {
61+
const fs::recursive_directory_iterator end_it{};
62+
63+
std::error_code ec;
64+
fs::recursive_directory_iterator it(dir, ec);
65+
assert(!ec);
66+
67+
std::vector<directory_entry_and_values> dir_entries;
68+
std::set<fs::path> unseen_entries = dir_contents;
69+
while (!unseen_entries.empty()) {
70+
assert(it != end_it);
71+
const fs::directory_entry& entry = *it;
72+
73+
assert(unseen_entries.erase(entry.path()) == 1);
74+
75+
dir_entries.push_back(directory_entry_and_values{
76+
.entry = entry,
77+
.symlink_status = entry.symlink_status(),
78+
.status = entry.status(),
79+
.file_size = entry.is_regular_file() ? entry.file_size() : 0,
80+
.last_write_time = entry.last_write_time()});
81+
82+
fs::recursive_directory_iterator& it_ref = it.increment(ec);
83+
assert(!ec);
84+
assert(&it_ref == &it);
85+
}
86+
return dir_entries;
87+
}
88+
#endif // _WIN32
89+
90+
// Checks that the directory_entry properties will be the same before and after
91+
// calling "refresh" on an entry obtained during iteration.
92+
// On Windows, directory_entry is expected to cache the properties during
93+
// iteration.
94+
static void test_cache_and_refresh_in_iteration() {
95+
static_test_env static_env;
96+
const fs::path test_dir = static_env.Dir;
97+
#if defined(_WIN32)
98+
set_last_write_time_in_iteration(test_dir);
99+
#endif
100+
const std::set<fs::path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
101+
const fs::recursive_directory_iterator end_it{};
102+
103+
std::error_code ec;
104+
fs::recursive_directory_iterator it(test_dir, ec);
105+
assert(!ec);
106+
107+
std::set<fs::path> unseen_entries = dir_contents;
108+
while (!unseen_entries.empty()) {
109+
assert(it != end_it);
110+
const fs::directory_entry& entry = *it;
111+
112+
assert(unseen_entries.erase(entry.path()) == 1);
113+
114+
fs::file_status symlink_status = entry.symlink_status();
115+
fs::file_status status = entry.status();
116+
std::uintmax_t file_size = entry.is_regular_file() ? entry.file_size() : 0;
117+
fs::file_time_type last_write_time = entry.last_write_time();
118+
119+
fs::directory_entry mutable_entry = *it;
120+
mutable_entry.refresh();
121+
fs::file_status upd_symlink_status = mutable_entry.symlink_status();
122+
fs::file_status upd_status = mutable_entry.status();
123+
std::uintmax_t upd_file_size = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
124+
fs::file_time_type upd_last_write_time = mutable_entry.last_write_time();
125+
assert(upd_symlink_status.type() == symlink_status.type() &&
126+
upd_symlink_status.permissions() == symlink_status.permissions());
127+
assert(upd_status.type() == status.type() && upd_status.permissions() == status.permissions());
128+
assert(upd_file_size == file_size);
129+
assert(upd_last_write_time == last_write_time);
130+
131+
fs::recursive_directory_iterator& it_ref = it.increment(ec);
132+
assert(!ec);
133+
assert(&it_ref == &it);
134+
}
135+
}
136+
137+
#if defined(_WIN32)
138+
// On Windows, the directory_entry is expected to cache the properties
139+
// during iteration and the properties don't change after deleting folders
140+
// and files.
141+
static void test_cached_values_in_iteration() {
142+
std::vector<directory_entry_and_values> dir_entries;
143+
{
144+
static_test_env static_env;
145+
const fs::path testDir = static_env.Dir;
146+
set_last_write_time_in_iteration(testDir);
147+
const std::set<fs::path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
148+
dir_entries = get_directory_entries_for(testDir, dir_contents);
149+
}
150+
// The test directory should have been deleted when static_test_env was destroyed.
151+
152+
for (const auto& dir_entry : dir_entries) {
153+
// During iteration, Windows provides information only about the symlink itself,
154+
// not about the file/folder that the symlink points to.
155+
if (dir_entry.entry.is_symlink()) {
156+
// Check that a symlink entry does not use a cached value to report that the file exists.
157+
assert(!dir_entry.entry.exists());
158+
} else {
159+
// Check that a non-symlink entry uses the cached value and still reports that the file exists.
160+
assert(dir_entry.entry.exists());
161+
}
162+
fs::file_status symlink_status = dir_entry.entry.symlink_status();
163+
assert(dir_entry.symlink_status.type() == symlink_status.type() &&
164+
dir_entry.symlink_status.permissions() == symlink_status.permissions());
165+
166+
if (!dir_entry.entry.is_symlink()) {
167+
fs::file_status status = dir_entry.entry.status();
168+
assert(dir_entry.status.type() == status.type() && dir_entry.status.permissions() == status.permissions());
169+
170+
std::uintmax_t file_size = dir_entry.entry.is_regular_file() ? dir_entry.entry.file_size() : 0;
171+
assert(dir_entry.file_size == file_size);
172+
173+
fs::file_time_type last_write_time = dir_entry.entry.last_write_time();
174+
assert(dir_entry.last_write_time == last_write_time);
175+
}
176+
}
177+
}
178+
#endif // _WIN32
179+
180+
int main(int, char**) {
181+
test_cache_and_refresh_in_iteration();
182+
#if defined(_WIN32)
183+
test_cached_values_in_iteration();
184+
#endif
185+
186+
return 0;
187+
}

0 commit comments

Comments
 (0)