Skip to content

Commit 5683850

Browse files
committed
feat!: Support precompose_unicode flag in fs::walkdir types.
1 parent ed9baa7 commit 5683850

File tree

3 files changed

+238
-20
lines changed

3 files changed

+238
-20
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-features/Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ progress-unit-human-numbers = ["prodash?/unit-human"]
2525
progress-unit-bytes = ["dep:bytesize", "prodash?/unit-bytes"]
2626

2727
## If set, walkdir iterators will be multi-threaded.
28-
fs-walkdir-parallel = [ "dep:jwalk" ]
28+
fs-walkdir-parallel = [ "dep:jwalk", "dep:gix-utils" ]
2929

3030
## Implement `tracing` with `tracing-core`, which provides applications with valuable performance details if they opt-in to it.
3131
##
@@ -47,7 +47,7 @@ parallel = ["dep:crossbeam-channel",
4747
once_cell = ["dep:once_cell"]
4848
## Makes facilities of the `walkdir` crate partially available.
4949
## In conjunction with the **parallel** feature, directory walking will instead be parallel, behind a compatible interface.
50-
walkdir = ["dep:walkdir"]
50+
walkdir = ["dep:walkdir", "dep:gix-utils"]
5151
#* an in-memory unidirectional pipe using `bytes` as efficient transfer mechanism.
5252
io-pipe = ["dep:bytes"]
5353
## provide a proven and fast `crc32` implementation.
@@ -117,6 +117,9 @@ required-features = ["io-pipe"]
117117
gix-hash = { version = "^0.14.1", path = "../gix-hash" }
118118
gix-trace = { version = "^0.1.6", path = "../gix-trace" }
119119

120+
# for walkdir
121+
gix-utils = { version = "^0.1.8", path = "../gix-utils", optional = true }
122+
120123
# 'parallel' feature
121124
crossbeam-channel = { version = "0.5.0", optional = true }
122125
parking_lot = { version = "0.12.0", default-features = false, optional = true }

gix-features/src/fs.rs

Lines changed: 232 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,152 @@ mod shared {
2121
}
2222
}
2323

24+
#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
25+
mod walkdir_precompose {
26+
use std::borrow::Cow;
27+
use std::ffi::OsStr;
28+
use std::path::Path;
29+
30+
pub struct DirEntry<T> {
31+
inner: T,
32+
precompose_unicode: bool,
33+
}
34+
35+
pub trait DirEntryApi {
36+
fn path(&self) -> Cow<'_, Path>;
37+
fn file_name(&self) -> Cow<'_, OsStr>;
38+
fn file_type(&self) -> std::fs::FileType;
39+
}
40+
41+
impl<T: DirEntryApi> DirEntry<T> {
42+
/// Obtain the full path of this entry, possibly with precomposed unicode if enabled.
43+
///
44+
/// Note that decomposing filesystems like those made by Apple accept both precomposed and
45+
/// decomposed names, and consider them equal.
46+
pub fn path(&self) -> Cow<'_, Path> {
47+
let path = self.inner.path();
48+
if self.precompose_unicode {
49+
gix_utils::str::precompose_path(path)
50+
} else {
51+
path
52+
}
53+
}
54+
55+
/// Obtain the file name of this entry, possibly with precomposed unicode if enabled.
56+
pub fn file_name(&self) -> Cow<'_, OsStr> {
57+
let name = self.inner.file_name();
58+
if self.precompose_unicode {
59+
gix_utils::str::precompose_os_string(name)
60+
} else {
61+
name
62+
}
63+
}
64+
65+
/// Return the file type for the file that this entry points to.
66+
///
67+
/// If `follow_links` was `true`, this is the file type of the item the link points to.
68+
pub fn file_type(&self) -> std::fs::FileType {
69+
self.inner.file_type()
70+
}
71+
}
72+
73+
/// A platform over entries in a directory, which may or may not precompose unicode after retrieving
74+
/// paths from the file system.
75+
pub struct WalkDir<T> {
76+
pub(crate) inner: Option<T>,
77+
pub(crate) precompose_unicode: bool,
78+
}
79+
80+
pub struct WalkDirIter<T, I, E>
81+
where
82+
T: Iterator<Item = Result<I, E>>,
83+
I: DirEntryApi,
84+
{
85+
pub(crate) inner: T,
86+
pub(crate) precompose_unicode: bool,
87+
}
88+
89+
impl<T, I, E> Iterator for WalkDirIter<T, I, E>
90+
where
91+
T: Iterator<Item = Result<I, E>>,
92+
I: DirEntryApi,
93+
{
94+
type Item = Result<DirEntry<I>, E>;
95+
96+
fn next(&mut self) -> Option<Self::Item> {
97+
self.inner.next().map(|res| {
98+
res.map(|entry| DirEntry {
99+
inner: entry,
100+
precompose_unicode: self.precompose_unicode,
101+
})
102+
})
103+
}
104+
}
105+
}
106+
24107
///
25108
#[cfg(feature = "fs-walkdir-parallel")]
26109
pub mod walkdir {
110+
use std::borrow::Cow;
111+
use std::ffi::OsStr;
112+
use std::fs::FileType;
27113
use std::path::Path;
28114

29-
pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error, WalkDir};
115+
use jwalk::WalkDir as WalkDirImpl;
116+
pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error};
30117

31118
pub use super::shared::Parallelism;
32119

33-
/// An alias for an uncustomized directory entry to match the one of the non-parallel version offered by `walkdir`.
34-
pub type DirEntry = DirEntryGeneric<((), ())>;
120+
type DirEntryImpl = DirEntryGeneric<((), ())>;
121+
122+
/// A directory entry returned by [DirEntryIter].
123+
pub type DirEntry = super::walkdir_precompose::DirEntry<DirEntryImpl>;
124+
/// A platform to create a [DirEntryIter] from.
125+
pub type WalkDir = super::walkdir_precompose::WalkDir<WalkDirImpl>;
126+
127+
impl super::walkdir_precompose::DirEntryApi for DirEntryImpl {
128+
fn path(&self) -> Cow<'_, Path> {
129+
self.path().into()
130+
}
131+
132+
fn file_name(&self) -> Cow<'_, OsStr> {
133+
self.file_name().into()
134+
}
135+
136+
fn file_type(&self) -> FileType {
137+
self.file_type()
138+
}
139+
}
140+
141+
impl IntoIterator for WalkDir {
142+
type Item = Result<DirEntry, jwalk::Error>;
143+
type IntoIter = DirEntryIter;
144+
145+
fn into_iter(self) -> Self::IntoIter {
146+
DirEntryIter {
147+
inner: self.inner.expect("always set (builder fix)").into_iter(),
148+
precompose_unicode: self.precompose_unicode,
149+
}
150+
}
151+
}
152+
153+
impl WalkDir {
154+
/// Set the minimum component depth of paths of entries.
155+
pub fn min_depth(mut self, min: usize) -> Self {
156+
self.inner = Some(self.inner.take().expect("always set").min_depth(min));
157+
self
158+
}
159+
/// Set the maximum component depth of paths of entries.
160+
pub fn max_depth(mut self, max: usize) -> Self {
161+
self.inner = Some(self.inner.take().expect("always set").max_depth(max));
162+
self
163+
}
164+
/// Follow symbolic links.
165+
pub fn follow_links(mut self, toggle: bool) -> Self {
166+
self.inner = Some(self.inner.take().expect("always set").follow_links(toggle));
167+
self
168+
}
169+
}
35170

36171
impl From<Parallelism> for jwalk::Parallelism {
37172
fn from(v: Parallelism) -> Self {
@@ -58,43 +193,122 @@ pub mod walkdir {
58193
}
59194

60195
/// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`.
61-
pub fn walkdir_new(root: &Path, parallelism: Parallelism) -> WalkDir {
62-
WalkDir::new(root).skip_hidden(false).parallelism(parallelism.into())
196+
///
197+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
198+
pub fn walkdir_new(root: &Path, parallelism: Parallelism, precompose_unicode: bool) -> WalkDir {
199+
WalkDir {
200+
inner: WalkDirImpl::new(root)
201+
.skip_hidden(false)
202+
.parallelism(parallelism.into())
203+
.into(),
204+
precompose_unicode,
205+
}
63206
}
64207

65208
/// Instantiate a new directory iterator which will not skip hidden files and is sorted, with the given level of `parallelism`.
66-
pub fn walkdir_sorted_new(root: &Path, parallelism: Parallelism) -> WalkDir {
67-
WalkDir::new(root)
68-
.skip_hidden(false)
69-
.sort(true)
70-
.parallelism(parallelism.into())
209+
///
210+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
211+
pub fn walkdir_sorted_new(root: &Path, parallelism: Parallelism, precompose_unicode: bool) -> WalkDir {
212+
WalkDir {
213+
inner: WalkDirImpl::new(root)
214+
.skip_hidden(false)
215+
.sort(true)
216+
.parallelism(parallelism.into())
217+
.into(),
218+
precompose_unicode,
219+
}
71220
}
72221

222+
type DirEntryIterImpl = DirEntryIterGeneric<((), ())>;
223+
73224
/// The Iterator yielding directory items
74-
pub type DirEntryIter = DirEntryIterGeneric<((), ())>;
225+
pub type DirEntryIter = super::walkdir_precompose::WalkDirIter<DirEntryIterImpl, DirEntryImpl, jwalk::Error>;
75226
}
76227

77-
#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))]
78228
///
229+
#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))]
79230
pub mod walkdir {
231+
use std::borrow::Cow;
232+
use std::ffi::OsStr;
233+
use std::fs::FileType;
80234
use std::path::Path;
81235

82-
pub use walkdir::{DirEntry, Error, WalkDir};
236+
pub use walkdir::Error;
237+
use walkdir::{DirEntry as DirEntryImpl, WalkDir as WalkDirImpl};
238+
239+
/// A directory entry returned by [DirEntryIter].
240+
pub type DirEntry = super::walkdir_precompose::DirEntry<DirEntryImpl>;
241+
/// A platform to create a [DirEntryIter] from.
242+
pub type WalkDir = super::walkdir_precompose::WalkDir<WalkDirImpl>;
83243

84244
pub use super::shared::Parallelism;
85245

246+
impl super::walkdir_precompose::DirEntryApi for DirEntryImpl {
247+
fn path(&self) -> Cow<'_, Path> {
248+
self.path().into()
249+
}
250+
251+
fn file_name(&self) -> Cow<'_, OsStr> {
252+
self.file_name().into()
253+
}
254+
255+
fn file_type(&self) -> FileType {
256+
self.file_type()
257+
}
258+
}
259+
260+
impl IntoIterator for WalkDir {
261+
type Item = Result<DirEntry, walkdir::Error>;
262+
type IntoIter = DirEntryIter;
263+
264+
fn into_iter(self) -> Self::IntoIter {
265+
DirEntryIter {
266+
inner: self.inner.expect("always set (builder fix)").into_iter(),
267+
precompose_unicode: self.precompose_unicode,
268+
}
269+
}
270+
}
271+
272+
impl WalkDir {
273+
/// Set the minimum component depth of paths of entries.
274+
pub fn min_depth(mut self, min: usize) -> Self {
275+
self.inner = Some(self.inner.take().expect("always set").min_depth(min));
276+
self
277+
}
278+
/// Set the maximum component depth of paths of entries.
279+
pub fn max_depth(mut self, max: usize) -> Self {
280+
self.inner = Some(self.inner.take().expect("always set").max_depth(max));
281+
self
282+
}
283+
/// Follow symbolic links.
284+
pub fn follow_links(mut self, toggle: bool) -> Self {
285+
self.inner = Some(self.inner.take().expect("always set").follow_links(toggle));
286+
self
287+
}
288+
}
289+
86290
/// Instantiate a new directory iterator which will not skip hidden files. The `Parallelism` argument is ignored in this non-parallel variant.
87-
pub fn walkdir_new(root: &Path, _: Parallelism) -> WalkDir {
88-
WalkDir::new(root)
291+
///
292+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
293+
pub fn walkdir_new(root: &Path, _: Parallelism, precompose_unicode: bool) -> WalkDir {
294+
WalkDir {
295+
inner: WalkDirImpl::new(root).into(),
296+
precompose_unicode,
297+
}
89298
}
90299

91300
/// Instantiate a new directory iterator which will not skip hidden files and is sorted by file name. The `Parallelism` argument is ignored in this non-parallel variant.
92-
pub fn walkdir_sorted_new(root: &Path, _: Parallelism) -> WalkDir {
93-
WalkDir::new(root).sort_by_file_name()
301+
///
302+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
303+
pub fn walkdir_sorted_new(root: &Path, _: Parallelism, precompose_unicode: bool) -> WalkDir {
304+
WalkDir {
305+
inner: WalkDirImpl::new(root).sort_by_file_name().into(),
306+
precompose_unicode,
307+
}
94308
}
95309

96310
/// The Iterator yielding directory items
97-
pub type DirEntryIter = walkdir::IntoIter;
311+
pub type DirEntryIter = super::walkdir_precompose::WalkDirIter<walkdir::IntoIter, DirEntryImpl, walkdir::Error>;
98312
}
99313

100314
#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]

0 commit comments

Comments
 (0)