Skip to content

Commit 47e2fec

Browse files
committed
feat!: Support precompose_unicode flag in fs::walkdir types.
1 parent a7e606b commit 47e2fec

File tree

3 files changed

+246
-20
lines changed

3 files changed

+246
-20
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-features/Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ progress-unit-human-numbers = ["prodash?/unit-human"]
2525
progress-unit-bytes = ["dep:bytesize", "prodash?/unit-bytes"]
2626

2727
## If set, walkdir iterators will be multi-threaded.
28-
fs-walkdir-parallel = [ "dep:jwalk" ]
28+
fs-walkdir-parallel = [ "dep:jwalk", "dep:gix-utils" ]
2929

3030
## Implement `tracing` with `tracing-core`, which provides applications with valuable performance details if they opt-in to it.
3131
##
@@ -47,7 +47,7 @@ parallel = ["dep:crossbeam-channel",
4747
once_cell = ["dep:once_cell"]
4848
## Makes facilities of the `walkdir` crate partially available.
4949
## In conjunction with the **parallel** feature, directory walking will instead be parallel, behind a compatible interface.
50-
walkdir = ["dep:walkdir"]
50+
walkdir = ["dep:walkdir", "dep:gix-utils"]
5151
#* an in-memory unidirectional pipe using `bytes` as efficient transfer mechanism.
5252
io-pipe = ["dep:bytes"]
5353
## provide a proven and fast `crc32` implementation.
@@ -117,6 +117,9 @@ required-features = ["io-pipe"]
117117
gix-hash = { version = "^0.14.1", path = "../gix-hash" }
118118
gix-trace = { version = "^0.1.7", path = "../gix-trace" }
119119

120+
# for walkdir
121+
gix-utils = { version = "^0.1.8", path = "../gix-utils", optional = true }
122+
120123
# 'parallel' feature
121124
crossbeam-channel = { version = "0.5.0", optional = true }
122125
parking_lot = { version = "0.12.0", default-features = false, optional = true }

gix-features/src/fs.rs

Lines changed: 240 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,160 @@ mod shared {
2121
}
2222
}
2323

24+
#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
25+
mod walkdir_precompose {
26+
use std::borrow::Cow;
27+
use std::ffi::OsStr;
28+
use std::path::Path;
29+
30+
#[derive(Debug)]
31+
pub struct DirEntry<T: std::fmt::Debug> {
32+
inner: T,
33+
precompose_unicode: bool,
34+
}
35+
36+
impl<T: std::fmt::Debug> DirEntry<T> {
37+
/// Create a new instance.
38+
pub fn new(inner: T, precompose_unicode: bool) -> Self {
39+
Self {
40+
inner,
41+
precompose_unicode,
42+
}
43+
}
44+
}
45+
46+
pub trait DirEntryApi {
47+
fn path(&self) -> Cow<'_, Path>;
48+
fn file_name(&self) -> Cow<'_, OsStr>;
49+
fn file_type(&self) -> std::io::Result<std::fs::FileType>;
50+
}
51+
52+
impl<T: DirEntryApi + std::fmt::Debug> DirEntry<T> {
53+
/// Obtain the full path of this entry, possibly with precomposed unicode if enabled.
54+
///
55+
/// Note that decomposing filesystems, like those made by Apple, accept both precomposed and
56+
/// decomposed names, and consider them equal.
57+
pub fn path(&self) -> Cow<'_, Path> {
58+
let path = self.inner.path();
59+
if self.precompose_unicode {
60+
gix_utils::str::precompose_path(path)
61+
} else {
62+
path
63+
}
64+
}
65+
66+
/// Obtain the file name of this entry, possibly with precomposed unicode if enabled.
67+
pub fn file_name(&self) -> Cow<'_, OsStr> {
68+
let name = self.inner.file_name();
69+
if self.precompose_unicode {
70+
gix_utils::str::precompose_os_string(name)
71+
} else {
72+
name
73+
}
74+
}
75+
76+
/// Return the file type for the file that this entry points to.
77+
///
78+
/// If `follow_links` was `true`, this is the file type of the item the link points to.
79+
pub fn file_type(&self) -> std::io::Result<std::fs::FileType> {
80+
self.inner.file_type()
81+
}
82+
}
83+
84+
/// A platform over entries in a directory, which may or may not precompose unicode after retrieving
85+
/// paths from the file system.
86+
pub struct WalkDir<T> {
87+
pub(crate) inner: Option<T>,
88+
pub(crate) precompose_unicode: bool,
89+
}
90+
91+
pub struct WalkDirIter<T, I, E>
92+
where
93+
T: Iterator<Item = Result<I, E>>,
94+
I: DirEntryApi,
95+
{
96+
pub(crate) inner: T,
97+
pub(crate) precompose_unicode: bool,
98+
}
99+
100+
impl<T, I, E> Iterator for WalkDirIter<T, I, E>
101+
where
102+
T: Iterator<Item = Result<I, E>>,
103+
I: DirEntryApi + std::fmt::Debug,
104+
{
105+
type Item = Result<DirEntry<I>, E>;
106+
107+
fn next(&mut self) -> Option<Self::Item> {
108+
self.inner
109+
.next()
110+
.map(|res| res.map(|entry| DirEntry::new(entry, self.precompose_unicode)))
111+
}
112+
}
113+
}
114+
24115
///
25116
#[cfg(feature = "fs-walkdir-parallel")]
26117
pub mod walkdir {
118+
use std::borrow::Cow;
119+
use std::ffi::OsStr;
120+
use std::fs::FileType;
27121
use std::path::Path;
28122

29-
pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error, WalkDir};
123+
use jwalk::WalkDir as WalkDirImpl;
124+
pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error};
30125

31126
pub use super::shared::Parallelism;
32127

33-
/// An alias for an uncustomized directory entry to match the one of the non-parallel version offered by `walkdir`.
34-
pub type DirEntry = DirEntryGeneric<((), ())>;
128+
type DirEntryImpl = DirEntryGeneric<((), ())>;
129+
130+
/// A directory entry returned by [DirEntryIter].
131+
pub type DirEntry = super::walkdir_precompose::DirEntry<DirEntryImpl>;
132+
/// A platform to create a [DirEntryIter] from.
133+
pub type WalkDir = super::walkdir_precompose::WalkDir<WalkDirImpl>;
134+
135+
impl super::walkdir_precompose::DirEntryApi for DirEntryImpl {
136+
fn path(&self) -> Cow<'_, Path> {
137+
self.path().into()
138+
}
139+
140+
fn file_name(&self) -> Cow<'_, OsStr> {
141+
self.file_name().into()
142+
}
143+
144+
fn file_type(&self) -> std::io::Result<FileType> {
145+
Ok(self.file_type())
146+
}
147+
}
148+
149+
impl IntoIterator for WalkDir {
150+
type Item = Result<DirEntry, jwalk::Error>;
151+
type IntoIter = DirEntryIter;
152+
153+
fn into_iter(self) -> Self::IntoIter {
154+
DirEntryIter {
155+
inner: self.inner.expect("always set (builder fix)").into_iter(),
156+
precompose_unicode: self.precompose_unicode,
157+
}
158+
}
159+
}
160+
161+
impl WalkDir {
162+
/// Set the minimum component depth of paths of entries.
163+
pub fn min_depth(mut self, min: usize) -> Self {
164+
self.inner = Some(self.inner.take().expect("always set").min_depth(min));
165+
self
166+
}
167+
/// Set the maximum component depth of paths of entries.
168+
pub fn max_depth(mut self, max: usize) -> Self {
169+
self.inner = Some(self.inner.take().expect("always set").max_depth(max));
170+
self
171+
}
172+
/// Follow symbolic links.
173+
pub fn follow_links(mut self, toggle: bool) -> Self {
174+
self.inner = Some(self.inner.take().expect("always set").follow_links(toggle));
175+
self
176+
}
177+
}
35178

36179
impl From<Parallelism> for jwalk::Parallelism {
37180
fn from(v: Parallelism) -> Self {
@@ -58,43 +201,122 @@ pub mod walkdir {
58201
}
59202

60203
/// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`.
61-
pub fn walkdir_new(root: &Path, parallelism: Parallelism) -> WalkDir {
62-
WalkDir::new(root).skip_hidden(false).parallelism(parallelism.into())
204+
///
205+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
206+
pub fn walkdir_new(root: &Path, parallelism: Parallelism, precompose_unicode: bool) -> WalkDir {
207+
WalkDir {
208+
inner: WalkDirImpl::new(root)
209+
.skip_hidden(false)
210+
.parallelism(parallelism.into())
211+
.into(),
212+
precompose_unicode,
213+
}
63214
}
64215

65216
/// Instantiate a new directory iterator which will not skip hidden files and is sorted, with the given level of `parallelism`.
66-
pub fn walkdir_sorted_new(root: &Path, parallelism: Parallelism) -> WalkDir {
67-
WalkDir::new(root)
68-
.skip_hidden(false)
69-
.sort(true)
70-
.parallelism(parallelism.into())
217+
///
218+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
219+
pub fn walkdir_sorted_new(root: &Path, parallelism: Parallelism, precompose_unicode: bool) -> WalkDir {
220+
WalkDir {
221+
inner: WalkDirImpl::new(root)
222+
.skip_hidden(false)
223+
.sort(true)
224+
.parallelism(parallelism.into())
225+
.into(),
226+
precompose_unicode,
227+
}
71228
}
72229

230+
type DirEntryIterImpl = DirEntryIterGeneric<((), ())>;
231+
73232
/// The Iterator yielding directory items
74-
pub type DirEntryIter = DirEntryIterGeneric<((), ())>;
233+
pub type DirEntryIter = super::walkdir_precompose::WalkDirIter<DirEntryIterImpl, DirEntryImpl, jwalk::Error>;
75234
}
76235

77-
#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))]
78236
///
237+
#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))]
79238
pub mod walkdir {
239+
use std::borrow::Cow;
240+
use std::ffi::OsStr;
241+
use std::fs::FileType;
80242
use std::path::Path;
81243

82-
pub use walkdir::{DirEntry, Error, WalkDir};
244+
pub use walkdir::Error;
245+
use walkdir::{DirEntry as DirEntryImpl, WalkDir as WalkDirImpl};
246+
247+
/// A directory entry returned by [DirEntryIter].
248+
pub type DirEntry = super::walkdir_precompose::DirEntry<DirEntryImpl>;
249+
/// A platform to create a [DirEntryIter] from.
250+
pub type WalkDir = super::walkdir_precompose::WalkDir<WalkDirImpl>;
83251

84252
pub use super::shared::Parallelism;
85253

254+
impl super::walkdir_precompose::DirEntryApi for DirEntryImpl {
255+
fn path(&self) -> Cow<'_, Path> {
256+
self.path().into()
257+
}
258+
259+
fn file_name(&self) -> Cow<'_, OsStr> {
260+
self.file_name().into()
261+
}
262+
263+
fn file_type(&self) -> std::io::Result<FileType> {
264+
Ok(self.file_type())
265+
}
266+
}
267+
268+
impl IntoIterator for WalkDir {
269+
type Item = Result<DirEntry, walkdir::Error>;
270+
type IntoIter = DirEntryIter;
271+
272+
fn into_iter(self) -> Self::IntoIter {
273+
DirEntryIter {
274+
inner: self.inner.expect("always set (builder fix)").into_iter(),
275+
precompose_unicode: self.precompose_unicode,
276+
}
277+
}
278+
}
279+
280+
impl WalkDir {
281+
/// Set the minimum component depth of paths of entries.
282+
pub fn min_depth(mut self, min: usize) -> Self {
283+
self.inner = Some(self.inner.take().expect("always set").min_depth(min));
284+
self
285+
}
286+
/// Set the maximum component depth of paths of entries.
287+
pub fn max_depth(mut self, max: usize) -> Self {
288+
self.inner = Some(self.inner.take().expect("always set").max_depth(max));
289+
self
290+
}
291+
/// Follow symbolic links.
292+
pub fn follow_links(mut self, toggle: bool) -> Self {
293+
self.inner = Some(self.inner.take().expect("always set").follow_links(toggle));
294+
self
295+
}
296+
}
297+
86298
/// Instantiate a new directory iterator which will not skip hidden files. The `Parallelism` argument is accepted for interface compatibility and ignored.
87-
pub fn walkdir_new(root: &Path, _: Parallelism) -> WalkDir {
88-
WalkDir::new(root)
299+
///
300+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
301+
pub fn walkdir_new(root: &Path, _: Parallelism, precompose_unicode: bool) -> WalkDir {
302+
WalkDir {
303+
inner: WalkDirImpl::new(root).into(),
304+
precompose_unicode,
305+
}
89306
}
90307

91308
/// Instantiate a new directory iterator which will not skip hidden files and is sorted by file name. The `Parallelism` argument is accepted for interface compatibility and ignored.
92-
pub fn walkdir_sorted_new(root: &Path, _: Parallelism) -> WalkDir {
93-
WalkDir::new(root).sort_by_file_name()
309+
///
310+
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
311+
pub fn walkdir_sorted_new(root: &Path, _: Parallelism, precompose_unicode: bool) -> WalkDir {
312+
WalkDir {
313+
inner: WalkDirImpl::new(root).sort_by_file_name().into(),
314+
precompose_unicode,
315+
}
94316
}
95317

96318
/// The Iterator yielding directory items
97-
pub type DirEntryIter = walkdir::IntoIter;
319+
pub type DirEntryIter = super::walkdir_precompose::WalkDirIter<walkdir::IntoIter, DirEntryImpl, walkdir::Error>;
98320
}
99321

100322
#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]

0 commit comments

Comments
 (0)