Skip to content

Commit de66b4c

Browse files
committed
feat: status now supports filters.
This is important as it allows streaming reads from the worktree while correctly converting, for example, `git-lfs` files back into their manifests, and thus arriving at the correct hash.
1 parent ffcb110 commit de66b4c

File tree

9 files changed

+405
-195
lines changed

9 files changed

+405
-195
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-status/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ gix-object = { version = "^0.37.0", path = "../gix-object" }
2121
gix-path = { version = "^0.10.0", path = "../gix-path" }
2222
gix-features = { version = "^0.35.0", path = "../gix-features" }
2323
gix-pathspec = { version = "^0.3.0", path = "../gix-pathspec" }
24+
gix-filter = { version = "^0.5.0", path = "../gix-filter" }
25+
gix-worktree = { version = "^0.26.0", path = "../gix-worktree", default-features = false, features = ["attributes"] }
2426

2527
thiserror = "1.0.26"
2628
filetime = "0.2.15"

gix-status/src/index_as_worktree/function.rs

Lines changed: 148 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,17 @@ use std::{io, marker::PhantomData, path::Path};
44
use bstr::BStr;
55
use filetime::FileTime;
66
use gix_features::parallel::{in_parallel_if, Reduce};
7+
use gix_filter::pipeline::convert::ToGitOutcome;
78

9+
use crate::index_as_worktree::traits::read_data::Stream;
810
use crate::{
911
index_as_worktree::{
1012
traits,
1113
traits::{CompareBlobs, SubmoduleStatus},
1214
types::{Error, Options},
1315
Change, Outcome, VisitEntry,
1416
},
15-
read, Pathspec,
17+
Pathspec, SymlinkCheck,
1618
};
1719

1820
/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them
@@ -24,6 +26,8 @@ use crate::{
2426
/// the current time for which it will be considered fresh as long as it is included which depends on `pathspec`.
2527
///
2628
/// `should_interrupt` can be used to stop all processing.
29+
/// `filter` is used to convert worktree files back to their internal git representation. For this to be correct,
30+
/// [`Options::attributes`] must be configured as well.
2731
///
2832
/// ### Note
2933
///
@@ -45,8 +49,9 @@ pub fn index_as_worktree<'index, T, U, Find, E1, E2>(
4549
find: Find,
4650
progress: &mut dyn gix_features::progress::Progress,
4751
pathspec: impl Pathspec + Send + Clone,
52+
filter: gix_filter::Pipeline,
4853
should_interrupt: &AtomicBool,
49-
options: Options,
54+
mut options: Options,
5055
) -> Result<Outcome, Error>
5156
where
5257
T: Send,
@@ -70,6 +75,14 @@ where
7075
let range = index
7176
.prefixed_entries_range(pathspec.common_prefix())
7277
.unwrap_or(0..index.entries().len());
78+
79+
let stack = gix_worktree::Stack::from_state_and_ignore_case(
80+
worktree,
81+
options.fs.ignore_case,
82+
gix_worktree::stack::State::AttributesStack(std::mem::take(&mut options.attributes)),
83+
index,
84+
index.path_backing(),
85+
);
7386
let (entries, path_backing) = index.entries_mut_and_pathbacking();
7487
let mut num_entries = entries.len();
7588
let entries = &mut entries[range];
@@ -87,48 +100,51 @@ where
87100
progress.init(entries.len().into(), gix_features::progress::count("files"));
88101
let count = progress.counter();
89102

103+
let new_state = {
104+
let options = &options;
105+
let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags);
106+
let (symlink_metadata_calls, entries_updated) = (&symlink_metadata_calls, &entries_updated);
107+
let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes);
108+
let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads);
109+
move |_| {
110+
(
111+
State {
112+
buf: Vec::new(),
113+
buf2: Vec::new(),
114+
attr_stack: stack,
115+
path_stack: SymlinkCheck::new(worktree.into()),
116+
timestamp,
117+
path_backing,
118+
filter,
119+
options,
120+
121+
skipped_by_pathspec,
122+
skipped_by_entry_flags,
123+
symlink_metadata_calls,
124+
entries_updated,
125+
racy_clean,
126+
worktree_reads,
127+
worktree_bytes,
128+
odb_reads,
129+
odb_bytes,
130+
},
131+
compare,
132+
submodule,
133+
find,
134+
pathspec,
135+
)
136+
}
137+
};
90138
in_parallel_if(
91139
|| true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though.
92140
gix_features::interrupt::Iter::new(entries.chunks_mut(chunk_size), should_interrupt),
93141
thread_limit,
94-
{
95-
let options = &options;
96-
let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags);
97-
let (symlink_metadata_calls, entries_updated) = (&symlink_metadata_calls, &entries_updated);
98-
let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes);
99-
let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads);
100-
move |_| {
101-
(
102-
State {
103-
buf: Vec::new(),
104-
odb_buf: Vec::new(),
105-
path_stack: crate::SymlinkCheck::new(worktree.to_owned()),
106-
timestamp,
107-
path_backing,
108-
options,
109-
110-
skipped_by_pathspec,
111-
skipped_by_entry_flags,
112-
symlink_metadata_calls,
113-
entries_updated,
114-
racy_clean,
115-
worktree_reads,
116-
worktree_bytes,
117-
odb_reads,
118-
odb_bytes,
119-
},
120-
compare,
121-
submodule,
122-
find,
123-
pathspec,
124-
)
125-
}
126-
},
142+
new_state,
127143
|entries, (state, blobdiff, submdule, find, pathspec)| {
128144
entries
129145
.iter_mut()
130146
.filter_map(|entry| {
131-
let res = state.process(entry, blobdiff, submdule, find, pathspec);
147+
let res = state.process(entry, pathspec, blobdiff, submdule, find);
132148
count.fetch_add(1, Ordering::Relaxed);
133149
res
134150
})
@@ -158,9 +174,16 @@ where
158174

159175
struct State<'a, 'b> {
160176
buf: Vec<u8>,
161-
odb_buf: Vec<u8>,
177+
buf2: Vec<u8>,
162178
timestamp: FileTime,
163-
path_stack: crate::SymlinkCheck,
179+
/// This is the cheap stack that only assures that we don't go through symlinks.
180+
/// It's always used to get the path to perform an lstat on.
181+
path_stack: SymlinkCheck,
182+
/// This is the expensive stack that will need to check for `.gitattributes` files each time
183+
/// it changes directory. It's only used when we know we have to read a worktree file, which in turn
184+
/// requires attributes to drive the filter configuration.
185+
attr_stack: gix_worktree::Stack,
186+
filter: gix_filter::Pipeline,
164187
path_backing: &'b [u8],
165188
options: &'a Options,
166189

@@ -181,10 +204,10 @@ impl<'index> State<'_, 'index> {
181204
fn process<T, U, Find, E1, E2>(
182205
&mut self,
183206
entry: &'index mut gix_index::Entry,
207+
pathspec: &mut impl Pathspec,
184208
diff: &mut impl CompareBlobs<Output = T>,
185209
submodule: &mut impl SubmoduleStatus<Output = U, Error = E2>,
186210
find: &mut Find,
187-
pathspec: &mut impl Pathspec,
188211
) -> Option<StatusResult<'index, T, U>>
189212
where
190213
E1: std::error::Error + Send + Sync + 'static,
@@ -266,10 +289,9 @@ impl<'index> State<'_, 'index> {
266289
E2: std::error::Error + Send + Sync + 'static,
267290
Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E1>,
268291
{
269-
let worktree_path = gix_path::try_from_bstr(rela_path).map_err(|_| Error::IllformedUtf8)?;
270-
let worktree_path = match self.path_stack.verified_path(worktree_path.as_ref()) {
292+
let worktree_path = match self.path_stack.verified_path(gix_path::from_bstr(rela_path).as_ref()) {
271293
Ok(path) => path,
272-
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Some(Change::Removed)),
294+
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)),
273295
Err(err) => return Err(Error::Io(err)),
274296
};
275297
self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed);
@@ -336,27 +358,24 @@ impl<'index> State<'_, 'index> {
336358
}
337359

338360
self.buf.clear();
339-
let read_file = WorktreeBlob {
361+
self.buf2.clear();
362+
let fetch_data = ReadDataImpl {
340363
buf: &mut self.buf,
341364
path: worktree_path,
365+
rela_path,
342366
entry,
367+
file_len: metadata.len(),
368+
filter: &mut self.filter,
369+
attr_stack: &mut self.attr_stack,
343370
options: self.options,
344-
};
345-
self.odb_buf.clear();
346-
let read_blob = OdbBlob {
347-
buf: &mut self.odb_buf,
348371
id: &entry.id,
349372
find,
373+
worktree_reads: self.worktree_reads,
374+
worktree_bytes: self.worktree_bytes,
375+
odb_reads: self.odb_reads,
376+
odb_bytes: self.odb_bytes,
350377
};
351-
let content_change = diff.compare_blobs(entry, metadata.len() as usize, read_file, read_blob)?;
352-
if !self.buf.is_empty() {
353-
self.worktree_reads.fetch_add(1, Ordering::Relaxed);
354-
self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed);
355-
}
356-
if !self.odb_buf.is_empty() {
357-
self.odb_reads.fetch_add(1, Ordering::Relaxed);
358-
self.odb_bytes.fetch_add(self.odb_buf.len() as u64, Ordering::Relaxed);
359-
}
378+
let content_change = diff.compare_blobs(entry, metadata.len(), fetch_data, &mut self.buf2)?;
360379
// This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated.
361380
if content_change.is_some() && racy_clean {
362381
entry.stat.size = 0;
@@ -404,43 +423,91 @@ impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>
404423
}
405424
}
406425

407-
struct WorktreeBlob<'a> {
408-
buf: &'a mut Vec<u8>,
409-
path: &'a Path,
410-
entry: &'a gix_index::Entry,
411-
options: &'a Options,
412-
}
413-
414-
struct OdbBlob<'a, Find, E>
426+
struct ReadDataImpl<'a, Find, E>
415427
where
416428
E: std::error::Error + Send + Sync + 'static,
417-
Find: FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E>,
429+
Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec<u8>) -> Result<gix_object::BlobRef<'b>, E>,
418430
{
419431
buf: &'a mut Vec<u8>,
432+
path: &'a Path,
433+
rela_path: &'a BStr,
434+
file_len: u64,
435+
entry: &'a gix_index::Entry,
436+
filter: &'a mut gix_filter::Pipeline,
437+
attr_stack: &'a mut gix_worktree::Stack,
438+
options: &'a Options,
420439
id: &'a gix_hash::oid,
421440
find: Find,
441+
worktree_bytes: &'a AtomicU64,
442+
worktree_reads: &'a AtomicUsize,
443+
odb_bytes: &'a AtomicU64,
444+
odb_reads: &'a AtomicUsize,
422445
}
423446

424-
impl<'a> traits::ReadDataOnce<'a> for WorktreeBlob<'a> {
425-
fn read_data(self) -> Result<&'a [u8], Error> {
426-
let res = read::data_to_buf_with_meta(
427-
self.path,
428-
self.buf,
429-
self.entry.mode == gix_index::entry::Mode::SYMLINK,
430-
&self.options.fs,
431-
)?;
432-
Ok(res)
433-
}
434-
}
435-
436-
impl<'a, Find, E> traits::ReadDataOnce<'a> for OdbBlob<'a, Find, E>
447+
impl<'a, Find, E> traits::ReadData<'a> for ReadDataImpl<'a, Find, E>
437448
where
438449
E: std::error::Error + Send + Sync + 'static,
439-
Find: FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E>,
450+
Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec<u8>) -> Result<gix_object::BlobRef<'b>, E>,
440451
{
441-
fn read_data(mut self) -> Result<&'a [u8], Error> {
452+
fn read_blob(mut self) -> Result<&'a [u8], Error> {
442453
(self.find)(self.id, self.buf)
443-
.map(|b| b.data)
454+
.map(|b| {
455+
self.odb_reads.fetch_add(1, Ordering::Relaxed);
456+
self.odb_bytes.fetch_add(b.data.len() as u64, Ordering::Relaxed);
457+
b.data
458+
})
444459
.map_err(move |err| Error::Find(Box::new(err)))
445460
}
461+
462+
fn stream_worktree_file(mut self) -> Result<Stream<'a>, Error> {
463+
self.buf.clear();
464+
// symlinks are only stored as actual symlinks if the FS supports it, otherwise they are just
465+
// normal files with their content equal to the linked path (so can be read normally)
466+
//
467+
let is_symlink = self.entry.mode == gix_index::entry::Mode::SYMLINK;
468+
// TODO: what to do about precompose unicode and ignore_case for symlinks
469+
let out = if is_symlink && self.options.fs.symlink {
470+
// conversion to bstr can never fail because symlinks are only used
471+
// on unix (by git) so no reason to use the try version here
472+
let symlink_path = gix_path::into_bstr(std::fs::read_link(self.path)?);
473+
self.buf.extend_from_slice(&symlink_path);
474+
self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed);
475+
Stream {
476+
inner: ToGitOutcome::Buffer(self.buf),
477+
bytes: None,
478+
len: None,
479+
}
480+
} else {
481+
self.buf.clear();
482+
let platform = self.attr_stack.at_entry(self.rela_path, Some(false), &mut self.find)?;
483+
let file = std::fs::File::open(self.path)?;
484+
let out = self
485+
.filter
486+
.convert_to_git(
487+
file,
488+
self.path,
489+
&mut |_path, attrs| {
490+
platform.matching_attributes(attrs);
491+
},
492+
&mut |buf| {
493+
(self.find)(self.id, buf)
494+
.map(|_| Some(()))
495+
.map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync + 'static>)
496+
},
497+
)
498+
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?;
499+
let len = match out {
500+
ToGitOutcome::Unchanged(_) => Some(self.file_len),
501+
ToGitOutcome::Process(_) | ToGitOutcome::Buffer(_) => None,
502+
};
503+
Stream {
504+
inner: out,
505+
bytes: Some(self.worktree_bytes),
506+
len,
507+
}
508+
};
509+
510+
self.worktree_reads.fetch_add(1, Ordering::Relaxed);
511+
Ok(out)
512+
}
446513
}

0 commit comments

Comments
 (0)