Skip to content

Commit 0d01eb2

Browse files
committed
feat!: provide statistics at the end of a index status operation
1 parent 53de126 commit 0d01eb2

File tree

5 files changed

+302
-79
lines changed

5 files changed

+302
-79
lines changed

gix-status/src/index_as_worktree/function.rs

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::sync::atomic::Ordering;
1+
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
22
use std::{io, marker::PhantomData, path::Path};
33

44
use bstr::BStr;
@@ -10,21 +10,29 @@ use crate::{
1010
traits,
1111
traits::{CompareBlobs, SubmoduleStatus},
1212
types::{Error, Options},
13-
Change, VisitEntry,
13+
Change, Outcome, VisitEntry,
1414
},
1515
read, Pathspec,
1616
};
1717

1818
/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them
19-
/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes.
19+
/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes, and
20+
/// `submodule` which can take a look at submodules in detail to produce status information.
2021
/// `options` are used to configure the operation.
2122
///
2223
/// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to
23-
/// the current time for which it will be considered fresh.
24+
/// the current time for which it will be considered fresh as long as it is included which depends on `pathspec`.
2425
///
25-
/// Note that this isn't technically quite what this function does as this also provides some additional information,
26-
/// like whether a file has conflicts, and files that were added with `git add` are shown as a special
27-
/// changes despite not technically requiring a change to the index since `git add` already added the file to the index.
26+
/// ### Note
27+
///
28+
/// Technically, this function does more as this also provides additional information, like whether a file has conflicts,
29+
/// and files that were added with `git add` are shown as a special change as well. It also updates index entry stats like `git status` would
30+
/// if it had to determine the hash. If that happened, the index should be written back, see [Outcome::skipped].
31+
/// A file added with `git add` is a 'change' that does not technically require a change to the index since `git add` already added the
32+
/// file to the index, but didn't hash it.
33+
///
34+
/// Thus some care has to be taken to do the right thing when letting the index match the worktree by evaluating the changes observed
35+
/// by the `collector`.
2836
#[allow(clippy::too_many_arguments)]
2937
pub fn index_as_worktree<'index, T, U, Find, E1, E2>(
3038
index: &'index mut gix_index::State,
@@ -36,7 +44,7 @@ pub fn index_as_worktree<'index, T, U, Find, E1, E2>(
3644
progress: &mut dyn gix_features::progress::Progress,
3745
pathspec: impl Pathspec + Send + Clone,
3846
options: Options,
39-
) -> Result<(), Error>
47+
) -> Result<Outcome, Error>
4048
where
4149
T: Send,
4250
U: Send,
@@ -60,7 +68,18 @@ where
6068
.prefixed_entries_range(pathspec.common_prefix())
6169
.unwrap_or(0..index.entries().len());
6270
let (entries, path_backing) = index.entries_mut_and_pathbacking();
71+
let num_entries = entries.len();
6372
let entries = &mut entries[range];
73+
74+
let _span = gix_features::trace::detail!("gix_status::index_as_worktree",
75+
num_entries = entries.len(),
76+
chunk_size = chunk_size,
77+
thread_limit = ?thread_limit);
78+
79+
let entries_skipped_by_common_prefix = num_entries - entries.len();
80+
let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_updated) = Default::default();
81+
let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default();
82+
6483
progress.init(entries.len().into(), gix_features::progress::count("files"));
6584
let count = progress.counter();
6685

@@ -70,6 +89,10 @@ where
7089
thread_limit,
7190
{
7291
let options = &options;
92+
let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags);
93+
let (symlink_metadata_calls, entries_updated) = (&symlink_metadata_calls, &entries_updated);
94+
let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes);
95+
let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads);
7396
move |_| {
7497
(
7598
State {
@@ -79,6 +102,16 @@ where
79102
timestamp,
80103
path_backing,
81104
options,
105+
106+
skipped_by_pathspec,
107+
skipped_by_entry_flags,
108+
symlink_metadata_calls,
109+
entries_updated,
110+
racy_clean,
111+
worktree_reads,
112+
worktree_bytes,
113+
odb_reads,
114+
odb_bytes,
82115
},
83116
compare,
84117
submodule,
@@ -101,7 +134,20 @@ where
101134
collector,
102135
phantom: PhantomData,
103136
},
104-
)
137+
)?;
138+
139+
Ok(Outcome {
140+
entries_skipped_by_common_prefix,
141+
entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed),
142+
entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed),
143+
entries_updated: entries_updated.load(Ordering::Relaxed),
144+
symlink_metadata_calls: symlink_metadata_calls.load(Ordering::Relaxed),
145+
racy_clean: racy_clean.load(Ordering::Relaxed),
146+
worktree_files_read: worktree_reads.load(Ordering::Relaxed),
147+
worktree_bytes: worktree_bytes.load(Ordering::Relaxed),
148+
odb_objects_read: odb_reads.load(Ordering::Relaxed),
149+
odb_bytes: odb_bytes.load(Ordering::Relaxed),
150+
})
105151
}
106152

107153
struct State<'a, 'b> {
@@ -111,6 +157,16 @@ struct State<'a, 'b> {
111157
path_stack: crate::SymlinkCheck,
112158
path_backing: &'b [u8],
113159
options: &'a Options,
160+
161+
skipped_by_pathspec: &'a AtomicUsize,
162+
skipped_by_entry_flags: &'a AtomicUsize,
163+
symlink_metadata_calls: &'a AtomicUsize,
164+
entries_updated: &'a AtomicUsize,
165+
racy_clean: &'a AtomicUsize,
166+
worktree_bytes: &'a AtomicU64,
167+
worktree_reads: &'a AtomicUsize,
168+
odb_bytes: &'a AtomicU64,
169+
odb_reads: &'a AtomicUsize,
114170
}
115171

116172
type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, &'index BStr, Option<Change<T, U>>, bool), Error>;
@@ -140,10 +196,12 @@ impl<'index> State<'_, 'index> {
140196
| gix_index::entry::Flags::ASSUME_VALID
141197
| gix_index::entry::Flags::FSMONITOR_VALID,
142198
) {
199+
self.skipped_by_entry_flags.fetch_add(1, Ordering::Relaxed);
143200
return None;
144201
}
145202
let path = entry.path_in(self.path_backing);
146203
if !pathspec.is_included(path, Some(false)) {
204+
self.skipped_by_pathspec.fetch_add(1, Ordering::Relaxed);
147205
return None;
148206
}
149207
let status = self.compute_status(&mut *entry, path, diff, submodule, find);
@@ -208,6 +266,7 @@ impl<'index> State<'_, 'index> {
208266
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Some(Change::Removed)),
209267
Err(err) => return Err(Error::Io(err)),
210268
};
269+
self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed);
211270
let metadata = match worktree_path.symlink_metadata() {
212271
Ok(metadata) if metadata.is_dir() => {
213272
// index entries are normally only for files/symlinks
@@ -265,21 +324,33 @@ impl<'index> State<'_, 'index> {
265324
racy_clean = new_stat.is_racy(self.timestamp, self.options.stat);
266325
if !racy_clean {
267326
return Ok(None);
327+
} else {
328+
self.racy_clean.fetch_add(1, Ordering::Relaxed);
268329
}
269330
}
270331

332+
self.buf.clear();
271333
let read_file = WorktreeBlob {
272334
buf: &mut self.buf,
273335
path: worktree_path,
274336
entry,
275337
options: self.options,
276338
};
339+
self.odb_buf.clear();
277340
let read_blob = OdbBlob {
278341
buf: &mut self.odb_buf,
279342
id: &entry.id,
280343
find,
281344
};
282345
let content_change = diff.compare_blobs(entry, metadata.len() as usize, read_file, read_blob)?;
346+
if !self.buf.is_empty() {
347+
self.worktree_reads.fetch_add(1, Ordering::Relaxed);
348+
self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed);
349+
}
350+
if !self.odb_buf.is_empty() {
351+
self.odb_reads.fetch_add(1, Ordering::Relaxed);
352+
self.odb_bytes.fetch_add(self.odb_buf.len() as u64, Ordering::Relaxed);
353+
}
283354
// This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated.
284355
if content_change.is_some() && racy_clean {
285356
entry.stat.size = 0;
@@ -292,6 +363,7 @@ impl<'index> State<'_, 'index> {
292363
} else {
293364
// don't diff against this file next time since we know the file is unchanged.
294365
entry.stat = new_stat;
366+
self.entries_updated.fetch_add(1, Ordering::Relaxed);
295367
Ok(None)
296368
}
297369
}

gix-status/src/index_as_worktree/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Changes between an index and a worktree.
22
///
33
mod types;
4-
pub use types::{Change, Error, Options, VisitEntry};
4+
pub use types::{Change, Error, Options, Outcome, VisitEntry};
55

66
mod recorder;
77
pub use recorder::{Record, Recorder};

gix-status/src/index_as_worktree/types.rs

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use bstr::{BStr, BString};
22

3-
/// The error returned by [`status()`](crate::index_as_worktree()).
3+
/// The error returned by [`index_as_worktree()`](crate::index_as_worktree()).
44
#[derive(Debug, thiserror::Error)]
55
#[allow(missing_docs)]
66
pub enum Error {
@@ -19,8 +19,8 @@ pub enum Error {
1919
},
2020
}
2121

22-
#[derive(Clone, Debug, Default)]
2322
/// Options that control how the index status with a worktree is computed.
23+
#[derive(Clone, Debug, Default)]
2424
pub struct Options {
2525
/// Capabilities of the file system which affect the status computation.
2626
pub fs: gix_fs::Capabilities,
@@ -32,6 +32,43 @@ pub struct Options {
3232
pub stat: gix_index::entry::stat::Options,
3333
}
3434

35+
/// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()).
36+
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
37+
pub struct Outcome {
38+
/// The amount of entries we didn't even traverse (and thus update with stat) due to a common prefix in pathspecs.
39+
/// This is similar to the current working directory.
40+
pub entries_skipped_by_common_prefix: usize,
41+
/// The amount of entries that were skipped due to exclusion by *pathspecs*.
42+
pub entries_skipped_by_pathspec: usize,
43+
/// The amount of entries that were skipped as the entry flag indicated this.
44+
pub entries_skipped_by_entry_flags: usize,
45+
/// The amount of times we queried symlink-metadata for a file on disk.
46+
pub symlink_metadata_calls: usize,
47+
/// The amount of entries whose stats have been updated as their modification couldn't be determined without an expensive calculation.
48+
///
49+
/// With these updates, this calculation will be avoided next time the status runs.
50+
pub entries_updated: usize,
51+
/// The amount of entries that were considered racy-clean - they will need thorough checking to see if they are truly clean,
52+
/// i.e. didn't change.
53+
pub racy_clean: usize,
54+
55+
/// The amount of bytes read from the worktree in order to determine if an entry changed, across all files.
56+
pub worktree_bytes: u64,
57+
/// The amount of files read in full from the worktree (and into memory).
58+
pub worktree_files_read: usize,
59+
/// The amount of bytes read from the object database in order to determine if an entry changed, across all objects.
60+
pub odb_bytes: u64,
61+
/// The amount of objects read from the object database.
62+
pub odb_objects_read: usize,
63+
}
64+
65+
impl Outcome {
66+
/// The total amount of skipped entries, i.e. those that weren't processed at all.
67+
pub fn skipped(&self) -> usize {
68+
self.entries_skipped_by_common_prefix + self.entries_skipped_by_pathspec + self.entries_skipped_by_entry_flags
69+
}
70+
}
71+
3572
/// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`.
3673
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
3774
pub enum Change<T = (), U = ()> {
@@ -61,7 +98,7 @@ pub enum Change<T = (), U = ()> {
6198
SubmoduleModification(U),
6299
/// An index entry that corresponds to an untracked worktree file marked with `git add --intent-to-add`.
63100
///
64-
/// This means it's not available in the object database yet or the index was created from,
101+
/// This means it's not available in the object database yet
65102
/// even though now an entry exists that represents the worktree file.
66103
IntentToAdd,
67104
}

gix-status/src/read.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,6 @@ pub fn data_to_buf_with_meta<'a>(
5050
// on unix (by git) so no reason to use the try version here
5151
let symlink_path = gix_path::into_bstr(read_link(path)?);
5252
buf.extend_from_slice(&symlink_path);
53-
// TODO: there is no reason this should be a clone
54-
// std isn't great about allowing users to avoid allocations but we could
55-
// simply write our own wrapper around libc::readlink which reuses the
56-
// buffer. This would require unsafe code tough (obviously)
5753
} else {
5854
buf.clear();
5955
File::open(path)?.read_to_end(buf)?;

0 commit comments

Comments
 (0)