Skip to content

Commit c45638c

Browse files
committed
feat!: generalize rename-tracking engine for later use with status.
Previously, the rename-tracking engine was integrated with tree-diffs, but already operates in a stand-alone fashion. Now it's officially generalized, which allows it to be tested separately and used when tracking renames for diffs between index and tree, index and index, and index and worktree.
1 parent 13ab629 commit c45638c

File tree

9 files changed

+850
-734
lines changed

9 files changed

+850
-734
lines changed

gix/src/config/cache/access.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,9 @@ impl Cache {
9393
}
9494

9595
#[cfg(feature = "blob-diff")]
96-
pub(crate) fn diff_renames(
97-
&self,
98-
) -> Result<Option<crate::object::tree::diff::Rewrites>, crate::object::tree::diff::rewrites::Error> {
96+
pub(crate) fn diff_renames(&self) -> Result<Option<crate::diff::Rewrites>, crate::diff::rewrites::Error> {
9997
self.diff_renames
100-
.get_or_try_init(|| {
101-
crate::object::tree::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config)
102-
})
98+
.get_or_try_init(|| crate::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config))
10399
.copied()
104100
}
105101

gix/src/config/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ pub(crate) struct Cache {
515515
pub(crate) url_rewrite: OnceCell<crate::remote::url::Rewrite>,
516516
/// The lazy-loaded rename information for diffs.
517517
#[cfg(feature = "blob-diff")]
518-
pub(crate) diff_renames: OnceCell<Option<crate::object::tree::diff::Rewrites>>,
518+
pub(crate) diff_renames: OnceCell<Option<crate::diff::Rewrites>>,
519519
/// A lazily loaded mapping to know which url schemes to allow
520520
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
521521
pub(crate) url_scheme: OnceCell<crate::remote::url::SchemePermission>,

gix/src/diff.rs

Lines changed: 756 additions & 0 deletions
Large diffs are not rendered by default.

gix/src/object/tree/diff/change.rs

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,6 @@
1+
use crate::diff::blob::DiffLineStats;
12
use crate::{bstr::BStr, Id};
23

3-
/// Information about the diff performed to detect similarity of a [Rewrite][Event::Rewrite].
4-
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
5-
pub struct DiffLineStats {
6-
/// The amount of lines to remove from the source to get to the destination.
7-
pub removals: u32,
8-
/// The amount of lines to add to the source to get to the destination.
9-
pub insertions: u32,
10-
/// The amount of lines of the previous state, in the source.
11-
pub before: u32,
12-
/// The amount of lines of the new state, in the destination.
13-
pub after: u32,
14-
}
15-
164
/// An event emitted when finding differences between two trees.
175
#[derive(Debug, Clone, Copy)]
186
pub enum Event<'a, 'old, 'new> {

gix/src/object/tree/diff/for_each.rs

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,8 @@
1-
use gix_object::TreeRefIter;
1+
use gix_object::{FindExt, TreeRefIter};
22

33
use super::{change, Action, Change, Platform};
4-
use crate::{
5-
bstr::BStr,
6-
ext::ObjectIdExt,
7-
object::tree::{
8-
diff,
9-
diff::{rewrites, tracked},
10-
},
11-
Repository, Tree,
12-
};
4+
use crate::diff::rewrites::tracker;
5+
use crate::{bstr::BStr, diff::rewrites, ext::ObjectIdExt, object::tree::diff, Repository, Tree};
136

147
/// The error returned by methods on the [diff platform][Platform].
158
#[derive(Debug, thiserror::Error)]
@@ -19,12 +12,10 @@ pub enum Error {
1912
Diff(#[from] gix_diff::tree::changes::Error),
2013
#[error("The user-provided callback failed")]
2114
ForEach(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
22-
#[error("Could not find blob for similarity checking")]
23-
FindExistingBlob(#[from] crate::object::find::existing::Error),
2415
#[error("Could not configure diff algorithm prior to checking similarity")]
2516
ConfigureDiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
26-
#[error("Could not traverse tree to obtain possible sources for copies")]
27-
TraverseTreeForExhaustiveCopyDetection(#[from] gix_traverse::tree::breadthfirst::Error),
17+
#[error("Failure during rename tracking")]
18+
RenameTracking(#[from] tracker::emit::Error),
2819
}
2920

3021
///
@@ -49,12 +40,14 @@ impl<'a, 'old> Platform<'a, 'old> {
4940
E: std::error::Error + Sync + Send + 'static,
5041
{
5142
let repo = self.lhs.repo;
43+
let diff_algo = repo.config.diff_algorithm()?;
5244
let mut delegate = Delegate {
5345
src_tree: self.lhs,
5446
other_repo: other.repo,
5547
recorder: gix_diff::tree::Recorder::default().track_location(self.tracking),
5648
visit: for_each,
57-
tracked: self.rewrites.map(|r| tracked::State::new(r, self.tracking)),
49+
location: self.tracking,
50+
tracked: self.rewrites.map(|r| rewrites::Tracker::new(r, diff_algo)),
5851
err: None,
5952
};
6053
match gix_diff::tree::Changes::from(TreeRefIter::from_bytes(&self.lhs.data)).needed_to_obtain(
@@ -87,7 +80,8 @@ struct Delegate<'a, 'old, 'new, VisitFn, E> {
8780
other_repo: &'new Repository,
8881
recorder: gix_diff::tree::Recorder,
8982
visit: VisitFn,
90-
tracked: Option<tracked::State>,
83+
tracked: Option<rewrites::Tracker>,
84+
location: Option<gix_diff::tree::recorder::Location>,
9185
err: Option<E>,
9286
}
9387

@@ -157,8 +151,8 @@ where
157151
id: oid.to_owned().attach(self.other_repo),
158152
diff: source.diff,
159153
copy: match source.kind {
160-
tracked::visit::Kind::RenameTarget => false,
161-
tracked::visit::Kind::CopyDestination => true,
154+
tracker::visit::Kind::RenameTarget => false,
155+
tracker::visit::Kind::CopyDestination => true,
162156
},
163157
},
164158
};
@@ -180,7 +174,12 @@ where
180174
&mut self.err,
181175
),
182176
},
183-
self.src_tree,
177+
|oid, buf| self.src_tree.repo.objects.find_blob(oid, buf),
178+
|push| {
179+
self.src_tree
180+
.traverse()
181+
.breadthfirst(&mut tree_to_changes::Delegate::new(push, self.location))
182+
},
184183
)?;
185184
Ok(Some(outcome))
186185
}
@@ -233,3 +232,68 @@ where
233232
}
234233
}
235234
}
235+
236+
mod tree_to_changes {
237+
use gix_diff::tree::visit::Change;
238+
use gix_object::tree::EntryRef;
239+
240+
use crate::bstr::BStr;
241+
242+
pub struct Delegate<'a> {
243+
push: &'a mut dyn FnMut(Change, &BStr),
244+
recorder: gix_traverse::tree::Recorder,
245+
}
246+
247+
impl<'a> Delegate<'a> {
248+
pub fn new(
249+
push: &'a mut dyn FnMut(Change, &BStr),
250+
location: Option<gix_diff::tree::recorder::Location>,
251+
) -> Self {
252+
let location = location.map(|t| match t {
253+
gix_diff::tree::recorder::Location::FileName => gix_traverse::tree::recorder::Location::FileName,
254+
gix_diff::tree::recorder::Location::Path => gix_traverse::tree::recorder::Location::Path,
255+
});
256+
Self {
257+
push,
258+
recorder: gix_traverse::tree::Recorder::default().track_location(location),
259+
}
260+
}
261+
}
262+
263+
impl gix_traverse::tree::Visit for Delegate<'_> {
264+
fn pop_front_tracked_path_and_set_current(&mut self) {
265+
self.recorder.pop_front_tracked_path_and_set_current()
266+
}
267+
268+
fn push_back_tracked_path_component(&mut self, component: &BStr) {
269+
self.recorder.push_back_tracked_path_component(component)
270+
}
271+
272+
fn push_path_component(&mut self, component: &BStr) {
273+
self.recorder.push_path_component(component)
274+
}
275+
276+
fn pop_path_component(&mut self) {
277+
self.recorder.pop_path_component();
278+
}
279+
280+
fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> gix_traverse::tree::visit::Action {
281+
gix_traverse::tree::visit::Action::Continue
282+
}
283+
284+
fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> gix_traverse::tree::visit::Action {
285+
if entry.mode.is_blob() {
286+
(self.push)(
287+
Change::Modification {
288+
previous_entry_mode: entry.mode,
289+
previous_oid: gix_hash::ObjectId::null(entry.oid.kind()),
290+
entry_mode: entry.mode,
291+
oid: entry.oid.to_owned(),
292+
},
293+
self.recorder.path(),
294+
);
295+
}
296+
gix_traverse::tree::visit::Action::Continue
297+
}
298+
}
299+
}

gix/src/object/tree/diff/mod.rs

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use gix_diff::tree::recorder::Location;
22

3+
use crate::diff::Rewrites;
34
use crate::{bstr::BStr, Tree};
45

56
/// Returned by the `for_each` function to control flow.
@@ -39,7 +40,7 @@ impl<'repo> Tree<'repo> {
3940
/// try to access blobs to compute a similarity metric. Thus, it's more compatible to turn rewrite tracking off
4041
/// using [`Platform::track_rewrites()`].
4142
#[allow(clippy::result_large_err)]
42-
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, rewrites::Error> {
43+
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, crate::diff::rewrites::Error> {
4344
Ok(Platform {
4445
state: Default::default(),
4546
lhs: self,
@@ -58,34 +59,6 @@ pub struct Platform<'a, 'repo> {
5859
rewrites: Option<Rewrites>,
5960
}
6061

61-
/// A structure to capture how to perform rename and copy tracking
62-
#[derive(Debug, Copy, Clone, PartialEq)]
63-
pub struct Rewrites {
64-
/// If `Some(…)`, do also find copies. `None` is the default which does not try to detect copies at all.
65-
///
66-
/// Note that this is an even more expensive operation than detecting renames as files.
67-
pub copies: Option<rewrites::Copies>,
68-
/// The percentage of similarity needed for files to be considered renamed, defaulting to `Some(0.5)`.
69-
/// This field is similar to `git diff -M50%`.
70-
///
71-
/// If `None`, files are only considered equal if their content matches 100%.
72-
/// Note that values greater than 1.0 have no different effect than 1.0.
73-
pub percentage: Option<f32>,
74-
/// The amount of files to consider for fuzzy rename or copy tracking. Defaults to 1000, meaning that only 1000*1000
75-
/// combinations can be tested for fuzzy matches, i.e. the ones that try to find matches by comparing similarity.
76-
/// If 0, there is no limit.
77-
///
78-
/// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not
79-
/// run the fuzzy version of identity tests at all. That way results are never partial.
80-
pub limit: usize,
81-
}
82-
83-
///
84-
pub mod rewrites;
85-
86-
/// types to actually perform rename tracking.
87-
pub(crate) mod tracked;
88-
8962
/// Configuration
9063
impl<'a, 'repo> Platform<'a, 'repo> {
9164
/// Keep track of file-names, which makes the [`location`][Change::location] field usable with the filename of the changed item.

gix/src/object/tree/diff/rewrites.rs

Lines changed: 0 additions & 108 deletions
This file was deleted.

0 commit comments

Comments
 (0)