Skip to content

Commit b7e1f38

Browse files
committed
feat!: generalize rename-tracking engine for later use with status.
Previously the rename-tracking engine was integrated with tree-diffs, even though it already operated in a stand-alone fashion. Now it's officially generalized, which allows it to be tested separately and to be used when tracking renames for diffs between index and tree, index and index, and index and worktree.
1 parent 54ef315 commit b7e1f38

File tree

8 files changed

+827
-656
lines changed

8 files changed

+827
-656
lines changed

gix/src/config/cache/access.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,9 @@ impl Cache {
9393
}
9494

9595
#[cfg(feature = "blob-diff")]
96-
pub(crate) fn diff_renames(
97-
&self,
98-
) -> Result<Option<crate::object::tree::diff::Rewrites>, crate::object::tree::diff::rewrites::Error> {
96+
pub(crate) fn diff_renames(&self) -> Result<Option<crate::diff::Rewrites>, crate::diff::rewrites::Error> {
9997
self.diff_renames
100-
.get_or_try_init(|| {
101-
crate::object::tree::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config)
102-
})
98+
.get_or_try_init(|| crate::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config))
10399
.copied()
104100
}
105101

gix/src/config/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ pub(crate) struct Cache {
515515
pub(crate) url_rewrite: OnceCell<crate::remote::url::Rewrite>,
516516
/// The lazy-loaded rename information for diffs.
517517
#[cfg(feature = "blob-diff")]
518-
pub(crate) diff_renames: OnceCell<Option<crate::object::tree::diff::Rewrites>>,
518+
pub(crate) diff_renames: OnceCell<Option<crate::diff::Rewrites>>,
519519
/// A lazily loaded mapping to know which url schemes to allow
520520
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
521521
pub(crate) url_scheme: OnceCell<crate::remote::url::SchemePermission>,

gix/src/diff.rs

Lines changed: 736 additions & 0 deletions
Large diffs are not rendered by default.

gix/src/object/tree/diff/for_each.rs

Lines changed: 82 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,8 @@ use gix_object::TreeRefIter;
22
use gix_odb::FindExt;
33

44
use super::{change, Action, Change, Platform};
5-
use crate::{
6-
bstr::BStr,
7-
ext::ObjectIdExt,
8-
object::tree::{
9-
diff,
10-
diff::{rewrites, tracked},
11-
},
12-
Repository, Tree,
13-
};
5+
use crate::diff::rewrites::tracker;
6+
use crate::{bstr::BStr, diff::rewrites, ext::ObjectIdExt, object::tree::diff, Repository, Tree};
147

158
/// The error returned by methods on the [diff platform][Platform].
169
#[derive(Debug, thiserror::Error)]
@@ -20,12 +13,10 @@ pub enum Error {
2013
Diff(#[from] gix_diff::tree::changes::Error),
2114
#[error("The user-provided callback failed")]
2215
ForEach(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
23-
#[error("Could not find blob for similarity checking")]
24-
FindExistingBlob(#[from] crate::object::find::existing::Error),
2516
#[error("Could not configure diff algorithm prior to checking similarity")]
2617
ConfigureDiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
27-
#[error("Could not traverse tree to obtain possible sources for copies")]
28-
TraverseTreeForExhaustiveCopyDetection(#[from] gix_traverse::tree::breadthfirst::Error),
18+
#[error("Failure during rename tracking")]
19+
RenameTracking(#[from] tracker::emit::Error),
2920
}
3021

3122
///
@@ -50,12 +41,14 @@ impl<'a, 'old> Platform<'a, 'old> {
5041
E: std::error::Error + Sync + Send + 'static,
5142
{
5243
let repo = self.lhs.repo;
44+
let diff_algo = repo.config.diff_algorithm()?;
5345
let mut delegate = Delegate {
5446
src_tree: self.lhs,
5547
other_repo: other.repo,
5648
recorder: gix_diff::tree::Recorder::default().track_location(self.tracking),
5749
visit: for_each,
58-
tracked: self.rewrites.map(|r| tracked::State::new(r, self.tracking)),
50+
location: self.tracking,
51+
tracked: self.rewrites.map(|r| rewrites::Tracker::new(r, diff_algo)),
5952
err: None,
6053
};
6154
match gix_diff::tree::Changes::from(TreeRefIter::from_bytes(&self.lhs.data)).needed_to_obtain(
@@ -88,7 +81,8 @@ struct Delegate<'a, 'old, 'new, VisitFn, E> {
8881
other_repo: &'new Repository,
8982
recorder: gix_diff::tree::Recorder,
9083
visit: VisitFn,
91-
tracked: Option<tracked::State>,
84+
tracked: Option<rewrites::Tracker>,
85+
location: Option<gix_diff::tree::recorder::Location>,
9286
err: Option<E>,
9387
}
9488

@@ -158,8 +152,8 @@ where
158152
id: oid.to_owned().attach(self.other_repo),
159153
diff: source.diff,
160154
copy: match source.kind {
161-
tracked::visit::Kind::RenameTarget => false,
162-
tracked::visit::Kind::CopyDestination => true,
155+
tracker::visit::Kind::RenameTarget => false,
156+
tracker::visit::Kind::CopyDestination => true,
163157
},
164158
},
165159
};
@@ -181,7 +175,12 @@ where
181175
&mut self.err,
182176
),
183177
},
184-
self.src_tree,
178+
|oid, buf| self.src_tree.repo.objects.find_blob(oid, buf),
179+
|push| {
180+
self.src_tree
181+
.traverse()
182+
.breadthfirst(&mut tree_to_changes::Delegate::new(push, self.location))
183+
},
185184
)?;
186185
Ok(Some(outcome))
187186
}
@@ -234,3 +233,68 @@ where
234233
}
235234
}
236235
}
236+
237+
/// Adapter that turns a tree traversal into a stream of `Change` values: every blob entry
/// encountered while traversing is handed to a caller-supplied callback along with its path.
mod tree_to_changes {
238+
use gix_diff::tree::visit::Change;
239+
use gix_object::tree::EntryRef;
240+
241+
use crate::bstr::BStr;
242+
243+
/// A `gix_traverse::tree::Visit` implementation that forwards blob entries to `push`.
pub struct Delegate<'a> {
244+
/// Callback invoked once per reported entry with the synthesized change and the recorder's current path.
push: &'a mut dyn FnMut(Change, &BStr),
245+
/// Used for path bookkeeping only; all path-related `Visit` methods are forwarded to it.
recorder: gix_traverse::tree::Recorder,
246+
}
247+
248+
impl<'a> Delegate<'a> {
249+
/// Create a delegate that calls `push` for each reported entry.
///
/// `location` is the path-tracking mode of the diff recorder; it is translated to the
/// equivalent mode of the traversal recorder. `None` is passed through unchanged
/// (presumably disabling path tracking - confirm against `Recorder::track_location()`).
pub fn new(
250+
push: &'a mut dyn FnMut(Change, &BStr),
251+
location: Option<gix_diff::tree::recorder::Location>,
252+
) -> Self {
253+
// `gix_diff` and `gix_traverse` each declare their own `Location` type, hence the variant-by-variant mapping.
let location = location.map(|t| match t {
254+
gix_diff::tree::recorder::Location::FileName => gix_traverse::tree::recorder::Location::FileName,
255+
gix_diff::tree::recorder::Location::Path => gix_traverse::tree::recorder::Location::Path,
256+
});
257+
Self {
258+
push,
259+
recorder: gix_traverse::tree::Recorder::default().track_location(location),
260+
}
261+
}
262+
}
263+
264+
/// Path tracking is delegated to the inner recorder; only `visit_nontree` produces output.
impl gix_traverse::tree::Visit for Delegate<'_> {
265+
/// Forwarded to the inner recorder.
fn pop_front_tracked_path_and_set_current(&mut self) {
266+
self.recorder.pop_front_tracked_path_and_set_current()
267+
}
268+
269+
/// Forwarded to the inner recorder.
fn push_back_tracked_path_component(&mut self, component: &BStr) {
270+
self.recorder.push_back_tracked_path_component(component)
271+
}
272+
273+
/// Forwarded to the inner recorder.
fn push_path_component(&mut self, component: &BStr) {
274+
self.recorder.push_path_component(component)
275+
}
276+
277+
/// Forwarded to the inner recorder.
fn pop_path_component(&mut self) {
278+
self.recorder.pop_path_component();
279+
}
280+
281+
/// Trees are never reported through `push`; the traversal is always told to continue.
fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> gix_traverse::tree::visit::Action {
282+
gix_traverse::tree::visit::Action::Continue
283+
}
284+
285+
/// Report `entry` through `push` if `entry.mode.is_blob()` is true; other non-tree entries are skipped.
/// The traversal is always told to continue.
fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> gix_traverse::tree::visit::Action {
286+
if entry.mode.is_blob() {
287+
(self.push)(
288+
// The entry is expressed as a `Modification` whose mode is unchanged and whose previous id is the null id.
// NOTE(review): presumably a synthetic change so that existing tree entries can serve as
// candidate sources for copy detection - confirm against the rename tracker.
Change::Modification {
289+
previous_entry_mode: entry.mode,
290+
previous_oid: gix_hash::ObjectId::null(entry.oid.kind()),
291+
entry_mode: entry.mode,
292+
oid: entry.oid.to_owned(),
293+
},
294+
self.recorder.path(),
295+
);
296+
}
297+
gix_traverse::tree::visit::Action::Continue
298+
}
299+
}
300+
}

gix/src/object/tree/diff/mod.rs

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use gix_diff::tree::recorder::Location;
22

3+
use crate::diff::Rewrites;
34
use crate::{bstr::BStr, Tree};
45

56
/// Returned by the `for_each` function to control flow.
@@ -39,7 +40,7 @@ impl<'repo> Tree<'repo> {
3940
/// try to access blobs to compute a similarity metric. Thus, it's more compatible to turn rewrite tracking off
4041
/// using [`Platform::track_rewrites()`].
4142
#[allow(clippy::result_large_err)]
42-
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, rewrites::Error> {
43+
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, crate::diff::rewrites::Error> {
4344
Ok(Platform {
4445
state: Default::default(),
4546
lhs: self,
@@ -58,34 +59,6 @@ pub struct Platform<'a, 'repo> {
5859
rewrites: Option<Rewrites>,
5960
}
6061

61-
/// A structure to capture how to perform rename and copy tracking
62-
#[derive(Debug, Copy, Clone, PartialEq)]
63-
pub struct Rewrites {
64-
/// If `Some(…)`, do also find copies. `None` is the default which does not try to detect copies at all.
65-
///
66-
/// Note that this is an even more expensive operation than detecting renames as files.
67-
pub copies: Option<rewrites::Copies>,
68-
/// The percentage of similarity needed for files to be considered renamed, defaulting to `Some(0.5)`.
69-
/// This field is similar to `git diff -M50%`.
70-
///
71-
/// If `None`, files are only considered equal if their content matches 100%.
72-
/// Note that values greater than 1.0 have no different effect than 1.0.
73-
pub percentage: Option<f32>,
74-
/// The amount of files to consider for fuzzy rename or copy tracking. Defaults to 1000, meaning that only 1000*1000
75-
/// combinations can be tested for fuzzy matches, i.e. the ones that try to find matches by comparing similarity.
76-
/// If 0, there is no limit.
77-
///
78-
/// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not
79-
/// run the fuzzy version of identity tests at all. That way results are never partial.
80-
pub limit: usize,
81-
}
82-
83-
///
84-
pub mod rewrites;
85-
86-
/// types to actually perform rename tracking.
87-
pub(crate) mod tracked;
88-
8962
/// Configuration
9063
impl<'a, 'repo> Platform<'a, 'repo> {
9164
/// Keep track of file-names, which makes the [`location`][Change::location] field usable with the filename of the changed item.

gix/src/object/tree/diff/rewrites.rs

Lines changed: 0 additions & 108 deletions
This file was deleted.

0 commit comments

Comments
 (0)