Skip to content

Commit 4743212

Browse files
committed
feat!: object::blob::diff::Platform now performs all necessary conversions.
Previously it would just offer the git-ODB version of a blob for diffing, while it will now make it possible to apply all necessary conversion steps for you. This also moves `Event::diff()` to `Change::diff()`, adds `Repository::diff_resource_cache()` and refactors nearly everything about the `objects::blob::diff::Platform`.
1 parent 406acef commit 4743212

File tree

9 files changed

+260
-129
lines changed

9 files changed

+260
-129
lines changed

gix/src/diff.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,15 @@ mod utils {
104104
/// `repo` is used to obtain the needed configuration values, and `index` is used to potentially read `.gitattributes`
105105
/// files from which may affect the diff operation.
106106
/// `mode` determines how the diffable files will look like, and also how fast, in average, these conversions are.
107+
/// `attribute_source` controls where `.gitattributes` will be read from, and it's typically adjusted based on the
108+
/// `roots` - if there are no worktree roots, `.gitattributes` are also not usually read from worktrees.
107109
/// `roots` provide information about where to get diffable data from, so source and destination can either be sourced from
108110
/// a worktree, or from the object database, or both.
109111
pub fn resource_cache(
110112
repo: &Repository,
111113
index: &gix_index::State,
112114
mode: gix_diff::blob::pipeline::Mode,
115+
attribute_source: gix_worktree::stack::state::attributes::Source,
113116
roots: gix_diff::blob::pipeline::WorktreeRoots,
114117
) -> Result<gix_diff::blob::Platform, resource_cache::Error> {
115118
let diff_algo = repo.config.diff_algorithm()?;
@@ -129,9 +132,7 @@ mod utils {
129132
// TODO(perf): this could benefit from not having to build an intermediate index,
130133
// and traverse the a tree directly.
131134
index,
132-
// This is an optimization, as we avoid reading files from the working tree, which also
133-
// might not match the index at all depending on what the user passed.
134-
gix_worktree::stack::state::attributes::Source::IdMapping,
135+
attribute_source,
135136
)?
136137
.inner,
137138
);

gix/src/object/blob.rs

Lines changed: 182 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -3,63 +3,132 @@ use crate::{Blob, ObjectDetached};
33
///
44
#[cfg(feature = "blob-diff")]
55
pub mod diff {
6+
use gix_diff::blob::platform::prepare_diff::Operation;
7+
use gix_diff::blob::ResourceKind;
68
use std::ops::Range;
79

8-
use crate::{bstr::ByteSlice, object::blob::diff::line::Change};
10+
use crate::object::tree::diff::change::Event;
11+
use crate::{bstr::ByteSlice, object::blob::diff::lines::Change};
912

1013
/// A platform to keep temporary information to perform line diffs on modified blobs.
1114
///
12-
pub struct Platform<'old, 'new> {
13-
/// The previous version of the blob.
14-
pub old: crate::Object<'old>,
15-
/// The new version of the blob.
16-
pub new: crate::Object<'new>,
17-
/// The algorithm to use when calling [imara_diff::diff()][gix_diff::blob::diff()].
18-
/// This value is determined by the `diff.algorithm` configuration.
19-
pub algo: gix_diff::blob::Algorithm,
15+
pub struct Platform<'a> {
16+
/// The cache holding diffable data related to our blobs.
17+
pub resource_cache: &'a mut gix_diff::blob::Platform,
2018
}
2119

2220
///
2321
pub mod init {
24-
/// The error returned by [`Platform::from_ids()`][super::Platform::from_ids()].
25-
#[derive(Debug, thiserror::Error)]
26-
#[allow(missing_docs)]
27-
pub enum Error {
28-
#[error("Could not find the previous blob or the new blob to diff against")]
29-
FindExisting(#[from] crate::object::find::existing::Error),
30-
#[error("Could not obtain diff algorithm from configuration")]
31-
DiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
32-
}
22+
/// The error returned by [`Platform::from_tree_change()`][super::Platform::from_tree_change()].
23+
pub type Error = gix_diff::blob::platform::set_resource::Error;
3324
}
3425

35-
impl<'old, 'new> Platform<'old, 'new> {
36-
/// Produce a platform for performing various diffs after obtaining the object data of `previous_id` and `new_id`.
37-
///
38-
/// Note that these objects are treated as raw data and are assumed to be blobs.
39-
pub fn from_ids(
40-
previous_id: &crate::Id<'old>,
41-
new_id: &crate::Id<'new>,
42-
) -> Result<Platform<'old, 'new>, init::Error> {
43-
match previous_id
44-
.object()
45-
.and_then(|old| new_id.object().map(|new| (old, new)))
46-
{
47-
Ok((old, new)) => {
48-
let algo = match new_id.repo.config.diff_algorithm() {
49-
Ok(algo) => algo,
50-
Err(err) => return Err(err.into()),
51-
};
52-
Ok(Platform { old, new, algo })
26+
impl<'a, 'new> Platform<'a> {
27+
/// Produce a platform for performing various diffs after obtaining the data from a single `tree_change`.
28+
pub fn from_tree_change(
29+
tree_change: &crate::object::tree::diff::Change<'_, '_, '_>,
30+
resource_cache: &'a mut gix_diff::blob::Platform,
31+
) -> Result<Platform<'a>, init::Error> {
32+
match tree_change.event {
33+
Event::Addition { entry_mode, id } => {
34+
resource_cache.set_resource(
35+
id.repo.object_hash().null(),
36+
entry_mode.kind(),
37+
tree_change.location,
38+
ResourceKind::OldOrSource,
39+
&id.repo.objects,
40+
)?;
41+
resource_cache.set_resource(
42+
id.inner,
43+
entry_mode.kind(),
44+
tree_change.location,
45+
ResourceKind::NewOrDestination,
46+
&id.repo.objects,
47+
)?;
48+
}
49+
Event::Deletion { entry_mode, id } => {
50+
resource_cache.set_resource(
51+
id.inner,
52+
entry_mode.kind(),
53+
tree_change.location,
54+
ResourceKind::OldOrSource,
55+
&id.repo.objects,
56+
)?;
57+
resource_cache.set_resource(
58+
id.repo.object_hash().null(),
59+
entry_mode.kind(),
60+
tree_change.location,
61+
ResourceKind::NewOrDestination,
62+
&id.repo.objects,
63+
)?;
64+
}
65+
Event::Modification {
66+
previous_entry_mode,
67+
previous_id,
68+
entry_mode,
69+
id,
70+
} => {
71+
resource_cache.set_resource(
72+
previous_id.inner,
73+
previous_entry_mode.kind(),
74+
tree_change.location,
75+
ResourceKind::OldOrSource,
76+
&previous_id.repo.objects,
77+
)?;
78+
resource_cache.set_resource(
79+
id.inner,
80+
entry_mode.kind(),
81+
tree_change.location,
82+
ResourceKind::NewOrDestination,
83+
&id.repo.objects,
84+
)?;
85+
}
86+
Event::Rewrite {
87+
source_location,
88+
source_entry_mode,
89+
source_id,
90+
entry_mode,
91+
id,
92+
diff: _,
93+
copy: _,
94+
} => {
95+
resource_cache.set_resource(
96+
source_id.inner,
97+
source_entry_mode.kind(),
98+
source_location,
99+
ResourceKind::OldOrSource,
100+
&source_id.repo.objects,
101+
)?;
102+
resource_cache.set_resource(
103+
id.inner,
104+
entry_mode.kind(),
105+
tree_change.location,
106+
ResourceKind::NewOrDestination,
107+
&id.repo.objects,
108+
)?;
53109
}
54-
Err(err) => Err(err.into()),
55110
}
111+
Ok(Self { resource_cache })
56112
}
57113
}
58114

59115
///
60-
pub mod line {
116+
pub mod lines {
61117
use crate::bstr::BStr;
62118

119+
/// The error returned by [Platform::lines()](super::Platform::lines()).
120+
#[derive(Debug, thiserror::Error)]
121+
#[allow(missing_docs)]
122+
pub enum Error<E>
123+
where
124+
E: std::error::Error + Send + Sync + 'static,
125+
{
126+
#[error(transparent)]
127+
ProcessHunk(E),
128+
#[error(transparent)]
129+
PrepareDiff(#[from] gix_diff::blob::platform::prepare_diff::Error),
130+
}
131+
63132
/// A change to a hunk of lines.
64133
pub enum Change<'a, 'data> {
65134
/// Lines were added.
@@ -82,70 +151,91 @@ pub mod diff {
82151
}
83152
}
84153

85-
impl<'old, 'new> Platform<'old, 'new> {
154+
impl<'a> Platform<'a> {
86155
/// Perform a diff on lines between the old and the new version of a blob, passing each hunk of lines to `process_hunk`.
87-
/// The diffing algorithm is determined by the `diff.algorithm` configuration.
88-
///
89-
/// Note that you can invoke the diff more flexibly as well.
156+
/// The diffing algorithm is determined by the `diff.algorithm` configuration, or individual diff drivers.
157+
/// Note that `process_hunk` is not called if one of the involved resources are binary, but that can be determined
158+
/// by introspecting the outcome.
90159
// TODO: more tests (only tested insertion right now)
91-
pub fn lines<FnH, E>(&self, mut process_hunk: FnH) -> Result<(), E>
160+
pub fn lines<FnH, E>(
161+
&mut self,
162+
mut process_hunk: FnH,
163+
) -> Result<gix_diff::blob::platform::prepare_diff::Outcome<'_>, lines::Error<E>>
92164
where
93-
FnH: FnMut(line::Change<'_, '_>) -> Result<(), E>,
94-
E: std::error::Error,
165+
FnH: FnMut(lines::Change<'_, '_>) -> Result<(), E>,
166+
E: std::error::Error + Send + Sync + 'static,
95167
{
96-
let input = self.line_tokens();
97-
let mut err = None;
98-
let mut lines = Vec::new();
99-
gix_diff::blob::diff(self.algo, &input, |before: Range<u32>, after: Range<u32>| {
100-
if err.is_some() {
101-
return;
168+
self.resource_cache.options.skip_internal_diff_if_external_is_configured = false;
169+
170+
let prep = self.resource_cache.prepare_diff()?;
171+
match prep.operation {
172+
Operation::InternalDiff { algorithm } => {
173+
let input = prep.interned_input();
174+
let mut err = None;
175+
let mut lines = Vec::new();
176+
177+
gix_diff::blob::diff(algorithm, &input, |before: Range<u32>, after: Range<u32>| {
178+
if err.is_some() {
179+
return;
180+
}
181+
lines.clear();
182+
lines.extend(
183+
input.before[before.start as usize..before.end as usize]
184+
.iter()
185+
.map(|&line| input.interner[line].as_bstr()),
186+
);
187+
let end_of_before = lines.len();
188+
lines.extend(
189+
input.after[after.start as usize..after.end as usize]
190+
.iter()
191+
.map(|&line| input.interner[line].as_bstr()),
192+
);
193+
let hunk_before = &lines[..end_of_before];
194+
let hunk_after = &lines[end_of_before..];
195+
if hunk_after.is_empty() {
196+
err = process_hunk(Change::Deletion { lines: hunk_before }).err();
197+
} else if hunk_before.is_empty() {
198+
err = process_hunk(Change::Addition { lines: hunk_after }).err();
199+
} else {
200+
err = process_hunk(Change::Modification {
201+
lines_before: hunk_before,
202+
lines_after: hunk_after,
203+
})
204+
.err();
205+
}
206+
});
207+
208+
if let Some(err) = err {
209+
return Err(lines::Error::ProcessHunk(err));
210+
}
102211
}
103-
lines.clear();
104-
lines.extend(
105-
input.before[before.start as usize..before.end as usize]
106-
.iter()
107-
.map(|&line| input.interner[line].as_bstr()),
108-
);
109-
let end_of_before = lines.len();
110-
lines.extend(
111-
input.after[after.start as usize..after.end as usize]
112-
.iter()
113-
.map(|&line| input.interner[line].as_bstr()),
114-
);
115-
let hunk_before = &lines[..end_of_before];
116-
let hunk_after = &lines[end_of_before..];
117-
if hunk_after.is_empty() {
118-
err = process_hunk(Change::Deletion { lines: hunk_before }).err();
119-
} else if hunk_before.is_empty() {
120-
err = process_hunk(Change::Addition { lines: hunk_after }).err();
121-
} else {
122-
err = process_hunk(Change::Modification {
123-
lines_before: hunk_before,
124-
lines_after: hunk_after,
125-
})
126-
.err();
212+
Operation::ExternalCommand { .. } => {
213+
unreachable!("we disabled that")
127214
}
128-
});
129-
130-
match err {
131-
Some(err) => Err(err),
132-
None => Ok(()),
133-
}
215+
Operation::SourceOrDestinationIsBinary => {}
216+
};
217+
Ok(prep)
134218
}
135219

136220
/// Count the amount of removed and inserted lines efficiently.
137-
pub fn line_counts(&self) -> gix_diff::blob::sink::Counter<()> {
138-
let tokens = self.line_tokens();
139-
gix_diff::blob::diff(self.algo, &tokens, gix_diff::blob::sink::Counter::default())
140-
}
221+
/// Note that nothing will happen if one of the inputs is binary, and `None` will be returned.
222+
pub fn line_counts(
223+
&mut self,
224+
) -> Result<Option<gix_diff::blob::sink::Counter<()>>, gix_diff::blob::platform::prepare_diff::Error> {
225+
self.resource_cache.options.skip_internal_diff_if_external_is_configured = false;
141226

142-
/// Return a tokenizer which treats lines as smallest unit for use in a [diff operation][gix_diff::blob::diff()].
143-
///
144-
/// The line separator is determined according to normal git rules and filters.
145-
pub fn line_tokens(&self) -> gix_diff::blob::intern::InternedInput<&[u8]> {
146-
// TODO: make use of `core.eol` and/or filters to do line-counting correctly. It's probably
147-
// OK to just know how these objects are saved to know what constitutes a line.
148-
gix_diff::blob::intern::InternedInput::new(self.old.data.as_bytes(), self.new.data.as_bytes())
227+
let prep = self.resource_cache.prepare_diff()?;
228+
match prep.operation {
229+
Operation::InternalDiff { algorithm } => {
230+
let tokens = prep.interned_input();
231+
let counter = gix_diff::blob::diff(algorithm, &tokens, gix_diff::blob::sink::Counter::default());
232+
Ok(Some(counter))
233+
}
234+
Operation::ExternalCommand { .. } => {
235+
unreachable!("we disabled that")
236+
}
237+
Operation::SourceOrDestinationIsBinary => Ok(None),
238+
}
149239
}
150240
}
151241
}

gix/src/object/tree/diff/change.rs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -68,25 +68,25 @@ pub enum Event<'a, 'old, 'new> {
6868
},
6969
}
7070

71-
impl<'a, 'old, 'new> Event<'a, 'old, 'new> {
72-
/// Produce a platform for performing a line-diff, or `None` if this is not a [`Modification`][Event::Modification]
73-
/// or one of the entries to compare is not a blob.
74-
pub fn diff(
71+
impl<'a, 'old, 'new> super::Change<'a, 'old, 'new> {
72+
/// Produce a platform for performing a line-diff no matter whether the underlying [Event] is an addition, modification,
73+
/// deletion or rewrite.
74+
/// Use `resource_cache` to store the diffable data and possibly reuse previously stored data.
75+
/// Afterwards the platform, which holds on to `resource_cache`, can be used to perform ready-made operations on the
76+
/// pre-set resources.
77+
///
78+
/// ### Warning about Memory Consumption
79+
///
80+
/// `resource_cache` only grows, so one should call [`gix_diff::blob::Platform::clear_resource_cache`] occasionally.
81+
pub fn diff<'b>(
7582
&self,
76-
) -> Option<Result<crate::object::blob::diff::Platform<'old, 'new>, crate::object::blob::diff::init::Error>> {
77-
match self {
78-
Event::Modification {
79-
previous_entry_mode,
80-
previous_id,
81-
entry_mode,
82-
id,
83-
} if entry_mode.is_blob() && previous_entry_mode.is_blob() => {
84-
Some(crate::object::blob::diff::Platform::from_ids(previous_id, id))
85-
}
86-
_ => None,
87-
}
83+
resource_cache: &'b mut gix_diff::blob::Platform,
84+
) -> Result<crate::object::blob::diff::Platform<'b>, crate::object::blob::diff::init::Error> {
85+
crate::object::blob::diff::Platform::from_tree_change(self, resource_cache)
8886
}
87+
}
8988

89+
impl<'a, 'old, 'new> Event<'a, 'old, 'new> {
9090
/// Return the current mode of this instance.
9191
pub fn entry_mode(&self) -> gix_object::tree::EntryMode {
9292
match self {

0 commit comments

Comments
 (0)