Skip to content

Commit dd99991

Browse files
committed
feat: add blob::PlatformRef::id_by_pick() to more efficiently pick merge results.
This works by either selecting a possibly unchanged and not even loaded resource, instead of always loading it to provide a buffer, in case the user doesn't actually want a buffer. Note that this also alters `buffer_by_pick()` to enforce handling of the 'buffer-too-large' option.
1 parent c1cf08c commit dd99991

File tree

3 files changed

+118
-24
lines changed

3 files changed

+118
-24
lines changed

gix-merge/src/blob/platform/merge.rs

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ pub(super) mod inner {
266266

267267
///
268268
pub mod builtin_merge {
269+
use crate::blob::platform::resource;
270+
use crate::blob::platform::resource::Data;
269271
use crate::blob::{builtin_driver, BuiltinDriver, PlatformRef, Resolution};
270272

271273
/// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge).
@@ -307,19 +309,20 @@ pub(super) mod inner {
307309
input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
308310
labels: builtin_driver::text::Labels<'_>,
309311
) -> (Pick, Resolution) {
310-
let base = self.ancestor.data.as_slice();
311-
let ours = self.current.data.as_slice();
312-
let theirs = self.other.data.as_slice();
312+
let base = self.ancestor.data.as_slice().unwrap_or_default();
313+
let ours = self.current.data.as_slice().unwrap_or_default();
314+
let theirs = self.other.data.as_slice().unwrap_or_default();
313315
let driver = if driver != BuiltinDriver::Binary
314-
&& (is_binary_buf(ours) || is_binary_buf(theirs) || is_binary_buf(base))
316+
&& (is_binary_buf(self.ancestor.data)
317+
|| is_binary_buf(self.other.data)
318+
|| is_binary_buf(self.current.data))
315319
{
316320
BuiltinDriver::Binary
317321
} else {
318322
driver
319323
};
320324
match driver {
321325
BuiltinDriver::Text => {
322-
let ((base, ours), theirs) = base.zip(ours).zip(theirs).expect("would use binary if missing");
323326
let resolution =
324327
builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
325328
(Pick::Buffer, resolution)
@@ -334,7 +337,6 @@ pub(super) mod inner {
334337
(pick, resolution)
335338
}
336339
BuiltinDriver::Union => {
337-
let ((base, ours), theirs) = base.zip(ours).zip(theirs).expect("would use binary if missing");
338340
let resolution = builtin_driver::text(
339341
out,
340342
input,
@@ -353,11 +355,15 @@ pub(super) mod inner {
353355
}
354356
}
355357

356-
fn is_binary_buf(buf: Option<&[u8]>) -> bool {
357-
buf.map_or(true, |buf| {
358-
let buf = &buf[..buf.len().min(8000)];
359-
buf.contains(&0)
360-
})
358+
fn is_binary_buf(data: resource::Data<'_>) -> bool {
359+
match data {
360+
Data::Missing => false,
361+
Data::Buffer(buf) => {
362+
let buf = &buf[..buf.len().min(8000)];
363+
buf.contains(&0)
364+
}
365+
Data::TooLarge { .. } => true,
366+
}
361367
}
362368
}
363369
}
@@ -412,13 +418,45 @@ impl<'parent> PlatformRef<'parent> {
412418
}
413419

414420
/// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
415-
/// Return `None` if the buffer is too large, or if the `pick` corresponds to a buffer (that was written separately).
416-
pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Option<&'parent [u8]> {
421+
/// Return `Ok(None)` if the `pick` corresponds to a buffer (that was written separately).
422+
/// Return `Err(())` if the buffer is *too large*, so it was never read.
423+
#[allow(clippy::result_unit_err)]
424+
pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
417425
match pick {
418-
inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice(),
419-
inner::builtin_merge::Pick::Ours => self.current.data.as_slice(),
420-
inner::builtin_merge::Pick::Theirs => self.other.data.as_slice(),
421-
inner::builtin_merge::Pick::Buffer => None,
426+
inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()),
427+
inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()),
428+
inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()),
429+
inner::builtin_merge::Pick::Buffer => Ok(None),
430+
}
431+
}
432+
433+
/// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
434+
/// In case of binary or large files, this will simply be the existing ID of the resource.
435+
/// In case of resources available in the object DB for binary merges, the object ID will be returned.
436+
/// If new content was produced due to a content merge, `buf` will be written out
437+
/// to the object database using `write_blob`.
438+
/// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded
439+
/// from the worktree *and* was too large so it was never loaded from disk.
440+
/// `Ok(None)` will also be returned if one of the resources was missing.
441+
/// `write_blob()` is used to turn buffers.
442+
pub fn id_by_pick<E>(
443+
&self,
444+
pick: inner::builtin_merge::Pick,
445+
buf: &[u8],
446+
mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
447+
) -> Result<Option<gix_hash::ObjectId>, E> {
448+
let field = match pick {
449+
inner::builtin_merge::Pick::Ancestor => &self.ancestor,
450+
inner::builtin_merge::Pick::Ours => &self.current,
451+
inner::builtin_merge::Pick::Theirs => &self.other,
452+
inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
453+
};
454+
use crate::blob::platform::resource::Data;
455+
match field.data {
456+
Data::TooLarge { .. } | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())),
457+
Data::TooLarge { .. } | Data::Missing => Ok(None),
458+
Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some),
459+
Data::Buffer(_) => Ok(Some(field.id.to_owned())),
422460
}
423461
}
424462
}

gix-merge/tests/merge/blob/platform.rs

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ use gix_merge::blob::Platform;
55
mod merge {
66
use crate::blob::platform::new_platform;
77
use crate::blob::util::ObjectDb;
8+
use crate::hex_to_id;
89
use bstr::{BStr, ByteSlice};
910
use gix_merge::blob::builtin_driver::text::ConflictStyle;
1011
use gix_merge::blob::platform::builtin_merge::Pick;
1112
use gix_merge::blob::platform::DriverChoice;
1213
use gix_merge::blob::{builtin_driver, pipeline, platform, BuiltinDriver, Resolution, ResourceKind};
1314
use gix_object::tree::EntryKind;
15+
use std::convert::Infallible;
1416
use std::process::Stdio;
1517

1618
#[test]
@@ -66,8 +68,9 @@ mod merge {
6668
#[test]
6769
fn builtin_with_conflict() -> crate::Result {
6870
let mut platform = new_platform(None, pipeline::Mode::ToGit);
71+
let non_existing_ancestor_id = hex_to_id("ffffffffffffffffffffffffffffffffffffffff");
6972
platform.set_resource(
70-
gix_hash::Kind::Sha1.null(),
73+
non_existing_ancestor_id,
7174
EntryKind::Blob,
7275
"b".into(),
7376
ResourceKind::CommonAncestorOrBase,
@@ -152,20 +155,56 @@ theirs
152155
);
153156
assert!(buf.is_empty(), "it tells us where to get the content from");
154157
assert_eq!(
155-
platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(),
158+
platform_ref.buffer_by_pick(res.0).unwrap().unwrap().as_bstr(),
156159
"ours",
157160
"getting access to the content is simplified"
158161
);
162+
assert_eq!(
163+
platform_ref
164+
.id_by_pick(res.0, &buf, |_buf| -> Result<_, Infallible> {
165+
panic!("no need to write buffer")
166+
})
167+
.unwrap()
168+
.unwrap(),
169+
hex_to_id("424860eef4edb9f5a2dacbbd6dc8c2d2e7645035"),
170+
"there is no need to write a buffer here, it just returns one of our inputs"
171+
);
159172

160-
for (expected, expected_pick, resolve) in [
161-
("ours", Pick::Ours, builtin_driver::binary::ResolveWith::Ours),
162-
("theirs", Pick::Theirs, builtin_driver::binary::ResolveWith::Theirs),
163-
("b\n", Pick::Ancestor, builtin_driver::binary::ResolveWith::Ancestor),
173+
for (expected, expected_pick, resolve, expected_id) in [
174+
(
175+
"ours",
176+
Pick::Ours,
177+
builtin_driver::binary::ResolveWith::Ours,
178+
hex_to_id("424860eef4edb9f5a2dacbbd6dc8c2d2e7645035"),
179+
),
180+
(
181+
"theirs",
182+
Pick::Theirs,
183+
builtin_driver::binary::ResolveWith::Theirs,
184+
hex_to_id("228068dbe790983c15535164cd483eb77ade97e4"),
185+
),
186+
(
187+
"b\n",
188+
Pick::Ancestor,
189+
builtin_driver::binary::ResolveWith::Ancestor,
190+
non_existing_ancestor_id,
191+
),
164192
] {
165193
platform_ref.options.resolve_binary_with = Some(resolve);
166194
let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?;
167195
assert_eq!(res, (expected_pick, Resolution::Complete));
168-
assert_eq!(platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(), expected);
196+
assert_eq!(platform_ref.buffer_by_pick(res.0).unwrap().unwrap().as_bstr(), expected);
197+
198+
assert_eq!(
199+
platform_ref
200+
.id_by_pick(res.0, &buf, |_buf| -> Result<_, Infallible> {
201+
panic!("no need to write buffer")
202+
})
203+
.unwrap()
204+
.unwrap(),
205+
expected_id,
206+
"{expected}: each input has an id, so it's just returned as is without handling buffers"
207+
);
169208
}
170209

171210
Ok(())
@@ -234,6 +273,18 @@ theirs
234273
(Pick::Buffer, Resolution::Complete),
235274
"merge drivers deal with binary themselves"
236275
);
276+
assert_eq!(
277+
platform_ref.buffer_by_pick(res.0),
278+
Ok(None),
279+
"This indicates the buffer must be read"
280+
);
281+
282+
let marker = hex_to_id("ffffffffffffffffffffffffffffffffffffffff");
283+
assert_eq!(
284+
platform_ref.id_by_pick(res.0, &buf, |_buf| Ok::<_, Infallible>(marker)),
285+
Ok(Some(marker)),
286+
"the id is created by hashing the merge buffer"
287+
);
237288
let mut lines = cleaned_driver_lines(&buf)?;
238289
for tmp_file in lines.by_ref().take(3) {
239290
assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}");

gix-merge/tests/merge/main.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
1+
use gix_hash::ObjectId;
12
extern crate core;
23

34
#[cfg(feature = "blob")]
45
mod blob;
56

67
pub use gix_testtools::Result;
8+
9+
fn hex_to_id(hex: &str) -> ObjectId {
10+
ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex")
11+
}

0 commit comments

Comments
 (0)