Skip to content

various improvements #1820

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions gix-diff/src/blob/unified_diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ impl ContextSize {
}
}

/// Specify where to put a newline.
#[derive(Debug, Copy, Clone)]
pub enum NewlineSeparator<'a> {
/// Place the given newline separator, like `\n`, after each patch header as well as after each line.
/// This is the right choice if tokens don't include newlines.
AfterHeaderAndLine(&'a str),
/// Place the given newline separator, like `\n`, only after each patch header or if a line doesn't contain a newline.
/// This is the right choice if tokens do include newlines.
/// Note that diff-tokens *with* newlines may diff strangely at the end of files when lines have been appended,
/// as it will make the last line look like it changed just because the whitespace at the end 'changed'.
AfterHeaderAndWhenNeeded(&'a str),
}

/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
pub trait ConsumeHunk {
/// The item this instance produces after consuming all hunks.
Expand Down Expand Up @@ -56,7 +69,7 @@ pub trait ConsumeHunk {
}

pub(super) mod _impl {
use super::{ConsumeHunk, ContextSize};
use super::{ConsumeHunk, ContextSize, NewlineSeparator};
use bstr::{ByteSlice, ByteVec};
use imara_diff::{intern, Sink};
use intern::{InternedInput, Interner, Token};
Expand Down Expand Up @@ -86,7 +99,7 @@ pub(super) mod _impl {
buffer: Vec<u8>,
header_buf: String,
delegate: D,
newline: &'a str,
newline: NewlineSeparator<'a>,

err: Option<std::io::Error>,
}
Expand All @@ -101,11 +114,11 @@ pub(super) mod _impl {
/// `context_size` is the amount of lines around each hunk which will be passed
///to `consume_hunk`.
///
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`,
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
pub fn new(
input: &'a InternedInput<T>,
consume_hunk: D,
newline_separator: &'a str,
newline_separator: NewlineSeparator<'a>,
context_size: ContextSize,
) -> Self {
Self {
Expand All @@ -130,8 +143,18 @@ pub(super) mod _impl {
fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
for &token in tokens {
self.buffer.push_char(prefix);
self.buffer.push_str(&self.interner[token]);
self.buffer.push_str(self.newline.as_bytes());
let line = &self.interner[token];
self.buffer.push_str(line);
match self.newline {
NewlineSeparator::AfterHeaderAndLine(nl) => {
self.buffer.push_str(nl);
}
NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
if !line.as_ref().ends_with_str(nl) {
self.buffer.push_str(nl);
}
}
}
}
}

Expand All @@ -153,7 +176,11 @@ pub(super) mod _impl {
self.before_hunk_len,
self.after_hunk_start + 1,
self.after_hunk_len,
nl = self.newline
nl = match self.newline {
NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
nl
}
}
),
)
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;
Expand Down
226 changes: 187 additions & 39 deletions gix-diff/tests/diff/blob/unified_diff.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize};
use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize, NewlineSeparator};
use gix_diff::blob::{Algorithm, UnifiedDiff};

#[test]
Expand All @@ -10,7 +10,12 @@ fn removed_modified_added() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(3),
),
)?;

// merged by context.
Expand All @@ -34,7 +39,12 @@ fn removed_modified_added() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(1)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(1),
),
)?;
// Small context lines keeps hunks separate.
insta::assert_snapshot!(actual, @r"
Expand All @@ -55,7 +65,12 @@ fn removed_modified_added() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(0)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(0),
),
)?;
// No context is also fine
insta::assert_snapshot!(actual, @r"
Expand All @@ -69,41 +84,15 @@ fn removed_modified_added() -> crate::Result {
+twelve
");

#[derive(Default)]
struct Recorder {
#[allow(clippy::type_complexity)]
hunks: Vec<((u32, u32), (u32, u32), String)>,
}

impl ConsumeHunk for Recorder {
type Out = Vec<((u32, u32), (u32, u32), String)>;

fn consume_hunk(
&mut self,
before_hunk_start: u32,
before_hunk_len: u32,
after_hunk_start: u32,
after_hunk_len: u32,
header: &str,
_hunk: &[u8],
) -> std::io::Result<()> {
self.hunks.push((
(before_hunk_start, before_hunk_len),
(after_hunk_start, after_hunk_len),
header.to_string(),
));
Ok(())
}

fn finish(self) -> Self::Out {
self.hunks
}
}

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, Recorder::default(), "\n", ContextSize::symmetrical(1)),
UnifiedDiff::new(
&interner,
Recorder::default(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(1),
),
)?;
assert_eq!(
actual,
Expand All @@ -117,6 +106,119 @@ fn removed_modified_added() -> crate::Result {
Ok(())
}

#[test]
fn removed_modified_added_with_newlines_in_tokens() -> crate::Result {
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";
let b = "2\n3\n4\n5\nsix\n7\n8\n9\n10\neleven\ntwelve";

let a = gix_diff::blob::sources::lines_with_terminator(a);
let b = gix_diff::blob::sources::lines_with_terminator(b);
let interner = gix_diff::blob::intern::InternedInput::new(a, b);
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
ContextSize::symmetrical(3),
),
)?;

// merged by context.
// newline diffs differently.
insta::assert_snapshot!(actual, @r"
@@ -1,10 +1,11 @@
-1
2
3
4
5
-6
+six
7
8
9
-10
+10
+eleven
+twelve
");

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
ContextSize::symmetrical(1),
),
)?;
// Small context lines keeps hunks separate.
insta::assert_snapshot!(actual, @r"
@@ -1,2 +1,1 @@
-1
2
@@ -5,3 +4,3 @@
5
-6
+six
7
@@ -9,2 +8,4 @@
9
-10
+10
+eleven
+twelve
");

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
ContextSize::symmetrical(0),
),
)?;
// No context is also fine
insta::assert_snapshot!(actual, @r"
@@ -1,1 +1,0 @@
-1
@@ -6,1 +5,1 @@
-6
+six
@@ -10,1 +9,3 @@
-10
+10
+eleven
+twelve
");

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
Recorder::default(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"),
ContextSize::symmetrical(1),
),
)?;
assert_eq!(
actual,
&[
((1, 2), (1, 1), "@@ -1,2 +1,1 @@\r\n".to_string()),
((5, 3), (4, 3), "@@ -5,3 +4,3 @@\r\n".into()),
((9, 2), (8, 4), "@@ -9,2 +8,4 @@\r\n".into())
]
);

Ok(())
}

#[test]
fn all_added_or_removed() -> crate::Result {
let content = "1\n2\n3\n4\n5";
Expand All @@ -127,7 +229,12 @@ fn all_added_or_removed() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(context_lines),
),
)?;
assert_eq!(
actual,
Expand All @@ -147,7 +254,12 @@ fn all_added_or_removed() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(context_lines),
),
)?;
assert_eq!(
actual,
Expand All @@ -170,9 +282,45 @@ fn empty() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(3),
),
)?;

insta::assert_snapshot!(actual, @r"");
Ok(())
}

#[derive(Default)]
struct Recorder {
#[allow(clippy::type_complexity)]
hunks: Vec<((u32, u32), (u32, u32), String)>,
}

impl ConsumeHunk for Recorder {
type Out = Vec<((u32, u32), (u32, u32), String)>;

fn consume_hunk(
&mut self,
before_hunk_start: u32,
before_hunk_len: u32,
after_hunk_start: u32,
after_hunk_len: u32,
header: &str,
_hunk: &[u8],
) -> std::io::Result<()> {
self.hunks.push((
(before_hunk_start, before_hunk_len),
(after_hunk_start, after_hunk_len),
header.to_string(),
));
Ok(())
}

fn finish(self) -> Self::Out {
self.hunks
}
}
Loading
Loading