Skip to content

Commit daa6d4a

Browse files
authored
Merge pull request #1820 from GitoxideLabs/improvements
various improvements
2 parents 14d6b8d + 1091e29 commit daa6d4a

File tree

4 files changed

+237
-61
lines changed

4 files changed

+237
-61
lines changed

gix-diff/src/blob/unified_diff.rs

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,19 @@ impl ContextSize {
2626
}
2727
}
2828

29+
/// Specify where to put a newline.
30+
#[derive(Debug, Copy, Clone)]
31+
pub enum NewlineSeparator<'a> {
32+
/// Place the given newline separator, like `\n`, after each patch header as well as after each line.
33+
/// This is the right choice if tokens don't include newlines.
34+
AfterHeaderAndLine(&'a str),
35+
/// Place the given newline separator, like `\n`, only after each patch header or if a line doesn't contain a newline.
36+
/// This is the right choice if tokens do include newlines.
37+
/// Note that diff-tokens *with* newlines may diff strangely at the end of files when lines have been appended,
38+
/// as it will make the last line look like it changed just because the whitespace at the end 'changed'.
39+
AfterHeaderAndWhenNeeded(&'a str),
40+
}
41+
2942
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
3043
pub trait ConsumeHunk {
3144
/// The item this instance produces after consuming all hunks.
@@ -56,7 +69,7 @@ pub trait ConsumeHunk {
5669
}
5770

5871
pub(super) mod _impl {
59-
use super::{ConsumeHunk, ContextSize};
72+
use super::{ConsumeHunk, ContextSize, NewlineSeparator};
6073
use bstr::{ByteSlice, ByteVec};
6174
use imara_diff::{intern, Sink};
6275
use intern::{InternedInput, Interner, Token};
@@ -86,7 +99,7 @@ pub(super) mod _impl {
8699
buffer: Vec<u8>,
87100
header_buf: String,
88101
delegate: D,
89-
newline: &'a str,
102+
newline: NewlineSeparator<'a>,
90103

91104
err: Option<std::io::Error>,
92105
}
@@ -101,11 +114,11 @@ pub(super) mod _impl {
101114
/// `context_size` is the amount of lines around each hunk which will be passed
102115
///to `consume_hunk`.
103116
///
104-
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`,
117+
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
105118
pub fn new(
106119
input: &'a InternedInput<T>,
107120
consume_hunk: D,
108-
newline_separator: &'a str,
121+
newline_separator: NewlineSeparator<'a>,
109122
context_size: ContextSize,
110123
) -> Self {
111124
Self {
@@ -130,8 +143,18 @@ pub(super) mod _impl {
130143
fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
131144
for &token in tokens {
132145
self.buffer.push_char(prefix);
133-
self.buffer.push_str(&self.interner[token]);
134-
self.buffer.push_str(self.newline.as_bytes());
146+
let line = &self.interner[token];
147+
self.buffer.push_str(line);
148+
match self.newline {
149+
NewlineSeparator::AfterHeaderAndLine(nl) => {
150+
self.buffer.push_str(nl);
151+
}
152+
NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
153+
if !line.as_ref().ends_with_str(nl) {
154+
self.buffer.push_str(nl);
155+
}
156+
}
157+
}
135158
}
136159
}
137160

@@ -153,7 +176,11 @@ pub(super) mod _impl {
153176
self.before_hunk_len,
154177
self.after_hunk_start + 1,
155178
self.after_hunk_len,
156-
nl = self.newline
179+
nl = match self.newline {
180+
NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
181+
nl
182+
}
183+
}
157184
),
158185
)
159186
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;

gix-diff/tests/diff/blob/unified_diff.rs

Lines changed: 187 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize};
1+
use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize, NewlineSeparator};
22
use gix_diff::blob::{Algorithm, UnifiedDiff};
33

44
#[test]
@@ -10,7 +10,12 @@ fn removed_modified_added() -> crate::Result {
1010
let actual = gix_diff::blob::diff(
1111
Algorithm::Myers,
1212
&interner,
13-
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)),
13+
UnifiedDiff::new(
14+
&interner,
15+
String::new(),
16+
NewlineSeparator::AfterHeaderAndLine("\n"),
17+
ContextSize::symmetrical(3),
18+
),
1419
)?;
1520

1621
// merged by context.
@@ -34,7 +39,12 @@ fn removed_modified_added() -> crate::Result {
3439
let actual = gix_diff::blob::diff(
3540
Algorithm::Myers,
3641
&interner,
37-
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(1)),
42+
UnifiedDiff::new(
43+
&interner,
44+
String::new(),
45+
NewlineSeparator::AfterHeaderAndLine("\n"),
46+
ContextSize::symmetrical(1),
47+
),
3848
)?;
3949
// Small context lines keeps hunks separate.
4050
insta::assert_snapshot!(actual, @r"
@@ -55,7 +65,12 @@ fn removed_modified_added() -> crate::Result {
5565
let actual = gix_diff::blob::diff(
5666
Algorithm::Myers,
5767
&interner,
58-
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(0)),
68+
UnifiedDiff::new(
69+
&interner,
70+
String::new(),
71+
NewlineSeparator::AfterHeaderAndLine("\n"),
72+
ContextSize::symmetrical(0),
73+
),
5974
)?;
6075
// No context is also fine
6176
insta::assert_snapshot!(actual, @r"
@@ -69,41 +84,15 @@ fn removed_modified_added() -> crate::Result {
6984
+twelve
7085
");
7186

72-
#[derive(Default)]
73-
struct Recorder {
74-
#[allow(clippy::type_complexity)]
75-
hunks: Vec<((u32, u32), (u32, u32), String)>,
76-
}
77-
78-
impl ConsumeHunk for Recorder {
79-
type Out = Vec<((u32, u32), (u32, u32), String)>;
80-
81-
fn consume_hunk(
82-
&mut self,
83-
before_hunk_start: u32,
84-
before_hunk_len: u32,
85-
after_hunk_start: u32,
86-
after_hunk_len: u32,
87-
header: &str,
88-
_hunk: &[u8],
89-
) -> std::io::Result<()> {
90-
self.hunks.push((
91-
(before_hunk_start, before_hunk_len),
92-
(after_hunk_start, after_hunk_len),
93-
header.to_string(),
94-
));
95-
Ok(())
96-
}
97-
98-
fn finish(self) -> Self::Out {
99-
self.hunks
100-
}
101-
}
102-
10387
let actual = gix_diff::blob::diff(
10488
Algorithm::Myers,
10589
&interner,
106-
UnifiedDiff::new(&interner, Recorder::default(), "\n", ContextSize::symmetrical(1)),
90+
UnifiedDiff::new(
91+
&interner,
92+
Recorder::default(),
93+
NewlineSeparator::AfterHeaderAndLine("\n"),
94+
ContextSize::symmetrical(1),
95+
),
10796
)?;
10897
assert_eq!(
10998
actual,
@@ -117,6 +106,119 @@ fn removed_modified_added() -> crate::Result {
117106
Ok(())
118107
}
119108

109+
#[test]
110+
fn removed_modified_added_with_newlines_in_tokens() -> crate::Result {
111+
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";
112+
let b = "2\n3\n4\n5\nsix\n7\n8\n9\n10\neleven\ntwelve";
113+
114+
let a = gix_diff::blob::sources::lines_with_terminator(a);
115+
let b = gix_diff::blob::sources::lines_with_terminator(b);
116+
let interner = gix_diff::blob::intern::InternedInput::new(a, b);
117+
let actual = gix_diff::blob::diff(
118+
Algorithm::Myers,
119+
&interner,
120+
UnifiedDiff::new(
121+
&interner,
122+
String::new(),
123+
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
124+
ContextSize::symmetrical(3),
125+
),
126+
)?;
127+
128+
// merged by context.
129+
// newline diffs differently.
130+
insta::assert_snapshot!(actual, @r"
131+
@@ -1,10 +1,11 @@
132+
-1
133+
2
134+
3
135+
4
136+
5
137+
-6
138+
+six
139+
7
140+
8
141+
9
142+
-10
143+
+10
144+
+eleven
145+
+twelve
146+
");
147+
148+
let actual = gix_diff::blob::diff(
149+
Algorithm::Myers,
150+
&interner,
151+
UnifiedDiff::new(
152+
&interner,
153+
String::new(),
154+
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
155+
ContextSize::symmetrical(1),
156+
),
157+
)?;
158+
// Small context lines keeps hunks separate.
159+
insta::assert_snapshot!(actual, @r"
160+
@@ -1,2 +1,1 @@
161+
-1
162+
2
163+
@@ -5,3 +4,3 @@
164+
5
165+
-6
166+
+six
167+
7
168+
@@ -9,2 +8,4 @@
169+
9
170+
-10
171+
+10
172+
+eleven
173+
+twelve
174+
");
175+
176+
let actual = gix_diff::blob::diff(
177+
Algorithm::Myers,
178+
&interner,
179+
UnifiedDiff::new(
180+
&interner,
181+
String::new(),
182+
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
183+
ContextSize::symmetrical(0),
184+
),
185+
)?;
186+
// No context is also fine
187+
insta::assert_snapshot!(actual, @r"
188+
@@ -1,1 +1,0 @@
189+
-1
190+
@@ -6,1 +5,1 @@
191+
-6
192+
+six
193+
@@ -10,1 +9,3 @@
194+
-10
195+
+10
196+
+eleven
197+
+twelve
198+
");
199+
200+
let actual = gix_diff::blob::diff(
201+
Algorithm::Myers,
202+
&interner,
203+
UnifiedDiff::new(
204+
&interner,
205+
Recorder::default(),
206+
NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"),
207+
ContextSize::symmetrical(1),
208+
),
209+
)?;
210+
assert_eq!(
211+
actual,
212+
&[
213+
((1, 2), (1, 1), "@@ -1,2 +1,1 @@\r\n".to_string()),
214+
((5, 3), (4, 3), "@@ -5,3 +4,3 @@\r\n".into()),
215+
((9, 2), (8, 4), "@@ -9,2 +8,4 @@\r\n".into())
216+
]
217+
);
218+
219+
Ok(())
220+
}
221+
120222
#[test]
121223
fn all_added_or_removed() -> crate::Result {
122224
let content = "1\n2\n3\n4\n5";
@@ -127,7 +229,12 @@ fn all_added_or_removed() -> crate::Result {
127229
let actual = gix_diff::blob::diff(
128230
Algorithm::Myers,
129231
&interner,
130-
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)),
232+
UnifiedDiff::new(
233+
&interner,
234+
String::new(),
235+
NewlineSeparator::AfterHeaderAndLine("\n"),
236+
ContextSize::symmetrical(context_lines),
237+
),
131238
)?;
132239
assert_eq!(
133240
actual,
@@ -147,7 +254,12 @@ fn all_added_or_removed() -> crate::Result {
147254
let actual = gix_diff::blob::diff(
148255
Algorithm::Myers,
149256
&interner,
150-
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)),
257+
UnifiedDiff::new(
258+
&interner,
259+
String::new(),
260+
NewlineSeparator::AfterHeaderAndLine("\n"),
261+
ContextSize::symmetrical(context_lines),
262+
),
151263
)?;
152264
assert_eq!(
153265
actual,
@@ -170,9 +282,45 @@ fn empty() -> crate::Result {
170282
let actual = gix_diff::blob::diff(
171283
Algorithm::Myers,
172284
&interner,
173-
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)),
285+
UnifiedDiff::new(
286+
&interner,
287+
String::new(),
288+
NewlineSeparator::AfterHeaderAndLine("\n"),
289+
ContextSize::symmetrical(3),
290+
),
174291
)?;
175292

176293
insta::assert_snapshot!(actual, @r"");
177294
Ok(())
178295
}
296+
297+
#[derive(Default)]
298+
struct Recorder {
299+
#[allow(clippy::type_complexity)]
300+
hunks: Vec<((u32, u32), (u32, u32), String)>,
301+
}
302+
303+
impl ConsumeHunk for Recorder {
304+
type Out = Vec<((u32, u32), (u32, u32), String)>;
305+
306+
fn consume_hunk(
307+
&mut self,
308+
before_hunk_start: u32,
309+
before_hunk_len: u32,
310+
after_hunk_start: u32,
311+
after_hunk_len: u32,
312+
header: &str,
313+
_hunk: &[u8],
314+
) -> std::io::Result<()> {
315+
self.hunks.push((
316+
(before_hunk_start, before_hunk_len),
317+
(after_hunk_start, after_hunk_len),
318+
header.to_string(),
319+
));
320+
Ok(())
321+
}
322+
323+
fn finish(self) -> Self::Out {
324+
self.hunks
325+
}
326+
}

0 commit comments

Comments
 (0)