Skip to content

Commit 4d0f39c

Browse files
author
Junio C Hamano
committed
diffcore-break: similarity estimator fix.
This is a companion patch to the previous fix to diffcore-rename. The merging-back process should use logic similar to what is used there. Signed-off-by: Junio C Hamano <[email protected]>
1 parent f5948cf commit 4d0f39c

File tree

2 files changed

+28
-21
lines changed

2 files changed

+28
-21
lines changed

diffcore-break.c

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src,
4545
* The value we return is 1 if we want the pair to be broken,
4646
* or 0 if we do not.
4747
*/
48-
unsigned long delta_size, base_size, src_copied, literal_added;
49-
int to_break = 0;
48+
unsigned long delta_size, base_size, src_copied, literal_added,
49+
src_removed;
5050

5151
*merge_score_p = 0; /* assume no deletion --- "do not break"
5252
* is the default.
@@ -72,33 +72,40 @@ static int should_break(struct diff_filespec *src,
7272
&src_copied, &literal_added))
7373
return 0;
7474

75+
/* sanity */
76+
if (src->size < src_copied)
77+
src_copied = src->size;
78+
if (dst->size < literal_added + src_copied) {
79+
if (src_copied < dst->size)
80+
literal_added = dst->size - src_copied;
81+
else
82+
literal_added = 0;
83+
}
84+
src_removed = src->size - src_copied;
85+
7586
/* Compute merge-score, which is "how much is removed
7687
* from the source material". The clean-up stage will
7788
* merge the surviving pair together if the score is
7889
* less than the minimum, after rename/copy runs.
7990
*/
80-
if (src->size <= src_copied)
81-
; /* all copied, nothing removed */
82-
else {
83-
delta_size = src->size - src_copied;
84-
*merge_score_p = delta_size * MAX_SCORE / src->size;
85-
}
86-
91+
*merge_score_p = src_removed * MAX_SCORE / src->size;
92+
8793
/* Extent of damage, which counts both inserts and
8894
* deletes.
8995
*/
90-
if (src->size + literal_added <= src_copied)
91-
delta_size = 0; /* avoid wrapping around */
92-
else
93-
delta_size = (src->size - src_copied) + literal_added;
94-
95-
/* We break if the edit exceeds the minimum.
96-
* i.e. (break_score / MAX_SCORE < delta_size / base_size)
96+
delta_size = src_removed + literal_added;
97+
if (delta_size * MAX_SCORE / base_size < break_score)
98+
return 0;
99+
100+
/* If you removed a lot without adding new material, that is
101+
* not really a rewrite.
97102
*/
98-
if (break_score * base_size < delta_size * MAX_SCORE)
99-
to_break = 1;
103+
if ((src->size * break_score < src_removed * MAX_SCORE) &&
104+
(literal_added * 20 < src_removed) &&
105+
(literal_added * 20 < src_copied))
106+
return 0;
100107

101-
return to_break;
108+
return 1;
102109
}
103110

104111
void diffcore_break(int break_score)

diffcore.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
*/
1818
#define MAX_SCORE 60000.0
1919
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
20-
#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/
21-
#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/
20+
#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */
21+
#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen (60%) */
2222

2323
#define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */
2424

0 commit comments

Comments
 (0)