Skip to content

Commit f345b0a

Browse files
Junio C Hamano authored and
Linus Torvalds committed
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced. When the -B flag is given, a patch that represents a complete rewrite is broken into a deletion followed by a creation. This makes it easier to review such a complete rewrite patch. The -B flag takes the same syntax as the -M and -C flags to specify the minimum amount of non-source material the resulting file needs to have to be considered a complete rewrite, and defaults to 99% if not specified. As the new test t4008-diff-break-rewrite.sh demonstrates, if a file is a complete rewrite, it is broken into a delete/create pair, which can further be subjected to the usual rename detection if -M or -C is used. For example, if file0 gets completely rewritten to make it as if it were rather based on file1 which itself disappeared, the following happens: The original change looks like this: file0 --> file0' (quite different from file0) file1 --> /dev/null After diffcore-break runs, it would become this: file0 --> /dev/null /dev/null --> file0' file1 --> /dev/null Then diffcore-rename matches them up: file1 --> file0' The internal score values are finer grained now. The earlier maximum of 10000 has been raised to 60000; there are no user-visible changes, but there is no reason to waste available bits. Signed-off-by: Junio C Hamano <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 2cd6888 commit f345b0a

13 files changed

+433
-29
lines changed

Documentation/git-diff-cache.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ git-diff-cache - Compares content and mode of blobs between the cache and reposi
99

1010
SYNOPSIS
1111
--------
12-
'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]
12+
'git-diff-cache' [-p] [-r] [-z] [-m] [-B] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]
1313

1414
DESCRIPTION
1515
-----------
@@ -35,6 +35,9 @@ OPTIONS
3535
-z::
3636
\0 line termination on output
3737

38+
-B::
39+
Break complete rewrite changes into pairs of delete and create.
40+
3841
-M::
3942
Detect renames.
4043

Documentation/git-diff-files.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ git-diff-files - Compares files in the working tree and the cache
99

1010
SYNOPSIS
1111
--------
12-
'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S<string>] [--pickaxe-all] [<pattern>...]
12+
'git-diff-files' [-p] [-q] [-r] [-z] [-B] [-M] [-C] [-R] [-S<string>] [--pickaxe-all] [<pattern>...]
1313

1414
DESCRIPTION
1515
-----------
@@ -29,6 +29,9 @@ OPTIONS
2929
-R::
3030
Output diff in reverse.
3131

32+
-B::
33+
Break complete rewrite changes into pairs of delete and create.
34+
3235
-M::
3336
Detect renames.
3437

Documentation/git-diff-tree.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ git-diff-tree - Compares the content and mode of blobs found via two tree object
99

1010
SYNOPSIS
1111
--------
12-
'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*
12+
'git-diff-tree' [-p] [-r] [-z] [--stdin] [-B] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*
1313

1414
DESCRIPTION
1515
-----------
@@ -33,6 +33,9 @@ OPTIONS
3333
generate patch (see section on generating patches). For
3434
git-diff-tree, this flag implies '-r' as well.
3535

36+
-B::
37+
Break complete rewrite changes into pairs of delete and create.
38+
3639
-M::
3740
Detect renames.
3841

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ LIB_OBJS += strbuf.o
4848

4949
LIB_H += diff.h count-delta.h
5050
LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o \
51-
count-delta.o
51+
count-delta.o diffcore-break.o
5252

5353
LIB_OBJS += gitenv.o
5454

@@ -130,6 +130,7 @@ diff.o: $(LIB_H) diffcore.h
130130
diffcore-rename.o : $(LIB_H) diffcore.h
131131
diffcore-pathspec.o : $(LIB_H) diffcore.h
132132
diffcore-pickaxe.o : $(LIB_H) diffcore.h
133+
diffcore-break.o : $(LIB_H) diffcore.h
133134

134135
test: all
135136
$(MAKE) -C t/ all

diff-cache.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ static int diff_setup_opt = 0;
99
static int diff_score_opt = 0;
1010
static const char *pickaxe = NULL;
1111
static int pickaxe_opts = 0;
12+
static int diff_break_opt = -1;
1213

1314
/* A file entry went away or appeared */
1415
static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode)
@@ -188,6 +189,10 @@ int main(int argc, const char **argv)
188189
diff_output_format = DIFF_FORMAT_PATCH;
189190
continue;
190191
}
192+
if (!strncmp(arg, "-B", 2)) {
193+
diff_break_opt = diff_scoreopt_parse(arg);
194+
continue;
195+
}
191196
if (!strncmp(arg, "-M", 2)) {
192197
detect_rename = DIFF_DETECT_RENAME;
193198
diff_score_opt = diff_scoreopt_parse(arg);
@@ -240,9 +245,11 @@ int main(int argc, const char **argv)
240245
die("unable to read tree object %s", tree_name);
241246

242247
ret = diff_cache(active_cache, active_nr);
243-
diffcore_std(pathspec,
248+
249+
diffcore_std(pathspec ? : NULL,
244250
detect_rename, diff_score_opt,
245-
pickaxe, pickaxe_opts);
251+
pickaxe, pickaxe_opts,
252+
diff_break_opt);
246253
diff_flush(diff_output_format, 1);
247254
return ret;
248255
}

diff-files.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ static int diff_setup_opt = 0;
1515
static int diff_score_opt = 0;
1616
static const char *pickaxe = NULL;
1717
static int pickaxe_opts = 0;
18+
static int diff_break_opt = -1;
1819
static int silent = 0;
1920

2021
static void show_unmerge(const char *path)
@@ -57,6 +58,8 @@ int main(int argc, const char **argv)
5758
pickaxe = argv[1] + 2;
5859
else if (!strcmp(argv[1], "--pickaxe-all"))
5960
pickaxe_opts = DIFF_PICKAXE_ALL;
61+
else if (!strncmp(argv[1], "-B", 2))
62+
diff_break_opt = diff_scoreopt_parse(argv[1]);
6063
else if (!strncmp(argv[1], "-M", 2)) {
6164
diff_score_opt = diff_scoreopt_parse(argv[1]);
6265
detect_rename = DIFF_DETECT_RENAME;
@@ -116,9 +119,10 @@ int main(int argc, const char **argv)
116119
show_modified(oldmode, mode, ce->sha1, null_sha1,
117120
ce->name);
118121
}
119-
diffcore_std(argv + 1,
122+
diffcore_std((1 < argc) ? argv + 1 : NULL,
120123
detect_rename, diff_score_opt,
121-
pickaxe, pickaxe_opts);
124+
pickaxe, pickaxe_opts,
125+
diff_break_opt);
122126
diff_flush(diff_output_format, 1);
123127
return 0;
124128
}

diff-tree.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ static int diff_setup_opt = 0;
1414
static int diff_score_opt = 0;
1515
static const char *pickaxe = NULL;
1616
static int pickaxe_opts = 0;
17+
static int diff_break_opt = -1;
1718
static const char *header = NULL;
1819
static const char *header_prefix = "";
1920

@@ -263,7 +264,8 @@ static int call_diff_flush(void)
263264
{
264265
diffcore_std(0,
265266
detect_rename, diff_score_opt,
266-
pickaxe, pickaxe_opts);
267+
pickaxe, pickaxe_opts,
268+
diff_break_opt);
267269
if (diff_queue_is_empty()) {
268270
diff_flush(DIFF_FORMAT_NO_OUTPUT, 0);
269271
return 0;
@@ -523,6 +525,10 @@ int main(int argc, const char **argv)
523525
diff_score_opt = diff_scoreopt_parse(arg);
524526
continue;
525527
}
528+
if (!strncmp(arg, "-B", 2)) {
529+
diff_break_opt = diff_scoreopt_parse(arg);
530+
continue;
531+
}
526532
if (!strcmp(arg, "-z")) {
527533
diff_output_format = DIFF_FORMAT_MACHINE;
528534
continue;

diff.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,7 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
603603
dp->two = two;
604604
dp->score = 0;
605605
dp->source_stays = 0;
606+
dp->broken_pair = 0;
606607
diff_q(queue, dp);
607608
return dp;
608609
}
@@ -637,6 +638,16 @@ static void diff_flush_raw(struct diff_filepair *p,
637638
sprintf(status, "%c%03d", p->status,
638639
(int)(0.5 + p->score * 100.0/MAX_SCORE));
639640
break;
641+
case 'N': case 'D':
642+
two_paths = 0;
643+
if (p->score)
644+
sprintf(status, "%c%03d", p->status,
645+
(int)(0.5 + p->score * 100.0/MAX_SCORE));
646+
else {
647+
status[0] = p->status;
648+
status[1] = 0;
649+
}
650+
break;
640651
default:
641652
two_paths = 0;
642653
status[0] = p->status;
@@ -760,8 +771,9 @@ void diff_debug_filepair(const struct diff_filepair *p, int i)
760771
{
761772
diff_debug_filespec(p->one, i, "one");
762773
diff_debug_filespec(p->two, i, "two");
763-
fprintf(stderr, "score %d, status %c source_stays %d\n",
764-
p->score, p->status ? : '?', p->source_stays);
774+
fprintf(stderr, "score %d, status %c stays %d broken %d\n",
775+
p->score, p->status ? : '?',
776+
p->source_stays, p->broken_pair);
765777
}
766778

767779
void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
@@ -875,10 +887,13 @@ void diff_flush(int diff_output_style, int resolve_rename_copy)
875887

876888
void diffcore_std(const char **paths,
877889
int detect_rename, int rename_score,
878-
const char *pickaxe, int pickaxe_opts)
890+
const char *pickaxe, int pickaxe_opts,
891+
int break_opt)
879892
{
880893
if (paths && paths[0])
881894
diffcore_pathspec(paths);
895+
if (0 <= break_opt)
896+
diffcore_break(break_opt);
882897
if (detect_rename)
883898
diffcore_rename(detect_rename, rename_score);
884899
if (pickaxe)

diff.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,12 @@ extern void diffcore_pickaxe(const char *needle, int opts);
4343

4444
extern void diffcore_pathspec(const char **pathspec);
4545

46+
extern void diffcore_break(int);
47+
4648
extern void diffcore_std(const char **paths,
4749
int detect_rename, int rename_score,
48-
const char *pickaxe, int pickaxe_opts);
50+
const char *pickaxe, int pickaxe_opts,
51+
int break_opt);
4952

5053
extern int diff_queue_is_empty(void);
5154

diffcore-break.c

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Copyright (C) 2005 Junio C Hamano
3+
*/
4+
#include "cache.h"
5+
#include "diff.h"
6+
#include "diffcore.h"
7+
#include "delta.h"
8+
#include "count-delta.h"
9+
10+
static int very_different(struct diff_filespec *src,
11+
struct diff_filespec *dst,
12+
int min_score)
13+
{
14+
/* dst is recorded as a modification of src. Are they so
15+
* different that we are better off recording this as a pair
16+
* of delete and create? min_score is the minimum amount of
17+
* new material that must exist in the dst and not in src for
18+
* the pair to be considered a complete rewrite, and recommended
19+
* to be set to a very high value, 99% or so.
20+
*
21+
* The value we return represents the amount of new material
22+
* that is in dst and not in src. We return 0 when we do not
23+
* want to get the filepair broken.
24+
*/
25+
void *delta;
26+
unsigned long delta_size, base_size;
27+
28+
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
29+
return 0; /* leave symlink rename alone */
30+
31+
if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
32+
return 0; /* error but caught downstream */
33+
34+
delta_size = ((src->size < dst->size) ?
35+
(dst->size - src->size) : (src->size - dst->size));
36+
37+
/* Notice that we use max of src and dst as the base size,
38+
* unlike rename similarity detection. This is so that we do
39+
* not mistake a large addition as a complete rewrite.
40+
*/
41+
base_size = ((src->size < dst->size) ? dst->size : src->size);
42+
43+
/*
44+
* If file size difference is too big compared to the
45+
* base_size, we declare this a complete rewrite.
46+
*/
47+
if (base_size * min_score < delta_size * MAX_SCORE)
48+
return MAX_SCORE;
49+
50+
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
51+
return 0; /* error but caught downstream */
52+
53+
delta = diff_delta(src->data, src->size,
54+
dst->data, dst->size,
55+
&delta_size);
56+
57+
/* A delta that has a lot of literal additions would have
58+
* big delta_size no matter what else it does.
59+
*/
60+
if (base_size * min_score < delta_size * MAX_SCORE)
61+
return MAX_SCORE;
62+
63+
/* Estimate the edit size by interpreting delta. */
64+
delta_size = count_delta(delta, delta_size);
65+
free(delta);
66+
if (delta_size == UINT_MAX)
67+
return 0; /* error in delta computation */
68+
69+
if (base_size < delta_size)
70+
return MAX_SCORE;
71+
72+
return delta_size * MAX_SCORE / base_size;
73+
}
74+
75+
void diffcore_break(int min_score)
76+
{
77+
struct diff_queue_struct *q = &diff_queued_diff;
78+
struct diff_queue_struct outq;
79+
int i;
80+
81+
if (!min_score)
82+
min_score = DEFAULT_BREAK_SCORE;
83+
84+
outq.nr = outq.alloc = 0;
85+
outq.queue = NULL;
86+
87+
for (i = 0; i < q->nr; i++) {
88+
struct diff_filepair *p = q->queue[i];
89+
int score;
90+
91+
/* We deal only with in-place edit of non directory.
92+
* We do not break anything else.
93+
*/
94+
if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) &&
95+
!S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) &&
96+
!strcmp(p->one->path, p->two->path)) {
97+
score = very_different(p->one, p->two, min_score);
98+
if (min_score <= score) {
99+
/* Split this into delete and create */
100+
struct diff_filespec *null_one, *null_two;
101+
struct diff_filepair *dp;
102+
103+
/* deletion of one */
104+
null_one = alloc_filespec(p->one->path);
105+
dp = diff_queue(&outq, p->one, null_one);
106+
dp->score = score;
107+
dp->broken_pair = 1;
108+
109+
/* creation of two */
110+
null_two = alloc_filespec(p->two->path);
111+
dp = diff_queue(&outq, null_two, p->two);
112+
dp->score = score;
113+
dp->broken_pair = 1;
114+
115+
free(p); /* not diff_free_filepair(), we are
116+
* reusing one and two here.
117+
*/
118+
continue;
119+
}
120+
}
121+
diff_q(&outq, p);
122+
}
123+
free(q->queue);
124+
*q = outq;
125+
126+
return;
127+
}

0 commit comments

Comments
 (0)