Skip to content

Commit f7c1512

Browse files
Junio C HamanoLinus Torvalds
authored andcommitted
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good enough to show patch output and discussion on the mailing list about the diff-raw format updates revealed many problems with it. This patch fixes all the ones known to me, without making things I want to do later impossible, mostly related to patch reordering. (1) Earlier rename/copy detector determined which one is rename and which one is copy too early, which made it impossible to later introduce diffcore transformers to reorder patches. This patch fixes it by moving that logic to the very end of the processing. (2) Earlier output routine diff_flush() was pruning all the "no-change" entries indiscriminatingly. This was done due to my false assumption that one of the requirements in the diff-raw output was not to show such an entry (which resulted in my incorrect comment about "diff-helper never being able to be equivalent to built-in diff driver"). My special thanks go to Linus for correcting me about this. When we produce diff-raw output, for the downstream to be able to tell renames from copies, sometimes it _is_ necessary to output "no-change" entries, and this patch adds diffcore_prune() function for doing it. (3) Earlier diff_filepair structure was trying to be not too specific about rename/copy operations, but the purpose of the structure was to record one or two paths, which _was_ indeed about rename/copy. This patch discards xfrm_msg field which was trying to be generic for this wrong reason, and introduces a couple of fields (rename_score and rename_rank) that are explicitly specific to rename/copy logic. One thing to note is that the information in a single diff_filepair structure _still_ does not distinguish renames from copies, and it is deliberately so. This is to allow patches to be reordered in later stages. (4) This patch also adds some tests about diff-raw format output and makes sure that necessary "no-change" entries appear on the output. Signed-off-by: Junio C Hamano <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 60896c7 commit f7c1512

File tree

7 files changed

+244
-138
lines changed

7 files changed

+244
-138
lines changed

diff.c

Lines changed: 98 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,6 @@ int diff_populate_filespec(struct diff_filespec *s)
283283
return 0;
284284
}
285285

286-
void diff_free_filepair(struct diff_filepair *p)
287-
{
288-
free(p->xfrm_msg);
289-
free(p);
290-
}
291-
292286
void diff_free_filespec_data(struct diff_filespec *s)
293287
{
294288
if (s->should_free)
@@ -501,9 +495,9 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
501495
struct diff_filepair *dp = xmalloc(sizeof(*dp));
502496
dp->one = one;
503497
dp->two = two;
504-
dp->xfrm_msg = NULL;
498+
dp->score = 0;
505499
dp->orig_order = queue->nr;
506-
dp->xfrm_work = 0;
500+
dp->rename_rank = 0;
507501
diff_q(queue, dp);
508502
return dp;
509503
}
@@ -522,23 +516,7 @@ static void diff_flush_raw(struct diff_filepair *p)
522516
p->two->path, line_termination);
523517
}
524518

525-
static void diff_flush_patch(struct diff_filepair *p)
526-
{
527-
const char *name, *other;
528-
529-
name = p->one->path;
530-
other = (strcmp(name, p->two->path) ? p->two->path : NULL);
531-
if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
532-
(DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
533-
return; /* no tree diffs in patch format */
534-
535-
if (DIFF_PAIR_UNMERGED(p))
536-
run_external_diff(name, NULL, NULL, NULL, NULL);
537-
else
538-
run_external_diff(name, other, p->one, p->two, p->xfrm_msg);
539-
}
540-
541-
static int uninteresting(struct diff_filepair *p)
519+
int diff_unmodified_pair(struct diff_filepair *p)
542520
{
543521
/* This function is written stricter than necessary to support
544522
* the currently implemented transformers, but the idea is to
@@ -570,12 +548,70 @@ static int uninteresting(struct diff_filepair *p)
570548
return 0;
571549
}
572550

551+
static void diff_flush_patch(struct diff_filepair *p, const char *msg)
552+
{
553+
const char *name, *other;
554+
555+
/* diffcore_prune() keeps "stay" entries for diff-raw
556+
* copy/rename detection, but when we are generating
557+
* patches we do not need them.
558+
*/
559+
if (diff_unmodified_pair(p))
560+
return;
561+
562+
name = p->one->path;
563+
other = (strcmp(name, p->two->path) ? p->two->path : NULL);
564+
if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
565+
(DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
566+
return; /* no tree diffs in patch format */
567+
568+
if (DIFF_PAIR_UNMERGED(p))
569+
run_external_diff(name, NULL, NULL, NULL, NULL);
570+
else
571+
run_external_diff(name, other, p->one, p->two, msg);
572+
}
573+
574+
int diff_needs_to_stay(struct diff_queue_struct *q, int i,
575+
struct diff_filespec *it)
576+
{
577+
/* If it will be used in later entry (either stay or used
578+
* as the source of rename/copy), we need to copy, not rename.
579+
*/
580+
while (i < q->nr) {
581+
struct diff_filepair *p = q->queue[i++];
582+
if (!DIFF_FILE_VALID(p->two))
583+
continue; /* removed is fine */
584+
if (strcmp(p->one->path, it->path))
585+
continue; /* not relevant */
586+
587+
/* p has its src set to *it and it is not a delete;
588+
* it will be used for in-place change, rename/copy,
589+
* or just stays there. We cannot rename it out.
590+
*/
591+
return 1;
592+
}
593+
return 0;
594+
}
595+
596+
static int diff_used_as_source(struct diff_queue_struct *q, int lim,
597+
struct diff_filespec *it)
598+
{
599+
int i;
600+
for (i = 0; i < lim; i++) {
601+
struct diff_filepair *p = q->queue[i++];
602+
if (!strcmp(p->one->path, it->path))
603+
return 1;
604+
}
605+
return 0;
606+
}
607+
573608
void diffcore_prune(void)
574609
{
575610
/*
576611
* Although rename/copy detection wants to have "no-change"
577612
* entries fed into them, the downstream do not need to see
578-
* them. This function removes such entries.
613+
* them, unless we had rename/copy for the same path earlier.
614+
* This function removes such entries.
579615
*
580616
* The applications that use rename/copy should:
581617
*
@@ -585,6 +621,7 @@ void diffcore_prune(void)
585621
* (3) call diffcore_prune
586622
* (4) call other diffcore_xxx that do not need to see
587623
* "no-change" entries.
624+
* (5) call diff_flush().
588625
*/
589626
struct diff_queue_struct *q = &diff_queued_diff;
590627
struct diff_queue_struct outq;
@@ -595,37 +632,29 @@ void diffcore_prune(void)
595632

596633
for (i = 0; i < q->nr; i++) {
597634
struct diff_filepair *p = q->queue[i];
598-
if (!uninteresting(p))
635+
if (!diff_unmodified_pair(p) ||
636+
diff_used_as_source(q, i, p->one))
599637
diff_q(&outq, p);
600638
else
601-
diff_free_filepair(p);
639+
free(p);
602640
}
603641
free(q->queue);
604642
*q = outq;
605643
return;
606644
}
607645

608-
static void diff_flush_one(struct diff_filepair *p)
646+
static void diff_flush_one(struct diff_filepair *p, const char *msg)
609647
{
610-
if (uninteresting(p))
611-
return;
612648
if (generate_patch)
613-
diff_flush_patch(p);
649+
diff_flush_patch(p, msg);
614650
else
615651
diff_flush_raw(p);
616652
}
617653

618654
int diff_queue_is_empty(void)
619655
{
620656
struct diff_queue_struct *q = &diff_queued_diff;
621-
int i;
622-
623-
for (i = 0; i < q->nr; i++) {
624-
struct diff_filepair *p = q->queue[i];
625-
if (!uninteresting(p))
626-
return 0;
627-
}
628-
return 1;
657+
return q->nr == 0;
629658
}
630659

631660
void diff_flush(int diff_output_style)
@@ -646,13 +675,35 @@ void diff_flush(int diff_output_style)
646675
generate_patch = 1;
647676
break;
648677
}
649-
for (i = 0; i < q->nr; i++)
650-
diff_flush_one(q->queue[i]);
678+
for (i = 0; i < q->nr; i++) {
679+
char msg_[PATH_MAX*2+200], *msg = NULL;
680+
struct diff_filepair *p = q->queue[i];
681+
if (strcmp(p->one->path, p->two->path)) {
682+
/* This is rename or copy. Which one is it? */
683+
if (diff_needs_to_stay(q, i+1, p->one)) {
684+
sprintf(msg_,
685+
"similarity index %d%%\n"
686+
"copy from %s\n"
687+
"copy to %s\n",
688+
(int)(0.5 + p->score * 100/MAX_SCORE),
689+
p->one->path, p->two->path);
690+
}
691+
else
692+
sprintf(msg_,
693+
"similarity index %d%%\n"
694+
"rename old %s\n"
695+
"rename new %s\n",
696+
(int)(0.5 + p->score * 100/MAX_SCORE),
697+
p->one->path, p->two->path);
698+
msg = msg_;
699+
}
700+
diff_flush_one(p, msg);
701+
}
702+
651703
for (i = 0; i < q->nr; i++) {
652704
struct diff_filepair *p = q->queue[i];
653705
diff_free_filespec_data(p->one);
654706
diff_free_filespec_data(p->two);
655-
free(p->xfrm_msg);
656707
free(p);
657708
}
658709
free(q->queue);
@@ -674,10 +725,10 @@ void diff_addremove(int addremove, unsigned mode,
674725
* entries to the diff-core. They will be prefixed
675726
* with something like '=' or '*' (I haven't decided
676727
* which but should not make any difference).
677-
* Feeding the same new and old to diff_change() should
678-
* also have the same effect. diff_flush() should
679-
* filter uninteresting ones out at the final output
680-
* stage.
728+
* Feeding the same new and old to diff_change()
729+
* also has the same effect. diffcore_prune() should
730+
* be used to filter uninteresting ones out before the
731+
* final output happens.
681732
*/
682733
if (reverse_diff)
683734
addremove = (addremove == '+' ? '-' :

diffcore-pathspec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void diffcore_pathspec(const char **pathspec)
5555
matches_pathspec(p->two->path, spec, speccnt))
5656
diff_q(&outq, p);
5757
else
58-
diff_free_filepair(p);
58+
free(p);
5959
}
6060
free(q->queue);
6161
*q = outq;

diffcore-pickaxe.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ void diffcore_pickaxe(const char *needle)
4848
contains(p->two, needle, len))
4949
diff_q(&outq, p);
5050
if (onum == outq.nr)
51-
diff_free_filepair(p);
51+
free(p);
5252
}
5353
free(q->queue);
5454
*q = outq;

0 commit comments

Comments
 (0)