Skip to content

Commit 1ad69eb

Browse files
newrengitster
authored andcommitted
diffcore-rename: compute dir_rename_counts in stages
Compute dir_rename_counts based just on exact renames to start, as that can provide us useful information in find_basename_matches(). This is done by moving the code from compute_dir_rename_counts() into initialize_dir_rename_info(), resulting in it being computed earlier and based just on exact renames. Since that's an incomplete result, we augment the counts via calling update_dir_rename_counts() after each basename-guide and inexact rename detection match is found. Reviewed-by: Derrick Stolee <[email protected]> Signed-off-by: Elijah Newren <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent b147301 commit 1ad69eb

File tree

1 file changed

+70
-40
lines changed

1 file changed

+70
-40
lines changed

diffcore-rename.c

Lines changed: 70 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,28 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
419419
char new_dir_first_char = new_dir[0];
420420
int first_time_in_loop = 1;
421421

422+
if (!info->setup)
423+
/*
424+
* info->setup is 0 here in two cases: (1) all auxiliary
425+
* vars (like dirs_removed) were NULL so
426+
* initialize_dir_rename_info() returned early, or (2)
427+
* either break detection or copy detection are active so
428+
* that we never called initialize_dir_rename_info(). In
429+
* the former case, we don't have enough info to know if
430+
* directories were renamed (because dirs_removed lets us
431+
* know about a necessary prerequisite, namely if they were
432+
* removed), and in the latter, we don't care about
433+
* directory renames or find_basename_matches.
434+
*
435+
* This matters because both basename and inexact matching
436+
* will also call update_dir_rename_counts(). In either of
437+
* the above two cases info->dir_rename_counts will not
438+
* have been properly initialized which prevents us from
439+
* updating it, but in these two cases we don't care about
440+
* dir_rename_counts anyway, so we can just exit early.
441+
*/
442+
return;
443+
422444
while (1) {
423445
dirname_munge(old_dir);
424446
dirname_munge(new_dir);
@@ -479,45 +501,29 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
479501
free(new_dir);
480502
}
481503

482-
static void compute_dir_rename_counts(struct dir_rename_info *info,
483-
struct strset *dirs_removed,
484-
struct strmap *dir_rename_count)
504+
static void initialize_dir_rename_info(struct dir_rename_info *info,
505+
struct strset *dirs_removed,
506+
struct strmap *dir_rename_count)
485507
{
486508
int i;
487509

488-
info->setup = 1;
489-
info->dir_rename_count = dir_rename_count;
490-
491-
for (i = 0; i < rename_dst_nr; ++i) {
492-
/* File not part of directory rename counts if not a rename */
493-
if (!rename_dst[i].is_rename)
494-
continue;
495-
496-
/*
497-
* Make dir_rename_count contain a map of a map:
498-
* old_directory -> {new_directory -> count}
499-
* In other words, for every pair look at the directories for
500-
* the old filename and the new filename and count how many
501-
* times that pairing occurs.
502-
*/
503-
update_dir_rename_counts(info, dirs_removed,
504-
rename_dst[i].p->one->path,
505-
rename_dst[i].p->two->path);
510+
if (!dirs_removed) {
511+
info->setup = 0;
512+
return;
506513
}
507-
}
508-
509-
static void initialize_dir_rename_info(struct dir_rename_info *info)
510-
{
511-
int i;
512-
513514
info->setup = 1;
514515

516+
info->dir_rename_count = dir_rename_count;
517+
if (!info->dir_rename_count) {
518+
info->dir_rename_count = xmalloc(sizeof(*dir_rename_count));
519+
strmap_init(info->dir_rename_count);
520+
}
515521
strintmap_init_with_options(&info->idx_map, -1, NULL, 0);
516522
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
517-
info->dir_rename_count = NULL;
518523

519524
/*
520-
* Loop setting up both info->idx_map.
525+
* Loop setting up both info->idx_map, and doing setup of
526+
* info->dir_rename_count.
521527
*/
522528
for (i = 0; i < rename_dst_nr; ++i) {
523529
/*
@@ -527,7 +533,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info)
527533
if (!rename_dst[i].is_rename) {
528534
char *filename = rename_dst[i].p->two->path;
529535
strintmap_set(&info->idx_map, filename, i);
536+
continue;
530537
}
538+
539+
/*
540+
* For everything else (i.e. renamed files), make
541+
* dir_rename_count contain a map of a map:
542+
* old_directory -> {new_directory -> count}
543+
* In other words, for every pair look at the directories for
544+
* the old filename and the new filename and count how many
545+
* times that pairing occurs.
546+
*/
547+
update_dir_rename_counts(info, dirs_removed,
548+
rename_dst[i].p->one->path,
549+
rename_dst[i].p->two->path);
531550
}
532551
}
533552

@@ -682,7 +701,8 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
682701

683702
static int find_basename_matches(struct diff_options *options,
684703
int minimum_score,
685-
struct dir_rename_info *info)
704+
struct dir_rename_info *info,
705+
struct strset *dirs_removed)
686706
{
687707
/*
688708
* When I checked in early 2020, over 76% of file renames in linux
@@ -810,6 +830,8 @@ static int find_basename_matches(struct diff_options *options,
810830
continue;
811831
record_rename_pair(dst_index, src_index, score);
812832
renames++;
833+
update_dir_rename_counts(info, dirs_removed,
834+
one->path, two->path);
813835

814836
/*
815837
* Found a rename so don't need text anymore; if we
@@ -893,7 +915,12 @@ static int too_many_rename_candidates(int num_destinations, int num_sources,
893915
return 1;
894916
}
895917

896-
static int find_renames(struct diff_score *mx, int dst_cnt, int minimum_score, int copies)
918+
static int find_renames(struct diff_score *mx,
919+
int dst_cnt,
920+
int minimum_score,
921+
int copies,
922+
struct dir_rename_info *info,
923+
struct strset *dirs_removed)
897924
{
898925
int count = 0, i;
899926

@@ -910,6 +937,9 @@ static int find_renames(struct diff_score *mx, int dst_cnt, int minimum_score, i
910937
continue;
911938
record_rename_pair(mx[i].dst, mx[i].src, mx[i].score);
912939
count++;
940+
update_dir_rename_counts(info, dirs_removed,
941+
rename_src[mx[i].src].p->one->path,
942+
rename_dst[mx[i].dst].p->two->path);
913943
}
914944
return count;
915945
}
@@ -981,6 +1011,8 @@ void diffcore_rename_extended(struct diff_options *options,
9811011
info.setup = 0;
9821012
assert(!dir_rename_count || strmap_empty(dir_rename_count));
9831013
want_copies = (detect_rename == DIFF_DETECT_COPY);
1014+
if (dirs_removed && (break_idx || want_copies))
1015+
BUG("dirs_removed incompatible with break/copy detection");
9841016
if (!minimum_score)
9851017
minimum_score = DEFAULT_RENAME_SCORE;
9861018

@@ -1074,14 +1106,15 @@ void diffcore_rename_extended(struct diff_options *options,
10741106

10751107
/* Preparation for basename-driven matching. */
10761108
trace2_region_enter("diff", "dir rename setup", options->repo);
1077-
initialize_dir_rename_info(&info);
1109+
initialize_dir_rename_info(&info,
1110+
dirs_removed, dir_rename_count);
10781111
trace2_region_leave("diff", "dir rename setup", options->repo);
10791112

10801113
/* Utilize file basenames to quickly find renames. */
10811114
trace2_region_enter("diff", "basename matches", options->repo);
10821115
rename_count += find_basename_matches(options,
10831116
min_basename_score,
1084-
&info);
1117+
&info, dirs_removed);
10851118
trace2_region_leave("diff", "basename matches", options->repo);
10861119

10871120
/*
@@ -1167,18 +1200,15 @@ void diffcore_rename_extended(struct diff_options *options,
11671200
/* cost matrix sorted by most to least similar pair */
11681201
STABLE_QSORT(mx, dst_cnt * NUM_CANDIDATE_PER_DST, score_compare);
11691202

1170-
rename_count += find_renames(mx, dst_cnt, minimum_score, 0);
1203+
rename_count += find_renames(mx, dst_cnt, minimum_score, 0,
1204+
&info, dirs_removed);
11711205
if (want_copies)
1172-
rename_count += find_renames(mx, dst_cnt, minimum_score, 1);
1206+
rename_count += find_renames(mx, dst_cnt, minimum_score, 1,
1207+
&info, dirs_removed);
11731208
free(mx);
11741209
trace2_region_leave("diff", "inexact renames", options->repo);
11751210

11761211
cleanup:
1177-
/*
1178-
* Now that renames have been computed, compute dir_rename_count */
1179-
if (dirs_removed && dir_rename_count)
1180-
compute_dir_rename_counts(&info, dirs_removed, dir_rename_count);
1181-
11821212
/* At this point, we have found some renames and copies and they
11831213
* are recorded in rename_dst. The original list is still in *q.
11841214
*/

0 commit comments

Comments
 (0)