Skip to content

Commit 69d8e51

Browse files
committed
diffcore-rename: avoid doing basename comparisons for irrelevant sources
The basename comparison optimization implemented in find_basename_matches() is very beneficial since it allows a source to sometimes only be compared with one other file instead of N other files. When a match is found, both a source and destination can be removed from the matrix of inexact rename comparisons. In contrast, the irrelevant source optimization only allows us to remove a source from the matrix of inexact rename comparisons...but it has the advantage of allowing a source file to not even be loaded into memory at all and be compared to 0 other files. Generally, not even comparing is a bigger performance win, so when both optimizations could apply, prefer to use the irrelevant-source optimization. For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 5.607 s ± 0.068 s 5.671 s ± 0.052 s mega-renames: 102.714 s ± 0.233 s 13.653 s ± 0.104 s just-one-mega: 3.511 s ± 0.017 s 499.4 ms ± 3.6 ms Signed-off-by: Elijah Newren <[email protected]>
1 parent ae0560c commit 69d8e51

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

diffcore-rename.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
521521
}
522522

523523
static void initialize_dir_rename_info(struct dir_rename_info *info,
524+
struct strset *relevant_sources,
524525
struct strset *dirs_removed,
525526
struct strmap *dir_rename_count)
526527
{
@@ -529,7 +530,7 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
529530
int i;
530531

531532
info->setup = 0;
532-
if (!dirs_removed)
533+
if (!dirs_removed && !relevant_sources)
533534
return;
534535
info->setup = 1;
535536

@@ -542,7 +543,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
542543
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
543544

544545
/* Setup info->relevant_source_dirs */
545-
info->relevant_source_dirs = dirs_removed;
546+
info->relevant_source_dirs = NULL;
547+
if (dirs_removed || !relevant_sources) {
548+
info->relevant_source_dirs = dirs_removed; /* might be NULL */
549+
} else {
550+
info->relevant_source_dirs = xmalloc(sizeof(struct strintmap));
551+
strset_init(info->relevant_source_dirs);
552+
strset_for_each_entry(relevant_sources, &iter, entry) {
553+
char *dirname = get_dirname(entry->key);
554+
if (!dirs_removed ||
555+
strset_contains(dirs_removed, dirname))
556+
strset_add(info->relevant_source_dirs, dirname);
557+
free(dirname);
558+
}
559+
}
546560

547561
/*
548562
* Loop setting up both info->idx_map, and doing setup of
@@ -618,6 +632,13 @@ static void cleanup_dir_rename_info(struct dir_rename_info *info,
618632
/* dir_rename_guess */
619633
strmap_clear(&info->dir_rename_guess, 1);
620634

635+
/* relevant_source_dirs */
636+
if (info->relevant_source_dirs &&
637+
info->relevant_source_dirs != dirs_removed) {
638+
strset_clear(info->relevant_source_dirs);
639+
FREE_AND_NULL(info->relevant_source_dirs);
640+
}
641+
621642
if (!keep_dir_rename_count) {
622643
partial_clear_dir_rename_count(info->dir_rename_count);
623644
strmap_clear(info->dir_rename_count, 1);
@@ -714,6 +735,7 @@ static int find_basename_matches(struct diff_options *options,
714735
int minimum_score,
715736
int num_src,
716737
struct dir_rename_info *info,
738+
struct strset *relevant_sources,
717739
struct strset *dirs_removed)
718740
{
719741
/*
@@ -801,6 +823,11 @@ static int find_basename_matches(struct diff_options *options,
801823
intptr_t src_index;
802824
intptr_t dst_index;
803825

826+
/* Skip irrelevant sources */
827+
if (relevant_sources &&
828+
!strset_contains(relevant_sources, filename))
829+
continue;
830+
804831
/* Get the basename */
805832
base = strrchr(filename, '/');
806833
base = (base ? base+1 : filename);
@@ -1113,14 +1140,15 @@ void diffcore_rename_extended(struct diff_options *options,
11131140

11141141
/* Preparation for basename-driven matching. */
11151142
trace2_region_enter("diff", "dir rename setup", options->repo);
1116-
initialize_dir_rename_info(&info,
1143+
initialize_dir_rename_info(&info, relevant_sources,
11171144
dirs_removed, dir_rename_count);
11181145
trace2_region_leave("diff", "dir rename setup", options->repo);
11191146

11201147
/* Utilize file basenames to quickly find renames. */
11211148
trace2_region_enter("diff", "basename matches", options->repo);
11221149
rename_count += find_basename_matches(options, minimum_score,
11231150
rename_src_nr, &info,
1151+
relevant_sources,
11241152
dirs_removed);
11251153
trace2_region_leave("diff", "basename matches", options->repo);
11261154

0 commit comments

Comments
 (0)