Skip to content

Commit a6e7c2e

Browse files
committed
diffcore-rename: avoid doing basename comparisons for irrelevant sources
The basename comparison optimization implemented in find_basename_matches() is very beneficial since it allows a source to sometimes only be compared with one other file instead of N other files. When a match is found, both a source and destination can be removed from the matrix of inexact rename comparisons. In contrast, the irrelevant source optimization only allows us to remove a source from the matrix of inexact rename comparisons...but it has the advantage of allowing a source file to not even be loaded into memory at all and be compared to 0 other files. Generally, not even comparing is a bigger performance win, so when both optimizations could apply, prefer to use the irrelevant-source optimization. For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 5.708 s ± 0.111 s 5.680 s ± 0.096 s mega-renames: 102.171 s ± 0.440 s 13.812 s ± 0.162 s just-one-mega: 3.471 s ± 0.015 s 506.0 ms ± 3.9 ms Signed-off-by: Elijah Newren <[email protected]>
1 parent d65292d commit a6e7c2e

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

diffcore-rename.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
521521
}
522522

523523
static void initialize_dir_rename_info(struct dir_rename_info *info,
524+
struct strset *relevant_sources,
524525
struct strset *dirs_removed,
525526
struct strmap *dir_rename_count)
526527
{
@@ -529,7 +530,7 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
529530
int i;
530531

531532
info->setup = 0;
532-
if (!dirs_removed)
533+
if (!dirs_removed && !relevant_sources)
533534
return;
534535
info->setup = 1;
535536

@@ -542,7 +543,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
542543
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
543544

544545
/* Setup info->relevant_source_dirs */
545-
info->relevant_source_dirs = dirs_removed;
546+
info->relevant_source_dirs = NULL;
547+
if (dirs_removed || !relevant_sources) {
548+
info->relevant_source_dirs = dirs_removed; /* might be NULL */
549+
} else {
550+
info->relevant_source_dirs = xmalloc(sizeof(struct strintmap));
551+
strset_init(info->relevant_source_dirs);
552+
strset_for_each_entry(relevant_sources, &iter, entry) {
553+
char *dirname = get_dirname(entry->key);
554+
if (!dirs_removed ||
555+
strset_contains(dirs_removed, dirname))
556+
strset_add(info->relevant_source_dirs, dirname);
557+
free(dirname);
558+
}
559+
}
546560

547561
/*
548562
* Loop setting up both info->idx_map, and doing setup of
@@ -618,6 +632,13 @@ static void cleanup_dir_rename_info(struct dir_rename_info *info,
618632
/* dir_rename_guess */
619633
strmap_clear(&info->dir_rename_guess, 1);
620634

635+
/* relevant_source_dirs */
636+
if (info->relevant_source_dirs &&
637+
info->relevant_source_dirs != dirs_removed) {
638+
strset_clear(info->relevant_source_dirs);
639+
FREE_AND_NULL(info->relevant_source_dirs);
640+
}
641+
621642
if (!keep_dir_rename_count) {
622643
partial_clear_dir_rename_count(info->dir_rename_count);
623644
strmap_clear(info->dir_rename_count, 1);
@@ -726,6 +747,7 @@ static int find_basename_matches(struct diff_options *options,
726747
int minimum_score,
727748
int num_src,
728749
struct dir_rename_info *info,
750+
struct strset *relevant_sources,
729751
struct strset *dirs_removed)
730752
{
731753
/*
@@ -813,6 +835,11 @@ static int find_basename_matches(struct diff_options *options,
813835
intptr_t src_index;
814836
intptr_t dst_index;
815837

838+
/* Skip irrelevant sources */
839+
if (relevant_sources &&
840+
!strset_contains(relevant_sources, filename))
841+
continue;
842+
816843
/* Find out if this basename is unique among sources */
817844
base = get_basename(filename);
818845
src_index = strintmap_get(&sources, base);
@@ -1133,7 +1160,7 @@ void diffcore_rename_extended(struct diff_options *options,
11331160

11341161
/* Preparation for basename-driven matching. */
11351162
trace2_region_enter("diff", "dir rename setup", options->repo);
1136-
initialize_dir_rename_info(&info,
1163+
initialize_dir_rename_info(&info, relevant_sources,
11371164
dirs_removed, dir_rename_count);
11381165
trace2_region_leave("diff", "dir rename setup", options->repo);
11391166

@@ -1142,6 +1169,7 @@ void diffcore_rename_extended(struct diff_options *options,
11421169
rename_count += find_basename_matches(options,
11431170
min_basename_score,
11441171
rename_src_nr, &info,
1172+
relevant_sources,
11451173
dirs_removed);
11461174
trace2_region_leave("diff", "basename matches", options->repo);
11471175

0 commit comments

Comments
 (0)