Skip to content

Commit 0d3c472

Browse files
committed
diffcore-rename: avoid doing basename comparisons for irrelevant sources
The basename comparison optimization implemented in find_basename_matches() is very beneficial since it allows a source to sometimes only be compared with one other file instead of N other files. When a match is found, both a source and destination can be removed from the matrix of inexact rename comparisons. In contrast, the irrelevant source optimization only allows us to remove a source from the matrix of inexact rename comparisons...but it has the advantage of allowing a source file to not even be loaded into memory at all and be compared to 0 other files. Generally, not even comparing is a bigger performance win, so when both optimizations could apply, prefer to use the irrelevant-source optimization. For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 5.708 s ± 0.111 s 5.680 s ± 0.096 s mega-renames: 102.171 s ± 0.440 s 13.812 s ± 0.162 s just-one-mega: 3.471 s ± 0.015 s 506.0 ms ± 3.9 ms Signed-off-by: Elijah Newren <[email protected]>
1 parent 594ea90 commit 0d3c472

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

diffcore-rename.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
527527
}
528528

529529
static void initialize_dir_rename_info(struct dir_rename_info *info,
530+
struct strset *relevant_sources,
530531
struct strset *dirs_removed,
531532
struct strmap *dir_rename_count)
532533
{
@@ -535,7 +536,7 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
535536
int i;
536537

537538
info->setup = 0;
538-
if (!dirs_removed)
539+
if (!dirs_removed && !relevant_sources)
539540
return;
540541
info->setup = 1;
541542

@@ -548,7 +549,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
548549
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
549550

550551
/* Setup info->relevant_source_dirs */
551-
info->relevant_source_dirs = dirs_removed;
552+
info->relevant_source_dirs = NULL;
553+
if (dirs_removed || !relevant_sources) {
554+
info->relevant_source_dirs = dirs_removed; /* might be NULL */
555+
} else {
556+
info->relevant_source_dirs = xmalloc(sizeof(struct strintmap));
557+
strset_init(info->relevant_source_dirs);
558+
strset_for_each_entry(relevant_sources, &iter, entry) {
559+
char *dirname = get_dirname(entry->key);
560+
if (!dirs_removed ||
561+
strset_contains(dirs_removed, dirname))
562+
strset_add(info->relevant_source_dirs, dirname);
563+
free(dirname);
564+
}
565+
}
552566

553567
/*
554568
* Loop setting up both info->idx_map, and doing setup of
@@ -624,6 +638,13 @@ static void cleanup_dir_rename_info(struct dir_rename_info *info,
624638
/* dir_rename_guess */
625639
strmap_clear(&info->dir_rename_guess, 1);
626640

641+
/* relevant_source_dirs */
642+
if (info->relevant_source_dirs &&
643+
info->relevant_source_dirs != dirs_removed) {
644+
strset_clear(info->relevant_source_dirs);
645+
FREE_AND_NULL(info->relevant_source_dirs);
646+
}
647+
627648
if (!keep_dir_rename_count) {
628649
partial_clear_dir_rename_count(info->dir_rename_count);
629650
strmap_clear(info->dir_rename_count, 1);
@@ -744,6 +765,7 @@ static int find_basename_matches(struct diff_options *options,
744765
int minimum_score,
745766
int num_src,
746767
struct dir_rename_info *info,
768+
struct strset *relevant_sources,
747769
struct strset *dirs_removed)
748770
{
749771
/*
@@ -831,6 +853,11 @@ static int find_basename_matches(struct diff_options *options,
831853
intptr_t src_index;
832854
intptr_t dst_index;
833855

856+
/* Skip irrelevant sources */
857+
if (relevant_sources &&
858+
!strset_contains(relevant_sources, filename))
859+
continue;
860+
834861
/* Find out if this basename is unique among sources */
835862
base = get_basename(filename);
836863
src_index = strintmap_get(&sources, base);
@@ -1151,7 +1178,7 @@ void diffcore_rename_extended(struct diff_options *options,
11511178

11521179
/* Preparation for basename-driven matching. */
11531180
trace2_region_enter("diff", "dir rename setup", options->repo);
1154-
initialize_dir_rename_info(&info,
1181+
initialize_dir_rename_info(&info, relevant_sources,
11551182
dirs_removed, dir_rename_count);
11561183
trace2_region_leave("diff", "dir rename setup", options->repo);
11571184

@@ -1160,6 +1187,7 @@ void diffcore_rename_extended(struct diff_options *options,
11601187
rename_count += find_basename_matches(options,
11611188
min_basename_score,
11621189
rename_src_nr, &info,
1190+
relevant_sources,
11631191
dirs_removed);
11641192
trace2_region_leave("diff", "basename matches", options->repo);
11651193

0 commit comments

Comments
 (0)