Skip to content

Commit e4fd06e

Browse files
newrengitster
authored andcommitted
diffcore-rename: avoid doing basename comparisons for irrelevant sources
The basename comparison optimization implemented in find_basename_matches() is very beneficial since it allows a source to sometimes only be compared with one other file instead of N other files. When a match is found, both a source and destination can be removed from the matrix of inexact rename comparisons. In contrast, the irrelevant source optimization only allows us to remove a source from the matrix of inexact rename comparisons...but it has the advantage of allowing a source file to not even be loaded into memory at all and be compared to 0 other files. Generally, not even comparing is a bigger performance win, so when both optimizations could apply, prefer to use the irrelevant-source optimization. For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 5.708 s ± 0.111 s 5.680 s ± 0.096 s mega-renames: 102.171 s ± 0.440 s 13.812 s ± 0.162 s just-one-mega: 3.471 s ± 0.015 s 506.0 ms ± 3.9 ms Signed-off-by: Elijah Newren <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent f89b4f2 commit e4fd06e

File tree

1 file changed

+33
-4
lines changed

1 file changed

+33
-4
lines changed

diffcore-rename.c

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -527,14 +527,15 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
527527
}
528528

529529
static void initialize_dir_rename_info(struct dir_rename_info *info,
530+
struct strset *relevant_sources,
530531
struct strset *dirs_removed,
531532
struct strmap *dir_rename_count)
532533
{
533534
struct hashmap_iter iter;
534535
struct strmap_entry *entry;
535536
int i;
536537

537-
if (!dirs_removed) {
538+
if (!dirs_removed && !relevant_sources) {
538539
info->setup = 0;
539540
return;
540541
}
@@ -549,7 +550,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
549550
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
550551

551552
/* Setup info->relevant_source_dirs */
552-
info->relevant_source_dirs = dirs_removed;
553+
info->relevant_source_dirs = NULL;
554+
if (dirs_removed || !relevant_sources) {
555+
info->relevant_source_dirs = dirs_removed; /* might be NULL */
556+
} else {
557+
info->relevant_source_dirs = xmalloc(sizeof(struct strintmap));
558+
strset_init(info->relevant_source_dirs);
559+
strset_for_each_entry(relevant_sources, &iter, entry) {
560+
char *dirname = get_dirname(entry->key);
561+
if (!dirs_removed ||
562+
strset_contains(dirs_removed, dirname))
563+
strset_add(info->relevant_source_dirs, dirname);
564+
free(dirname);
565+
}
566+
}
553567

554568
/*
555569
* Loop setting up both info->idx_map, and doing setup of
@@ -627,6 +641,13 @@ static void cleanup_dir_rename_info(struct dir_rename_info *info,
627641
/* dir_rename_guess */
628642
strmap_clear(&info->dir_rename_guess, 1);
629643

644+
/* relevant_source_dirs */
645+
if (info->relevant_source_dirs &&
646+
info->relevant_source_dirs != dirs_removed) {
647+
strset_clear(info->relevant_source_dirs);
648+
FREE_AND_NULL(info->relevant_source_dirs);
649+
}
650+
630651
/* dir_rename_count */
631652
if (!keep_dir_rename_count) {
632653
partial_clear_dir_rename_count(info->dir_rename_count);
@@ -749,6 +770,7 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
749770
static int find_basename_matches(struct diff_options *options,
750771
int minimum_score,
751772
struct dir_rename_info *info,
773+
struct strset *relevant_sources,
752774
struct strset *dirs_removed)
753775
{
754776
/*
@@ -839,6 +861,11 @@ static int find_basename_matches(struct diff_options *options,
839861
intptr_t src_index;
840862
intptr_t dst_index;
841863

864+
/* Skip irrelevant sources */
865+
if (relevant_sources &&
866+
!strset_contains(relevant_sources, filename))
867+
continue;
868+
842869
/*
843870
* If the basename is unique among remaining sources, then
844871
* src_index will equal 'i' and we can attempt to match it
@@ -1164,15 +1191,17 @@ void diffcore_rename_extended(struct diff_options *options,
11641191

11651192
/* Preparation for basename-driven matching. */
11661193
trace2_region_enter("diff", "dir rename setup", options->repo);
1167-
initialize_dir_rename_info(&info,
1194+
initialize_dir_rename_info(&info, relevant_sources,
11681195
dirs_removed, dir_rename_count);
11691196
trace2_region_leave("diff", "dir rename setup", options->repo);
11701197

11711198
/* Utilize file basenames to quickly find renames. */
11721199
trace2_region_enter("diff", "basename matches", options->repo);
11731200
rename_count += find_basename_matches(options,
11741201
min_basename_score,
1175-
&info, dirs_removed);
1202+
&info,
1203+
relevant_sources,
1204+
dirs_removed);
11761205
trace2_region_leave("diff", "basename matches", options->repo);
11771206

11781207
/*

0 commit comments

Comments
 (0)