Skip to content

Commit 1b31224

Browse files
committed
Merge branch 'en/ort-perf-batch-9'
The ort merge backend has been optimized by skipping irrelevant renames.

* en/ort-perf-batch-9:
  - diffcore-rename: avoid doing basename comparisons for irrelevant sources
  - merge-ort: skip rename detection entirely if possible
  - merge-ort: use relevant_sources to filter possible rename sources
  - merge-ort: precompute whether directory rename detection is needed
  - merge-ort: introduce wrappers for alternate tree traversal
  - merge-ort: add data structures for an alternate tree traversal
  - merge-ort: precompute subset of sources for which we need rename detection
  - diffcore-rename: enable filtering possible rename sources
2 parents 82fd285 + e4fd06e commit 1b31224

File tree

4 files changed

+354
-15
lines changed

4 files changed

+354
-15
lines changed

diffcore-rename.c

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -527,14 +527,15 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
527527
}
528528

529529
static void initialize_dir_rename_info(struct dir_rename_info *info,
530+
struct strset *relevant_sources,
530531
struct strset *dirs_removed,
531532
struct strmap *dir_rename_count)
532533
{
533534
struct hashmap_iter iter;
534535
struct strmap_entry *entry;
535536
int i;
536537

537-
if (!dirs_removed) {
538+
if (!dirs_removed && !relevant_sources) {
538539
info->setup = 0;
539540
return;
540541
}
@@ -549,7 +550,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
549550
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
550551

551552
/* Setup info->relevant_source_dirs */
552-
info->relevant_source_dirs = dirs_removed;
553+
info->relevant_source_dirs = NULL;
554+
if (dirs_removed || !relevant_sources) {
555+
info->relevant_source_dirs = dirs_removed; /* might be NULL */
556+
} else {
557+
info->relevant_source_dirs = xmalloc(sizeof(struct strintmap));
558+
strset_init(info->relevant_source_dirs);
559+
strset_for_each_entry(relevant_sources, &iter, entry) {
560+
char *dirname = get_dirname(entry->key);
561+
if (!dirs_removed ||
562+
strset_contains(dirs_removed, dirname))
563+
strset_add(info->relevant_source_dirs, dirname);
564+
free(dirname);
565+
}
566+
}
553567

554568
/*
555569
* Loop setting up both info->idx_map, and doing setup of
@@ -627,6 +641,13 @@ static void cleanup_dir_rename_info(struct dir_rename_info *info,
627641
/* dir_rename_guess */
628642
strmap_clear(&info->dir_rename_guess, 1);
629643

644+
/* relevant_source_dirs */
645+
if (info->relevant_source_dirs &&
646+
info->relevant_source_dirs != dirs_removed) {
647+
strset_clear(info->relevant_source_dirs);
648+
FREE_AND_NULL(info->relevant_source_dirs);
649+
}
650+
630651
/* dir_rename_count */
631652
if (!keep_dir_rename_count) {
632653
partial_clear_dir_rename_count(info->dir_rename_count);
@@ -749,6 +770,7 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
749770
static int find_basename_matches(struct diff_options *options,
750771
int minimum_score,
751772
struct dir_rename_info *info,
773+
struct strset *relevant_sources,
752774
struct strset *dirs_removed)
753775
{
754776
/*
@@ -839,6 +861,11 @@ static int find_basename_matches(struct diff_options *options,
839861
intptr_t src_index;
840862
intptr_t dst_index;
841863

864+
/* Skip irrelevant sources */
865+
if (relevant_sources &&
866+
!strset_contains(relevant_sources, filename))
867+
continue;
868+
842869
/*
843870
* If the basename is unique among remaining sources, then
844871
* src_index will equal 'i' and we can attempt to match it
@@ -991,11 +1018,12 @@ static int find_renames(struct diff_score *mx,
9911018
return count;
9921019
}
9931020

994-
static void remove_unneeded_paths_from_src(int detecting_copies)
1021+
static void remove_unneeded_paths_from_src(int detecting_copies,
1022+
struct strset *interesting)
9951023
{
9961024
int i, new_num_src;
9971025

998-
if (detecting_copies)
1026+
if (detecting_copies && !interesting)
9991027
return; /* nothing to remove */
10001028
if (break_idx)
10011029
return; /* culling incompatible with break detection */
@@ -1022,12 +1050,18 @@ static void remove_unneeded_paths_from_src(int detecting_copies)
10221050
* from rename_src here.
10231051
*/
10241052
for (i = 0, new_num_src = 0; i < rename_src_nr; i++) {
1053+
struct diff_filespec *one = rename_src[i].p->one;
1054+
10251055
/*
10261056
* renames are stored in rename_dst, so if a rename has
10271057
* already been detected using this source, we can just
10281058
* remove the source knowing rename_dst has its info.
10291059
*/
1030-
if (rename_src[i].p->one->rename_used)
1060+
if (!detecting_copies && one->rename_used)
1061+
continue;
1062+
1063+
/* If we don't care about the source path, skip it */
1064+
if (interesting && !strset_contains(interesting, one->path))
10311065
continue;
10321066

10331067
if (new_num_src < i)
@@ -1040,6 +1074,7 @@ static void remove_unneeded_paths_from_src(int detecting_copies)
10401074
}
10411075

10421076
void diffcore_rename_extended(struct diff_options *options,
1077+
struct strset *relevant_sources,
10431078
struct strset *dirs_removed,
10441079
struct strmap *dir_rename_count)
10451080
{
@@ -1060,6 +1095,8 @@ void diffcore_rename_extended(struct diff_options *options,
10601095
want_copies = (detect_rename == DIFF_DETECT_COPY);
10611096
if (dirs_removed && (break_idx || want_copies))
10621097
BUG("dirs_removed incompatible with break/copy detection");
1098+
if (break_idx && relevant_sources)
1099+
BUG("break detection incompatible with source specification");
10631100
if (!minimum_score)
10641101
minimum_score = DEFAULT_RENAME_SCORE;
10651102

@@ -1127,9 +1164,10 @@ void diffcore_rename_extended(struct diff_options *options,
11271164
/*
11281165
* Cull sources:
11291166
* - remove ones corresponding to exact renames
1167+
* - remove ones not found in relevant_sources
11301168
*/
11311169
trace2_region_enter("diff", "cull after exact", options->repo);
1132-
remove_unneeded_paths_from_src(want_copies);
1170+
remove_unneeded_paths_from_src(want_copies, relevant_sources);
11331171
trace2_region_leave("diff", "cull after exact", options->repo);
11341172
} else {
11351173
/* Determine minimum score to match basenames */
@@ -1148,28 +1186,31 @@ void diffcore_rename_extended(struct diff_options *options,
11481186
* - remove ones involved in renames (found via exact match)
11491187
*/
11501188
trace2_region_enter("diff", "cull after exact", options->repo);
1151-
remove_unneeded_paths_from_src(want_copies);
1189+
remove_unneeded_paths_from_src(want_copies, NULL);
11521190
trace2_region_leave("diff", "cull after exact", options->repo);
11531191

11541192
/* Preparation for basename-driven matching. */
11551193
trace2_region_enter("diff", "dir rename setup", options->repo);
1156-
initialize_dir_rename_info(&info,
1194+
initialize_dir_rename_info(&info, relevant_sources,
11571195
dirs_removed, dir_rename_count);
11581196
trace2_region_leave("diff", "dir rename setup", options->repo);
11591197

11601198
/* Utilize file basenames to quickly find renames. */
11611199
trace2_region_enter("diff", "basename matches", options->repo);
11621200
rename_count += find_basename_matches(options,
11631201
min_basename_score,
1164-
&info, dirs_removed);
1202+
&info,
1203+
relevant_sources,
1204+
dirs_removed);
11651205
trace2_region_leave("diff", "basename matches", options->repo);
11661206

11671207
/*
11681208
* Cull sources, again:
11691209
* - remove ones involved in renames (found via basenames)
1210+
* - remove ones not found in relevant_sources
11701211
*/
11711212
trace2_region_enter("diff", "cull basename", options->repo);
1172-
remove_unneeded_paths_from_src(want_copies);
1213+
remove_unneeded_paths_from_src(want_copies, relevant_sources);
11731214
trace2_region_leave("diff", "cull basename", options->repo);
11741215
}
11751216

@@ -1341,5 +1382,5 @@ void diffcore_rename_extended(struct diff_options *options,
13411382

13421383
void diffcore_rename(struct diff_options *options)
13431384
{
1344-
diffcore_rename_extended(options, NULL, NULL);
1385+
diffcore_rename_extended(options, NULL, NULL, NULL);
13451386
}

diffcore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ void partial_clear_dir_rename_count(struct strmap *dir_rename_count);
166166
void diffcore_break(struct repository *, int);
167167
void diffcore_rename(struct diff_options *);
168168
void diffcore_rename_extended(struct diff_options *options,
169+
struct strset *relevant_sources,
169170
struct strset *dirs_removed,
170171
struct strmap *dir_rename_count);
171172
void diffcore_merge_broken(void);

0 commit comments

Comments
 (0)