Skip to content

Commit 272174d

Browse files
committed
merge-ort: add data structures for in-memory caching of rename detection
When there are many renames between the old base of a series of commits and the new base for a series of commits, the sequence of merges employed to transplant those commits (from a cherry-pick or rebase operation) will repeatedly detect the exact same renames. This is wasted effort.

Add data structures which will be used to cache rename detection results, along with the initialization and deallocation of these data structures. Future commits will populate these caches, detect the appropriate circumstances when they can be used, and employ them to avoid re-detecting the same renames repeatedly.

Signed-off-by: Elijah Newren <[email protected]>
1 parent d8e921a commit 272174d

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

merge-ort.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,37 @@ struct rename_info {
138138
int callback_data_nr, callback_data_alloc;
139139
char *callback_data_traverse_path;
140140

141+
/*
142+
* cached_pairs: Caching of renames and deletions.
143+
*
144+
* These are mappings recording renames and deletions of individual
145+
* files (not directories). They are thus a map from an old
146+
* filename to either NULL (for deletions) or a new filename (for
147+
* renames).
148+
*/
149+
struct strmap cached_pairs[3];
150+
151+
/*
152+
* cached_target_names: just the destinations from cached_pairs
153+
*
154+
* We sometimes want a fast lookup to determine if a given filename
155+
* is one of the destinations in cached_pairs. cached_target_names
156+
* is thus duplicative information, but it provides a fast lookup.
157+
*/
158+
struct strset cached_target_names[3];
159+
160+
/*
161+
* cached_irrelevant: Caching of rename_sources that aren't relevant.
162+
*
163+
* cached_pairs records both renames and deletes. Sometimes we
164+
* do not know if a path is a rename or a delete because we pass
165+
* RELEVANT_LOCATION to diffcore_rename_extended() and based on
166+
* various optimizations it returns without detecting whether that
167+
* path is actually a rename or a delete. We need to cache such
168+
* paths too, but separately from cached_pairs.
169+
*/
170+
struct strset cached_irrelevant[3];
171+
141172
/*
142173
* needed_limit: value needed for inexact rename detection to run
143174
*
@@ -370,6 +401,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
370401
reinitialize ? strmap_partial_clear : strmap_clear;
371402
void (*strintmap_func)(struct strintmap *) =
372403
reinitialize ? strintmap_partial_clear : strintmap_clear;
404+
void (*strset_func)(struct strset *) =
405+
reinitialize ? strset_partial_clear : strset_clear;
373406

374407
/*
375408
* We marked opti->paths with strdup_strings = 0, so that we
@@ -410,6 +443,9 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
410443
strmap_func(&renames->dir_renames[i], 0);
411444

412445
strintmap_func(&renames->relevant_sources[i]);
446+
strset_func(&renames->cached_target_names[i]);
447+
strmap_func(&renames->cached_pairs[i], 1);
448+
strset_func(&renames->cached_irrelevant[i]);
413449
}
414450

415451
if (!reinitialize) {
@@ -3498,6 +3534,12 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
34983534
NULL, 0);
34993535
strintmap_init_with_options(&renames->relevant_sources[i],
35003536
0, NULL, 0);
3537+
strmap_init_with_options(&renames->cached_pairs[i],
3538+
NULL, 1);
3539+
strset_init_with_options(&renames->cached_irrelevant[i],
3540+
NULL, 1);
3541+
strset_init_with_options(&renames->cached_target_names[i],
3542+
NULL, 0);
35013543
}
35023544

35033545
/*

0 commit comments

Comments
 (0)