Skip to content

Commit 488ea58

Browse files
committed
Merge branch 'en/ort-perf-batch-13' into seen
Performance tweaks of "git merge -sort" around lazy fetching of objects.

* en/ort-perf-batch-13:
  merge-ort: add prefetching for content merges
  diffcore-rename: use a different prefetch for basename comparisons
  diffcore-rename: allow different missing_object_cb functions
  t6421: add tests checking for excessive object downloads during merge
  promisor-remote: output trace2 statistics for number of objects fetched
2 parents 1f47af5 + de2d003 commit 488ea58

File tree

4 files changed

+608
-32
lines changed

4 files changed

+608
-32
lines changed

diffcore-rename.c

Lines changed: 117 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -87,13 +87,13 @@ struct diff_score {
8787
short name_score;
8888
};
8989

90-
struct prefetch_options {
90+
struct inexact_prefetch_options {
9191
struct repository *repo;
9292
int skip_unmodified;
9393
};
94-
static void prefetch(void *prefetch_options)
94+
static void inexact_prefetch(void *prefetch_options)
9595
{
96-
struct prefetch_options *options = prefetch_options;
96+
struct inexact_prefetch_options *options = prefetch_options;
9797
int i;
9898
struct oid_array to_fetch = OID_ARRAY_INIT;
9999

@@ -126,7 +126,7 @@ static int estimate_similarity(struct repository *r,
126126
struct diff_filespec *src,
127127
struct diff_filespec *dst,
128128
int minimum_score,
129-
int skip_unmodified)
129+
struct diff_populate_filespec_options *dpf_opt)
130130
{
131131
/* src points at a file that existed in the original tree (or
132132
* optionally a file in the destination tree) and dst points
@@ -143,15 +143,6 @@ static int estimate_similarity(struct repository *r,
143143
*/
144144
unsigned long max_size, delta_size, base_size, src_copied, literal_added;
145145
int score;
146-
struct diff_populate_filespec_options dpf_options = {
147-
.check_size_only = 1
148-
};
149-
struct prefetch_options prefetch_options = {r, skip_unmodified};
150-
151-
if (r == the_repository && has_promisor_remote()) {
152-
dpf_options.missing_object_cb = prefetch;
153-
dpf_options.missing_object_data = &prefetch_options;
154-
}
155146

156147
/* We deal only with regular files. Symlink renames are handled
157148
* only when they are exact matches --- in other words, no edits
@@ -169,11 +160,13 @@ static int estimate_similarity(struct repository *r,
169160
* is a possible size - we really should have a flag to
170161
* say whether the size is valid or not!)
171162
*/
163+
dpf_opt->check_size_only = 1;
164+
172165
if (!src->cnt_data &&
173-
diff_populate_filespec(r, src, &dpf_options))
166+
diff_populate_filespec(r, src, dpf_opt))
174167
return 0;
175168
if (!dst->cnt_data &&
176-
diff_populate_filespec(r, dst, &dpf_options))
169+
diff_populate_filespec(r, dst, dpf_opt))
177170
return 0;
178171

179172
max_size = ((src->size > dst->size) ? src->size : dst->size);
@@ -191,11 +184,11 @@ static int estimate_similarity(struct repository *r,
191184
if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
192185
return 0;
193186

194-
dpf_options.check_size_only = 0;
187+
dpf_opt->check_size_only = 0;
195188

196-
if (!src->cnt_data && diff_populate_filespec(r, src, &dpf_options))
189+
if (!src->cnt_data && diff_populate_filespec(r, src, dpf_opt))
197190
return 0;
198-
if (!dst->cnt_data && diff_populate_filespec(r, dst, &dpf_options))
191+
if (!dst->cnt_data && diff_populate_filespec(r, dst, dpf_opt))
199192
return 0;
200193

201194
if (diffcore_count_changes(r, src, dst,
@@ -823,6 +816,78 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
823816
return idx;
824817
}
825818

819+
struct basename_prefetch_options {
820+
struct repository *repo;
821+
struct strintmap *relevant_sources;
822+
struct strintmap *sources;
823+
struct strintmap *dests;
824+
struct dir_rename_info *info;
825+
};
826+
static void basename_prefetch(void *prefetch_options)
827+
{
828+
struct basename_prefetch_options *options = prefetch_options;
829+
struct strintmap *relevant_sources = options->relevant_sources;
830+
struct strintmap *sources = options->sources;
831+
struct strintmap *dests = options->dests;
832+
struct dir_rename_info *info = options->info;
833+
int i;
834+
struct oid_array to_fetch = OID_ARRAY_INIT;
835+
836+
/*
837+
* TODO: The following loops mirror the code/logic from
838+
* find_basename_matches(), though not quite exactly. Maybe
839+
* abstract the iteration logic out somehow?
840+
*/
841+
for (i = 0; i < rename_src_nr; ++i) {
842+
char *filename = rename_src[i].p->one->path;
843+
const char *base = NULL;
844+
intptr_t src_index;
845+
intptr_t dst_index;
846+
847+
/* Skip irrelevant sources */
848+
if (relevant_sources &&
849+
!strintmap_contains(relevant_sources, filename))
850+
continue;
851+
852+
/*
853+
* If the basename is unique among remaining sources, then
854+
* src_index will equal 'i' and we can attempt to match it
855+
* to a unique basename in the destinations. Otherwise,
856+
* use directory rename heuristics, if possible.
857+
*/
858+
base = get_basename(filename);
859+
src_index = strintmap_get(sources, base);
860+
assert(src_index == -1 || src_index == i);
861+
862+
if (strintmap_contains(dests, base)) {
863+
struct diff_filespec *one, *two;
864+
865+
/* Find a matching destination, if possible */
866+
dst_index = strintmap_get(dests, base);
867+
if (src_index == -1 || dst_index == -1) {
868+
src_index = i;
869+
dst_index = idx_possible_rename(filename, info);
870+
}
871+
if (dst_index == -1)
872+
continue;
873+
874+
/* Ignore this dest if already used in a rename */
875+
if (rename_dst[dst_index].is_rename)
876+
continue; /* already used previously */
877+
878+
one = rename_src[src_index].p->one;
879+
two = rename_dst[dst_index].p->two;
880+
881+
/* Add the pairs */
882+
diff_add_if_missing(options->repo, &to_fetch, two);
883+
diff_add_if_missing(options->repo, &to_fetch, one);
884+
}
885+
}
886+
887+
promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr);
888+
oid_array_clear(&to_fetch);
889+
}
890+
826891
static int find_basename_matches(struct diff_options *options,
827892
int minimum_score,
828893
struct dir_rename_info *info,
@@ -862,18 +927,18 @@ static int find_basename_matches(struct diff_options *options,
862927
int i, renames = 0;
863928
struct strintmap sources;
864929
struct strintmap dests;
865-
866-
/*
867-
* The prefeteching stuff wants to know if it can skip prefetching
868-
* blobs that are unmodified...and will then do a little extra work
869-
* to verify that the oids are indeed different before prefetching.
870-
* Unmodified blobs are only relevant when doing copy detection;
871-
* when limiting to rename detection, diffcore_rename[_extended]()
872-
* will never be called with unmodified source paths fed to us, so
873-
* the extra work necessary to check if rename_src entries are
874-
* unmodified would be a small waste.
875-
*/
876-
int skip_unmodified = 0;
930+
struct diff_populate_filespec_options dpf_options = {
931+
.check_binary = 0,
932+
.missing_object_cb = NULL,
933+
.missing_object_data = NULL
934+
};
935+
struct basename_prefetch_options prefetch_options = {
936+
.repo = options->repo,
937+
.relevant_sources = relevant_sources,
938+
.sources = &sources,
939+
.dests = &dests,
940+
.info = info
941+
};
877942

878943
/*
879944
* Create maps of basename -> fullname(s) for remaining sources and
@@ -910,6 +975,11 @@ static int find_basename_matches(struct diff_options *options,
910975
strintmap_set(&dests, base, i);
911976
}
912977

978+
if (options->repo == the_repository && has_promisor_remote()) {
979+
dpf_options.missing_object_cb = basename_prefetch;
980+
dpf_options.missing_object_data = &prefetch_options;
981+
}
982+
913983
/* Now look for basename matchups and do similarity estimation */
914984
for (i = 0; i < rename_src_nr; ++i) {
915985
char *filename = rename_src[i].p->one->path;
@@ -953,7 +1023,7 @@ static int find_basename_matches(struct diff_options *options,
9531023
one = rename_src[src_index].p->one;
9541024
two = rename_dst[dst_index].p->two;
9551025
score = estimate_similarity(options->repo, one, two,
956-
minimum_score, skip_unmodified);
1026+
minimum_score, &dpf_options);
9571027

9581028
/* If sufficiently similar, record as rename pair */
9591029
if (score < minimum_score)
@@ -1272,6 +1342,14 @@ void diffcore_rename_extended(struct diff_options *options,
12721342
int num_sources, want_copies;
12731343
struct progress *progress = NULL;
12741344
struct dir_rename_info info;
1345+
struct diff_populate_filespec_options dpf_options = {
1346+
.check_binary = 0,
1347+
.missing_object_cb = NULL,
1348+
.missing_object_data = NULL
1349+
};
1350+
struct inexact_prefetch_options prefetch_options = {
1351+
.repo = options->repo
1352+
};
12751353

12761354
trace2_region_enter("diff", "setup", options->repo);
12771355
info.setup = 0;
@@ -1433,6 +1511,13 @@ void diffcore_rename_extended(struct diff_options *options,
14331511
(uint64_t)num_destinations * (uint64_t)num_sources);
14341512
}
14351513

1514+
/* Finish setting up dpf_options */
1515+
prefetch_options.skip_unmodified = skip_unmodified;
1516+
if (options->repo == the_repository && has_promisor_remote()) {
1517+
dpf_options.missing_object_cb = inexact_prefetch;
1518+
dpf_options.missing_object_data = &prefetch_options;
1519+
}
1520+
14361521
CALLOC_ARRAY(mx, st_mult(NUM_CANDIDATE_PER_DST, num_destinations));
14371522
for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
14381523
struct diff_filespec *two = rename_dst[i].p->two;
@@ -1458,7 +1543,7 @@ void diffcore_rename_extended(struct diff_options *options,
14581543
this_src.score = estimate_similarity(options->repo,
14591544
one, two,
14601545
minimum_score,
1461-
skip_unmodified);
1546+
&dpf_options);
14621547
this_src.name_score = basename_same(one, two);
14631548
this_src.dst = i;
14641549
this_src.src = j;

merge-ort.c

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
#include "entry.h"
3030
#include "ll-merge.h"
3131
#include "object-store.h"
32+
#include "promisor-remote.h"
3233
#include "revision.h"
3334
#include "strmap.h"
3435
#include "submodule.h"
@@ -3494,6 +3495,54 @@ static void process_entry(struct merge_options *opt,
34943495
record_entry_for_tree(dir_metadata, path, &ci->merged);
34953496
}
34963497

3498+
static void prefetch_for_content_merges(struct merge_options *opt,
3499+
struct string_list *plist)
3500+
{
3501+
struct string_list_item *e;
3502+
struct oid_array to_fetch = OID_ARRAY_INIT;
3503+
3504+
if (opt->repo != the_repository || !has_promisor_remote())
3505+
return;
3506+
3507+
for (e = &plist->items[plist->nr-1]; e >= plist->items; --e) {
3508+
/* char *path = e->string; */
3509+
struct conflict_info *ci = e->util;
3510+
int i;
3511+
3512+
/* Ignore clean entries */
3513+
if (ci->merged.clean)
3514+
continue;
3515+
3516+
/* Ignore entries that don't need a content merge */
3517+
if (ci->match_mask || ci->filemask < 6 ||
3518+
!S_ISREG(ci->stages[1].mode) ||
3519+
!S_ISREG(ci->stages[2].mode) ||
3520+
oideq(&ci->stages[1].oid, &ci->stages[2].oid))
3521+
continue;
3522+
3523+
/* Also don't need content merge if base matches either side */
3524+
if (ci->filemask == 7 &&
3525+
S_ISREG(ci->stages[0].mode) &&
3526+
(oideq(&ci->stages[0].oid, &ci->stages[1].oid) ||
3527+
oideq(&ci->stages[0].oid, &ci->stages[2].oid)))
3528+
continue;
3529+
3530+
for (i = 0; i < 3; i++) {
3531+
unsigned side_mask = (1 << i);
3532+
struct version_info *vi = &ci->stages[i];
3533+
3534+
if ((ci->filemask & side_mask) &&
3535+
S_ISREG(vi->mode) &&
3536+
oid_object_info_extended(opt->repo, &vi->oid, NULL,
3537+
OBJECT_INFO_FOR_PREFETCH))
3538+
oid_array_append(&to_fetch, &vi->oid);
3539+
}
3540+
}
3541+
3542+
promisor_remote_get_direct(opt->repo, to_fetch.oid, to_fetch.nr);
3543+
oid_array_clear(&to_fetch);
3544+
}
3545+
34973546
static void process_entries(struct merge_options *opt,
34983547
struct object_id *result_oid)
34993548
{
@@ -3540,6 +3589,7 @@ static void process_entries(struct merge_options *opt,
35403589
* the way when it is time to process the file at the same path).
35413590
*/
35423591
trace2_region_enter("merge", "processing", opt->repo);
3592+
prefetch_for_content_merges(opt, &plist);
35433593
for (entry = &plist.items[plist.nr-1]; entry >= plist.items; --entry) {
35443594
char *path = entry->string;
35453595
/*

promisor-remote.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@ static int fetch_objects(struct repository *repo,
3131
die(_("promisor-remote: unable to fork off fetch subprocess"));
3232
child_in = xfdopen(child.in, "w");
3333

34+
trace2_data_intmax("promisor", repo, "fetch_count", oid_nr);
35+
3436
for (i = 0; i < oid_nr; i++) {
3537
if (fputs(oid_to_hex(&oids[i]), child_in) < 0)
3638
die_errno(_("promisor-remote: could not write to fetch subprocess"));

0 commit comments

Comments
 (0)