Skip to content

Commit 809e032

Browse files
ttaylorr authored and gitster committed
builtin/commit-graph.c: introduce '--max-new-filters=<n>'
Introduce a command-line flag to specify the maximum number of new Bloom filters that a 'git commit-graph write' is willing to compute from scratch.

Prior to this patch, a commit-graph write with '--changed-paths' would compute Bloom filters for all selected commits which haven't already been computed (i.e., by a previous commit-graph write with '--split' such that a roll-up or replacement is performed).

This behavior can cause prohibitively-long commit-graph writes for a variety of reasons:

  * There may be lots of filters whose diffs take a long time to generate (for example, they have close to the maximum number of changes, diffing itself takes a long time, etc).

  * Old-style commit-graphs (which encode filters with too many entries as not having been computed at all) cause us to waste time recomputing filters that appear to have not been computed only to discover that they are too-large.

This can make the upper-bound of the time it takes for 'git commit-graph write --changed-paths' to be rather unpredictable.

To make this command behave more predictably, introduce '--max-new-filters=<n>' to allow computing at most '<n>' Bloom filters from scratch. This lets "computing" already-known filters proceed quickly, while bounding the number of slow tasks that Git is willing to do.

Helped-by: Junio C Hamano <[email protected]>
Signed-off-by: Taylor Blau <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 98bb796 commit 809e032

File tree

6 files changed

+112
-8
lines changed

6 files changed

+112
-8
lines changed

Documentation/git-commit-graph.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ this option is given, future commit-graph writes will automatically assume
6767
that this option was intended. Use `--no-changed-paths` to stop storing this
6868
data.
6969
+
70+
With the `--max-new-filters=<n>` option, generate at most `n` new Bloom
71+
filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is
72+
enforced. Only commits present in the new layer count against this
73+
limit. To retroactively compute Bloom filters over earlier layers, it is
74+
advised to use `--split=replace`.
75+
+
7076
With the `--split[=<strategy>]` option, write the commit-graph as a
7177
chain of multiple commit-graph files stored in
7278
`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the

bloom.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -204,12 +204,11 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
204204

205205
if (!filter->data) {
206206
load_commit_graph_info(r, c);
207-
if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH &&
208-
load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
209-
return filter;
207+
if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH)
208+
load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
210209
}
211210

212-
if (filter->data)
211+
if (filter->data && filter->len)
213212
return filter;
214213
if (!compute_if_not_present)
215214
return NULL;

builtin/commit-graph.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ static char const * const builtin_commit_graph_usage[] = {
1313
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
1414
N_("git commit-graph write [--object-dir <objdir>] [--append] "
1515
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
16-
"[--changed-paths] [--[no-]progress] <split options>"),
16+
"[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
17+
"<split options>"),
1718
NULL
1819
};
1920

@@ -25,7 +26,8 @@ static const char * const builtin_commit_graph_verify_usage[] = {
2526
static const char * const builtin_commit_graph_write_usage[] = {
2627
N_("git commit-graph write [--object-dir <objdir>] [--append] "
2728
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
28-
"[--changed-paths] [--[no-]progress] <split options>"),
29+
"[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
30+
"<split options>"),
2931
NULL
3032
};
3133

@@ -162,6 +164,23 @@ static int read_one_commit(struct oidset *commits, struct progress *progress,
162164
return 0;
163165
}
164166

167+
static int write_option_max_new_filters(const struct option *opt,
168+
const char *arg,
169+
int unset)
170+
{
171+
int *to = opt->value;
172+
if (unset)
173+
*to = -1;
174+
else {
175+
const char *s;
176+
*to = strtol(arg, (char **)&s, 10);
177+
if (*s)
178+
return error(_("%s expects a numerical value"),
179+
optname(opt, opt->flags));
180+
}
181+
return 0;
182+
}
183+
165184
static int graph_write(int argc, const char **argv)
166185
{
167186
struct string_list pack_indexes = STRING_LIST_INIT_NODUP;
@@ -197,6 +216,9 @@ static int graph_write(int argc, const char **argv)
197216
N_("maximum ratio between two levels of a split commit-graph")),
198217
OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
199218
N_("only expire files older than a given date-time")),
219+
OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
220+
NULL, N_("maximum number of changed-path Bloom filters to compute"),
221+
0, write_option_max_new_filters),
200222
OPT_END(),
201223
};
202224

@@ -205,6 +227,7 @@ static int graph_write(int argc, const char **argv)
205227
write_opts.size_multiple = 2;
206228
write_opts.max_commits = 0;
207229
write_opts.expire_time = 0;
230+
write_opts.max_new_filters = -1;
208231

209232
trace2_cmd_mode("write");
210233

commit-graph.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,6 +1408,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
14081408
int i;
14091409
struct progress *progress = NULL;
14101410
struct commit **sorted_commits;
1411+
int max_new_filters;
14111412

14121413
init_bloom_filters();
14131414

@@ -1424,13 +1425,16 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
14241425
else
14251426
QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
14261427

1428+
max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ?
1429+
ctx->opts->max_new_filters : ctx->commits.nr;
1430+
14271431
for (i = 0; i < ctx->commits.nr; i++) {
14281432
enum bloom_filter_computed computed = 0;
14291433
struct commit *c = sorted_commits[i];
14301434
struct bloom_filter *filter = get_or_compute_bloom_filter(
14311435
ctx->r,
14321436
c,
1433-
1,
1437+
ctx->count_bloom_filter_computed < max_new_filters,
14341438
ctx->bloom_settings,
14351439
&computed);
14361440
if (computed & BLOOM_COMPUTED) {
@@ -1441,7 +1445,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
14411445
ctx->count_bloom_filter_trunc_large++;
14421446
} else if (computed & BLOOM_NOT_COMPUTED)
14431447
ctx->count_bloom_filter_not_computed++;
1444-
ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
1448+
ctx->total_bloom_filter_data_size += filter
1449+
? sizeof(unsigned char) * filter->len : 0;
14451450
display_progress(progress, i + 1);
14461451
}
14471452

commit-graph.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ struct commit_graph_opts {
110110
int max_commits;
111111
timestamp_t expire_time;
112112
enum commit_graph_split_flags split_flags;
113+
int max_new_filters;
113114
};
114115

115116
/*

t/t4216-log-bloom.sh

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,4 +305,74 @@ test_expect_success 'correctly report commits with no changed paths' '
305305
)
306306
'
307307

308+
# Adds three commits (c2, c3, c4) inside the `limits` directory (presumably
# a repository set up by an earlier test; confirm), rewrites the
# commit-graph with --split=replace --changed-paths --max-new-filters=2
# while logging trace2 events to trace.event, and then uses the
# test_filter_* helpers (defined outside this view) to check the log for
# 2 computed filters, 3 not computed, and no truncated ones.
test_expect_success 'Bloom generation is limited by --max-new-filters' '
309+
(
310+
cd limits &&
311+
test_commit c2 filter &&
312+
test_commit c3 filter &&
313+
test_commit c4 no-filter &&
314+
315+
rm -f trace.event &&
316+
GIT_TRACE2_EVENT="$(pwd)/trace.event" \
317+
git commit-graph write --reachable --split=replace \
318+
--changed-paths --max-new-filters=2 &&
319+
320+
test_filter_computed 2 trace.event &&
321+
test_filter_not_computed 3 trace.event &&
322+
test_filter_trunc_empty 0 trace.event &&
323+
test_filter_trunc_large 0 trace.event
324+
)
325+
'
326+
327+
# Re-runs the commit-graph write inside `limits` without adding commits,
# this time with --max-new-filters=1, and checks through the test_filter_*
# helpers (defined outside this view) that trace.event reports exactly
# 1 computed filter, 4 not computed, and no truncated ones.
test_expect_success 'Bloom generation backfills previously-skipped filters' '
328+
(
329+
cd limits &&
330+
331+
rm -f trace.event &&
332+
GIT_TRACE2_EVENT="$(pwd)/trace.event" \
333+
git commit-graph write --reachable --changed-paths \
334+
--split=replace --max-new-filters=1 &&
335+
test_filter_computed 1 trace.event &&
336+
test_filter_not_computed 4 trace.event &&
337+
test_filter_trunc_empty 0 trace.event &&
338+
test_filter_trunc_large 0 trace.event
339+
)
340+
'
341+
342+
# Creates a throwaway repository with six empty commits, then writes the
# commit-graph three times with --max-new-filters=2, expecting two filters
# to be computed per write. A final write checks that nothing is recomputed
# once every commit has a filter.
#
# Each loop body ends in `|| exit 1`: a for loop reports only the status of
# its last iteration, so without it a failure in an earlier iteration would
# go unnoticed. `exit` is used because the loops run inside a subshell.
test_expect_success 'Bloom generation backfills empty commits' '
	git init empty &&
	test_when_finished "rm -fr empty" &&
	(
		cd empty &&
		for i in $(test_seq 1 6)
		do
			git commit --allow-empty -m "$i" || exit 1
		done &&

		# Generate Bloom filters for empty commits 1-6, two at a time.
		for i in $(test_seq 1 3)
		do
			rm -f trace.event &&
			GIT_TRACE2_EVENT="$(pwd)/trace.event" \
				git commit-graph write --reachable \
					--changed-paths --max-new-filters=2 &&
			test_filter_computed 2 trace.event &&
			test_filter_not_computed 4 trace.event &&
			test_filter_trunc_empty 2 trace.event &&
			test_filter_trunc_large 0 trace.event || exit 1
		done &&

		# Finally, make sure that once all commits have filters, that
		# none are subsequently recomputed.
		rm -f trace.event &&
		GIT_TRACE2_EVENT="$(pwd)/trace.event" \
			git commit-graph write --reachable \
				--changed-paths --max-new-filters=2 &&
		test_filter_computed 0 trace.event &&
		test_filter_not_computed 6 trace.event &&
		test_filter_trunc_empty 0 trace.event &&
		test_filter_trunc_large 0 trace.event
	)
'
377+
308378
test_done

0 commit comments

Comments
 (0)