Skip to content

Commit 4de4bfb

Browse files
committed
commit-graph: create options for split files
The split commit-graph feature is now fully implemented, but needs some more run-time configurability. Allow direct callers to 'git commit-graph write --split' to specify the values used in the merge strategy and the expire time. Update the documentation to specify these values. Signed-off-by: Derrick Stolee <[email protected]>
1 parent 18d612b commit 4de4bfb

File tree

8 files changed

+112
-23
lines changed

8 files changed

+112
-23
lines changed

Documentation/git-commit-graph.txt

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ OPTIONS
2626
Use given directory for the location of packfiles and commit-graph
2727
file. This parameter exists to specify the location of an alternate
2828
that only has the objects directory, not a full `.git` directory. The
29-
commit-graph file is expected to be at `<dir>/info/commit-graph` and
29+
commit-graph file is expected to be in the `<dir>/info` directory and
3030
the packfiles are expected to be in `<dir>/pack`.
3131

3232

@@ -51,6 +51,25 @@ or `--stdin-packs`.)
5151
+
5252
With the `--append` option, include all commits that are present in the
5353
existing commit-graph file.
54+
+
55+
With the `--split` option, write the commit-graph as a chain of multiple
56+
commit-graph files stored in `<dir>/info/commit-graphs`. The new commits
57+
not already in the commit-graph are added in a new "tip" file. This file
58+
is merged with the existing file if the following merge conditions are
59+
met:
60+
+
61+
* If `--size-multiple=<X>` is not specified, let `X` equal 2. If the new
62+
tip file would have `N` commits and the previous tip has `M` commits and
63+
`X` times `N` is greater than or equal to `M`, instead merge the two files into a
64+
single file.
65+
+
66+
* If `--max-commits=<M>` is specified with `M` a positive integer, and the
67+
new tip file would have more than `M` commits, then instead merge the new
68+
tip with the previous tip.
69+
+
70+
Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
71+
be the current time. After writing the split commit-graph, delete all
72+
unused commit-graph files whose modified times are older than `datetime`.
5473

5574
'read'::
5675

Documentation/technical/commit-graph.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,11 @@ When writing a set of commits that do not exist in the commit-graph stack of
248248
height N, we default to creating a new file at level N + 1. We then decide to
249249
merge with the Nth level if one of two conditions holds:
250250

251-
1. The expected file size for level N + 1 is at least half the file size for
252-
level N.
251+
1. With X given by `--size-multiple=<X>` (default 2), the number of commits in
252+
level N is at most X times the number of commits in level N + 1.
253253

254-
2. Level N + 1 contains more than 64,0000 commits.
254+
2. `--max-commits=<C>` is specified with non-zero C and the number of commits
255+
in level N + 1 is more than C commits.
255256

256257
This decision cascades down the levels: when we merge a level we create a new
257258
set of commits that then compares to the next level.

builtin/commit-graph.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = {
1010
N_("git commit-graph [--object-dir <objdir>]"),
1111
N_("git commit-graph read [--object-dir <objdir>]"),
1212
N_("git commit-graph verify [--object-dir <objdir>]"),
13-
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"),
13+
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] <split options>"),
1414
NULL
1515
};
1616

@@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = {
2525
};
2626

2727
static const char * const builtin_commit_graph_write_usage[] = {
28-
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"),
28+
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] <split options>"),
2929
NULL
3030
};
3131

@@ -135,6 +135,7 @@ static int graph_read(int argc, const char **argv)
135135
}
136136

137137
extern int read_replace_refs;
138+
struct split_commit_graph_opts split_opts;
138139

139140
static int graph_write(int argc, const char **argv)
140141
{
@@ -158,9 +159,19 @@ static int graph_write(int argc, const char **argv)
158159
N_("include all commits already in the commit-graph file")),
159160
OPT_BOOL(0, "split", &opts.split,
160161
N_("allow writing an incremental commit-graph file")),
162+
OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
163+
N_("maximum number of commits in a non-base split commit-graph")),
164+
OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
165+
N_("maximum ratio between two levels of a split commit-graph")),
166+
OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time,
167+
N_("only expire files older than a given date-time")),
161168
OPT_END(),
162169
};
163170

171+
split_opts.size_multiple = 2;
172+
split_opts.max_commits = 0;
173+
split_opts.expire_time = 0;
174+
164175
argc = parse_options(argc, argv, NULL,
165176
builtin_commit_graph_write_options,
166177
builtin_commit_graph_write_usage, 0);
@@ -177,7 +188,7 @@ static int graph_write(int argc, const char **argv)
177188
read_replace_refs = 0;
178189

179190
if (opts.reachable)
180-
return write_commit_graph_reachable(opts.obj_dir, flags);
191+
return write_commit_graph_reachable(opts.obj_dir, flags, &split_opts);
181192

182193
string_list_init(&lines, 0);
183194
if (opts.stdin_packs || opts.stdin_commits) {
@@ -197,7 +208,8 @@ static int graph_write(int argc, const char **argv)
197208
result = write_commit_graph(opts.obj_dir,
198209
pack_indexes,
199210
commit_hex,
200-
flags);
211+
flags,
212+
&split_opts);
201213

202214
UNLEAK(lines);
203215
return result;

builtin/commit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1670,7 +1670,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
16701670
"not exceeded, and then \"git reset HEAD\" to recover."));
16711671

16721672
if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
1673-
write_commit_graph_reachable(get_object_directory(), 0))
1673+
write_commit_graph_reachable(get_object_directory(), 0, NULL))
16741674
return 1;
16751675

16761676
repo_rerere(the_repository, 0);

builtin/gc.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
666666

667667
if (gc_write_commit_graph &&
668668
write_commit_graph_reachable(get_object_directory(),
669-
!quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0))
669+
!quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0,
670+
NULL))
670671
return 1;
671672

672673
if (auto_gc && too_many_loose_objects())

commit-graph.c

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,8 @@ struct write_commit_graph_context {
753753
unsigned append:1,
754754
report_progress:1,
755755
split:1;
756+
757+
const struct split_commit_graph_opts *split_opts;
756758
};
757759

758760
static void write_graph_chunk_fanout(struct hashfile *f,
@@ -1101,14 +1103,15 @@ static int add_ref_to_list(const char *refname,
11011103
return 0;
11021104
}
11031105

1104-
int write_commit_graph_reachable(const char *obj_dir, unsigned int flags)
1106+
int write_commit_graph_reachable(const char *obj_dir, unsigned int flags,
1107+
const struct split_commit_graph_opts *split_opts)
11051108
{
11061109
struct string_list list = STRING_LIST_INIT_DUP;
11071110
int result;
11081111

11091112
for_each_ref(add_ref_to_list, &list);
11101113
result = write_commit_graph(obj_dir, NULL, &list,
1111-
flags);
1114+
flags, split_opts);
11121115

11131116
string_list_clear(&list, 0);
11141117
return result;
@@ -1479,20 +1482,25 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
14791482
return 0;
14801483
}
14811484

1482-
static int split_strategy_max_commits = 64000;
1483-
static float split_strategy_size_mult = 2.0f;
1484-
14851485
static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
14861486
{
14871487
struct commit_graph *g = ctx->r->objects->commit_graph;
14881488
uint32_t num_commits = ctx->commits.nr;
14891489
uint32_t i;
14901490

1491+
int max_commits = 0;
1492+
int size_mult = 2;
1493+
1494+
if (ctx->split_opts) {
1495+
max_commits = ctx->split_opts->max_commits;
1496+
size_mult = ctx->split_opts->size_multiple;
1497+
}
1498+
14911499
g = ctx->r->objects->commit_graph;
14921500
ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
14931501

1494-
while (g && (g->num_commits <= split_strategy_size_mult * num_commits ||
1495-
num_commits > split_strategy_max_commits)) {
1502+
while (g && (g->num_commits <= size_mult * num_commits ||
1503+
(max_commits && num_commits > max_commits))) {
14961504
if (strcmp(g->obj_dir, ctx->obj_dir))
14971505
break;
14981506

@@ -1660,7 +1668,10 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx)
16601668
DIR *dir;
16611669
struct dirent *de;
16621670
size_t dirnamelen;
1663-
time_t expire_time = time(NULL);
1671+
timestamp_t expire_time = time(NULL);
1672+
1673+
if (ctx->split_opts && ctx->split_opts->expire_time)
1674+
expire_time -= ctx->split_opts->expire_time;
16641675

16651676
strbuf_addstr(&path, ctx->obj_dir);
16661677
strbuf_addstr(&path, "/info/commit-graphs");
@@ -1704,7 +1715,8 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx)
17041715
int write_commit_graph(const char *obj_dir,
17051716
struct string_list *pack_indexes,
17061717
struct string_list *commit_hex,
1707-
unsigned int flags)
1718+
unsigned int flags,
1719+
const struct split_commit_graph_opts *split_opts)
17081720
{
17091721
struct write_commit_graph_context *ctx;
17101722
uint32_t i, count_distinct = 0;
@@ -1719,6 +1731,7 @@ int write_commit_graph(const char *obj_dir,
17191731
ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0;
17201732
ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0;
17211733
ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0;
1734+
ctx->split_opts = split_opts;
17221735

17231736
if (ctx->split) {
17241737
struct commit_graph *g;
@@ -1746,8 +1759,8 @@ int write_commit_graph(const char *obj_dir,
17461759
ctx->approx_nr_objects = approximate_object_count();
17471760
ctx->oids.alloc = ctx->approx_nr_objects / 32;
17481761

1749-
if (ctx->split && ctx->oids.alloc > split_strategy_max_commits)
1750-
ctx->oids.alloc = split_strategy_max_commits;
1762+
if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits)
1763+
ctx->oids.alloc = split_opts->max_commits;
17511764

17521765
if (ctx->append) {
17531766
prepare_commit_graph_one(ctx->r, ctx->obj_dir);

commit-graph.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,19 @@ int generation_numbers_enabled(struct repository *r);
7575
#define COMMIT_GRAPH_PROGRESS (1 << 1)
7676
#define COMMIT_GRAPH_SPLIT (1 << 2)
7777

78-
int write_commit_graph_reachable(const char *obj_dir, unsigned int flags);
78+
struct split_commit_graph_opts {
79+
int size_multiple;
80+
int max_commits;
81+
timestamp_t expire_time;
82+
};
83+
84+
int write_commit_graph_reachable(const char *obj_dir, unsigned int flags,
85+
const struct split_commit_graph_opts *split_opts);
7986
int write_commit_graph(const char *obj_dir,
8087
struct string_list *pack_indexes,
8188
struct string_list *commit_hex,
82-
unsigned int flags);
89+
unsigned int flags,
90+
const struct split_commit_graph_opts *split_opts);
8391

8492
int verify_commit_graph(struct repository *r, struct commit_graph *g);
8593

t/t5323-split-commit-graph.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,4 +169,39 @@ test_expect_success 'create fork and chain across alternate' '
169169

170170
graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6
171171

172+
test_expect_success 'test merge strategy constants' '
173+
git clone . merge-2 &&
174+
(
175+
cd merge-2 &&
176+
git config core.commitGraph true &&
177+
test_line_count = 2 $graphdir/commit-graph-chain &&
178+
test_commit 14 &&
179+
git commit-graph write --reachable --split --size-multiple=2 &&
180+
test_line_count = 3 $graphdir/commit-graph-chain
181+
182+
) &&
183+
git clone . merge-10 &&
184+
(
185+
cd merge-10 &&
186+
git config core.commitGraph true &&
187+
test_line_count = 2 $graphdir/commit-graph-chain &&
188+
test_commit 14 &&
189+
git commit-graph write --reachable --split --size-multiple=10 &&
190+
test_line_count = 1 $graphdir/commit-graph-chain &&
191+
ls $graphdir/graph-*.graph >graph-files &&
192+
test_line_count = 1 graph-files
193+
) &&
194+
git clone . merge-10-expire &&
195+
(
196+
cd merge-10-expire &&
197+
git config core.commitGraph true &&
198+
test_line_count = 2 $graphdir/commit-graph-chain &&
199+
test_commit 15 &&
200+
git commit-graph write --reachable --split --size-multiple=10 --expire-time=1980-01-01 &&
201+
test_line_count = 1 $graphdir/commit-graph-chain &&
202+
ls $graphdir/graph-*.graph >graph-files &&
203+
test_line_count = 3 graph-files
204+
)
205+
'
206+
172207
test_done

0 commit comments

Comments
 (0)