Skip to content

Commit d5b67a2

Browse files
abhishekkumar2718gitster
authored andcommitted
commit-graph: use generation v2 only if entire chain does
Since there are released versions of Git that understand generation numbers in the commit-graph's CDAT chunk but do not understand the GDAT chunk, the following scenario is possible: 1. "New" Git writes a commit-graph with the GDAT chunk. 2. "Old" Git writes a split commit-graph on top without a GDAT chunk. Because of the current use of inspecting the current layer for a chunk_generation_data pointer, the commits in the lower layer will be interpreted as having very large generation values (commit date plus offset) compared to the generation numbers in the top layer (topological level). This violates the expectation that the generation of a parent is strictly smaller than the generation of a child. It is difficult to expose this issue in a test. Since we _start_ with artificially low generation numbers, any commit walk that prioritizes generation numbers will walk all of the commits with high generation number before walking the commits with low generation number. In all the cases I tried, the commit-graph layers themselves "protect" any incorrect behavior since none of the commits in the lower layer can reach the commits in the upper layer. This issue would manifest itself as a performance problem in this case, especially with something like "git log --graph" since the low generation numbers would cause the in-degree queue to walk all of the commits in the lower layer before allowing the topo-order queue to write anything to output (depending on the size of the upper layer). When writing the new layer in split commit-graph, we write a GDAT chunk only if the topmost layer has a GDAT chunk. This guarantees that if a layer has GDAT chunk, all lower layers must have a GDAT chunk as well. Rewriting layers follows similar approach: if the topmost layer below the set of layers being rewritten (in the split commit-graph chain) exists, and it does not contain GDAT chunk, then the result of rewrite does not have GDAT chunks either. Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Abhishek Kumar <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent ac604dd commit d5b67a2

File tree

3 files changed

+115
-1
lines changed

3 files changed

+115
-1
lines changed

commit-graph.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,21 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r,
605605
return graph_chain;
606606
}
607607

608+
static void validate_mixed_generation_chain(struct commit_graph *g)
609+
{
610+
int read_generation_data;
611+
612+
if (!g)
613+
return;
614+
615+
read_generation_data = !!g->chunk_generation_data;
616+
617+
while (g) {
618+
g->read_generation_data = read_generation_data;
619+
g = g->base_graph;
620+
}
621+
}
622+
608623
struct commit_graph *read_commit_graph_one(struct repository *r,
609624
struct object_directory *odb)
610625
{
@@ -613,6 +628,8 @@ struct commit_graph *read_commit_graph_one(struct repository *r,
613628
if (!g)
614629
g = load_commit_graph_chain(r, odb);
615630

631+
validate_mixed_generation_chain(g);
632+
616633
return g;
617634
}
618635

@@ -782,7 +799,7 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
782799
date_low = get_be32(commit_data + g->hash_len + 12);
783800
item->date = (timestamp_t)((date_high << 32) | date_low);
784801

785-
if (g->chunk_generation_data) {
802+
if (g->chunk_generation_data && g->read_generation_data) {
786803
offset = (timestamp_t) get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
787804

788805
if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
@@ -2030,6 +2047,9 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
20302047
}
20312048
}
20322049

2050+
if (!ctx->write_generation_data && g->chunk_generation_data)
2051+
ctx->write_generation_data = 1;
2052+
20332053
if (flags != COMMIT_GRAPH_SPLIT_REPLACE)
20342054
ctx->new_base_graph = g;
20352055
else if (ctx->num_commit_graphs_after != 1)
@@ -2274,6 +2294,7 @@ int write_commit_graph(struct object_directory *odb,
22742294
struct commit_graph *g = ctx->r->objects->commit_graph;
22752295

22762296
while (g) {
2297+
g->read_generation_data = 1;
22772298
g->topo_levels = &topo_levels;
22782299
g = g->base_graph;
22792300
}
@@ -2300,6 +2321,9 @@ int write_commit_graph(struct object_directory *odb,
23002321

23012322
g = ctx->r->objects->commit_graph;
23022323

2324+
if (g && !g->chunk_generation_data)
2325+
ctx->write_generation_data = 0;
2326+
23032327
while (g) {
23042328
ctx->num_commit_graphs_before++;
23052329
g = g->base_graph;
@@ -2318,6 +2342,9 @@ int write_commit_graph(struct object_directory *odb,
23182342

23192343
if (ctx->opts)
23202344
replace = ctx->opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
2345+
2346+
if (replace)
2347+
ctx->write_generation_data = 1;
23212348
}
23222349

23232350
ctx->approx_nr_objects = approximate_object_count();

commit-graph.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ struct commit_graph {
6464
struct object_directory *odb;
6565

6666
uint32_t num_commits_in_base;
67+
unsigned int read_generation_data;
6768
struct commit_graph *base_graph;
6869

6970
const uint32_t *chunk_oid_fanout;

t/t5324-split-commit-graph.sh

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,4 +440,90 @@ test_expect_success '--split=replace with partial Bloom data' '
440440
verify_chain_files_exist $graphdir
441441
'
442442

443+
test_expect_success 'setup repo for mixed generation commit-graph-chain' '
444+
mkdir mixed &&
445+
graphdir=".git/objects/info/commit-graphs" &&
446+
test_oid_cache <<-EOM &&
447+
oid_version sha1:1
448+
oid_version sha256:2
449+
EOM
450+
cd "$TRASH_DIRECTORY/mixed" &&
451+
git init &&
452+
git config core.commitGraph true &&
453+
git config gc.writeCommitGraph false &&
454+
for i in $(test_seq 3)
455+
do
456+
test_commit $i &&
457+
git branch commits/$i || return 1
458+
done &&
459+
git reset --hard commits/1 &&
460+
for i in $(test_seq 4 5)
461+
do
462+
test_commit $i &&
463+
git branch commits/$i || return 1
464+
done &&
465+
git reset --hard commits/2 &&
466+
for i in $(test_seq 6 10)
467+
do
468+
test_commit $i &&
469+
git branch commits/$i || return 1
470+
done &&
471+
git commit-graph write --reachable --split &&
472+
git reset --hard commits/2 &&
473+
git merge commits/4 &&
474+
git branch merge/1 &&
475+
git reset --hard commits/4 &&
476+
git merge commits/6 &&
477+
git branch merge/2 &&
478+
GIT_TEST_COMMIT_GRAPH_NO_GDAT=1 git commit-graph write --reachable --split=no-merge &&
479+
test-tool read-graph >output &&
480+
cat >expect <<-EOF &&
481+
header: 43475048 1 $(test_oid oid_version) 4 1
482+
num_commits: 2
483+
chunks: oid_fanout oid_lookup commit_metadata
484+
EOF
485+
test_cmp expect output &&
486+
git commit-graph verify
487+
'
488+
489+
test_expect_success 'does not write generation data chunk if not present on existing tip' '
490+
cd "$TRASH_DIRECTORY/mixed" &&
491+
git reset --hard commits/3 &&
492+
git merge merge/1 &&
493+
git merge commits/5 &&
494+
git merge merge/2 &&
495+
git branch merge/3 &&
496+
git commit-graph write --reachable --split=no-merge &&
497+
test-tool read-graph >output &&
498+
cat >expect <<-EOF &&
499+
header: 43475048 1 $(test_oid oid_version) 4 2
500+
num_commits: 3
501+
chunks: oid_fanout oid_lookup commit_metadata
502+
EOF
503+
test_cmp expect output &&
504+
git commit-graph verify
505+
'
506+
507+
test_expect_success 'writes generation data chunk when commit-graph chain is replaced' '
508+
cd "$TRASH_DIRECTORY/mixed" &&
509+
git commit-graph write --reachable --split=replace &&
510+
test_path_is_file $graphdir/commit-graph-chain &&
511+
test_line_count = 1 $graphdir/commit-graph-chain &&
512+
verify_chain_files_exist $graphdir &&
513+
graph_read_expect 15 &&
514+
git commit-graph verify
515+
'
516+
517+
test_expect_success 'add one commit, write a tip graph' '
518+
cd "$TRASH_DIRECTORY/mixed" &&
519+
test_commit 11 &&
520+
git branch commits/11 &&
521+
git commit-graph write --reachable --split &&
522+
test_path_is_missing $infodir/commit-graph &&
523+
test_path_is_file $graphdir/commit-graph-chain &&
524+
ls $graphdir/graph-*.graph >graph-files &&
525+
test_line_count = 2 graph-files &&
526+
verify_chain_files_exist $graphdir
527+
'
528+
443529
test_done

0 commit comments

Comments
 (0)