Skip to content

Commit ac604dd

Browse files
abhishekkumar2718gitster
authored andcommitted
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to distinguish between generation numbers v1 and v2 [1]. Thus, one of the prerequisites for implementing generation number v2 was to distinguish between graph versions in a backwards compatible manner.

We are going to introduce a new chunk called Generation Data chunk (or GDAT). GDAT stores corrected committer date offsets whereas CDAT will still store topological level.

Old Git does not understand GDAT chunk and would ignore it, reading topological levels from CDAT. New Git can parse GDAT and take advantage of newer generation numbers, falling back to topological levels when GDAT chunk is missing (as it would happen with a commit graph written by old Git).

We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT' which forces commit-graph file to be written without generation data chunk to emulate a commit-graph file written by old Git.

While storing corrected commit date offset instead of the corrected commit date saves us 4 bytes per commit, it's possible for the offsets to overflow the 4 bytes allocated. As such overflows are exceedingly rare, we use the following overflow management scheme:

We introduce a new commit-graph chunk, GENERATION_DATA_OVERFLOW ('GDOV') to store corrected commit dates for commits with offsets greater than GENERATION_NUMBER_V2_OFFSET_MAX.

If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set the MSB of the offset and the other bits store the position of corrected commit date in GDOV chunk, similar to how Extra Edge List is maintained.

We test the overflow-related code with the following repo history:

              F - N - U
             /         \
    U - N - U            N
             \          /
              N - F - N

Where the commits denoted by U have committer date of zero seconds since Unix epoch, the commits denoted by N have committer date of 1112354055 (default committer date for the test suite) seconds since Unix epoch and the commits denoted by F have committer date of (2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow. [1]: https://lore.kernel.org/git/[email protected]/ Signed-off-by: Abhishek Kumar <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 1e7561c commit ac604dd

File tree

9 files changed

+206
-57
lines changed

9 files changed

+206
-57
lines changed

commit-graph.c

Lines changed: 93 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ void git_test_write_commit_graph_or_die(void)
3838
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
3939
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
4040
#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
41+
#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
42+
#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
4143
#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
4244
#define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
4345
#define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
4446
#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
45-
#define MAX_NUM_CHUNKS 7
47+
#define MAX_NUM_CHUNKS 9
4648

4749
#define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
4850

@@ -61,6 +63,8 @@ void git_test_write_commit_graph_or_die(void)
6163
#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
6264
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
6365

66+
#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
67+
6468
/* Remember to update object flag allocation in object.h */
6569
#define REACHABLE (1u<<15)
6670

@@ -385,6 +389,20 @@ struct commit_graph *parse_commit_graph(struct repository *r,
385389
graph->chunk_commit_data = data + chunk_offset;
386390
break;
387391

392+
case GRAPH_CHUNKID_GENERATION_DATA:
393+
if (graph->chunk_generation_data)
394+
chunk_repeated = 1;
395+
else
396+
graph->chunk_generation_data = data + chunk_offset;
397+
break;
398+
399+
case GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW:
400+
if (graph->chunk_generation_data_overflow)
401+
chunk_repeated = 1;
402+
else
403+
graph->chunk_generation_data_overflow = data + chunk_offset;
404+
break;
405+
388406
case GRAPH_CHUNKID_EXTRAEDGES:
389407
if (graph->chunk_extra_edges)
390408
chunk_repeated = 1;
@@ -745,8 +763,8 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
745763
{
746764
const unsigned char *commit_data;
747765
struct commit_graph_data *graph_data;
748-
uint32_t lex_index;
749-
uint64_t date_high, date_low;
766+
uint32_t lex_index, offset_pos;
767+
uint64_t date_high, date_low, offset;
750768

751769
while (pos < g->num_commits_in_base)
752770
g = g->base_graph;
@@ -764,7 +782,16 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
764782
date_low = get_be32(commit_data + g->hash_len + 12);
765783
item->date = (timestamp_t)((date_high << 32) | date_low);
766784

767-
graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
785+
if (g->chunk_generation_data) {
786+
offset = (timestamp_t) get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
787+
788+
if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
789+
offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
790+
graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
791+
} else
792+
graph_data->generation = item->date + offset;
793+
} else
794+
graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
768795

769796
if (g->topo_levels)
770797
*topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2;
@@ -942,6 +969,7 @@ struct write_commit_graph_context {
942969
struct packed_oid_list oids;
943970
struct packed_commit_list commits;
944971
int num_extra_edges;
972+
int num_generation_data_overflows;
945973
unsigned long approx_nr_objects;
946974
struct progress *progress;
947975
int progress_done;
@@ -960,7 +988,8 @@ struct write_commit_graph_context {
960988
report_progress:1,
961989
split:1,
962990
changed_paths:1,
963-
order_by_pack:1;
991+
order_by_pack:1,
992+
write_generation_data:1;
964993

965994
struct topo_level_slab *topo_levels;
966995
const struct commit_graph_opts *opts;
@@ -1120,6 +1149,44 @@ static int write_graph_chunk_data(struct hashfile *f,
11201149
return 0;
11211150
}
11221151

1152+
static int write_graph_chunk_generation_data(struct hashfile *f,
1153+
struct write_commit_graph_context *ctx)
1154+
{
1155+
int i, num_generation_data_overflows = 0;
1156+
for (i = 0; i < ctx->commits.nr; i++) {
1157+
struct commit *c = ctx->commits.list[i];
1158+
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
1159+
display_progress(ctx->progress, ++ctx->progress_cnt);
1160+
1161+
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1162+
offset = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | num_generation_data_overflows;
1163+
num_generation_data_overflows++;
1164+
}
1165+
1166+
hashwrite_be32(f, offset);
1167+
}
1168+
1169+
return 0;
1170+
}
1171+
1172+
static int write_graph_chunk_generation_data_overflow(struct hashfile *f,
1173+
struct write_commit_graph_context *ctx)
1174+
{
1175+
int i;
1176+
for (i = 0; i < ctx->commits.nr; i++) {
1177+
struct commit *c = ctx->commits.list[i];
1178+
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
1179+
display_progress(ctx->progress, ++ctx->progress_cnt);
1180+
1181+
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1182+
hashwrite_be32(f, offset >> 32);
1183+
hashwrite_be32(f, (uint32_t) offset);
1184+
}
1185+
}
1186+
1187+
return 0;
1188+
}
1189+
11231190
static int write_graph_chunk_extra_edges(struct hashfile *f,
11241191
struct write_commit_graph_context *ctx)
11251192
{
@@ -1399,7 +1466,11 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
13991466

14001467
if (current->date && current->date > max_corrected_commit_date)
14011468
max_corrected_commit_date = current->date - 1;
1469+
14021470
commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
1471+
1472+
if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
1473+
ctx->num_generation_data_overflows++;
14031474
}
14041475
}
14051476
}
@@ -1765,6 +1836,21 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
17651836
chunks[2].id = GRAPH_CHUNKID_DATA;
17661837
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
17671838
chunks[2].write_fn = write_graph_chunk_data;
1839+
1840+
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_NO_GDAT, 0))
1841+
ctx->write_generation_data = 0;
1842+
if (ctx->write_generation_data) {
1843+
chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA;
1844+
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
1845+
chunks[num_chunks].write_fn = write_graph_chunk_generation_data;
1846+
num_chunks++;
1847+
}
1848+
if (ctx->num_generation_data_overflows) {
1849+
chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW;
1850+
chunks[num_chunks].size = sizeof(timestamp_t) * ctx->num_generation_data_overflows;
1851+
chunks[num_chunks].write_fn = write_graph_chunk_generation_data_overflow;
1852+
num_chunks++;
1853+
}
17681854
if (ctx->num_extra_edges) {
17691855
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
17701856
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
@@ -2170,6 +2256,8 @@ int write_commit_graph(struct object_directory *odb,
21702256
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
21712257
ctx->opts = opts;
21722258
ctx->total_bloom_filter_data_size = 0;
2259+
ctx->write_generation_data = 1;
2260+
ctx->num_generation_data_overflows = 0;
21732261

21742262
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
21752263
bloom_settings.bits_per_entry);

commit-graph.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "oidset.h"
77

88
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
9+
#define GIT_TEST_COMMIT_GRAPH_NO_GDAT "GIT_TEST_COMMIT_GRAPH_NO_GDAT"
910
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
1011
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
1112

@@ -68,6 +69,8 @@ struct commit_graph {
6869
const uint32_t *chunk_oid_fanout;
6970
const unsigned char *chunk_oid_lookup;
7071
const unsigned char *chunk_commit_data;
72+
const unsigned char *chunk_generation_data;
73+
const unsigned char *chunk_generation_data_overflow;
7174
const unsigned char *chunk_extra_edges;
7275
const unsigned char *chunk_base_graphs;
7376
const unsigned char *chunk_bloom_indexes;

commit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define GENERATION_NUMBER_INFINITY ((1ULL << 63) - 1)
1515
#define GENERATION_NUMBER_V1_MAX 0x3FFFFFFF
1616
#define GENERATION_NUMBER_ZERO 0
17+
#define GENERATION_NUMBER_V2_OFFSET_MAX ((1ULL << 31) - 1)
1718

1819
struct commit_list {
1920
struct commit *item;

t/README

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,9 @@ GIT_TEST_COMMIT_GRAPH=<boolean>, when true, forces the commit-graph to
379379
be written after every 'git commit' command, and overrides the
380380
'core.commitGraph' setting to true.
381381

382+
GIT_TEST_COMMIT_GRAPH_NO_GDAT=<boolean>, when true, forces the
383+
commit-graph to be written without generation data chunk.
384+
382385
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=<boolean>, when true, forces
383386
commit-graph write to compute and write changed path Bloom filters for
384387
every 'git commit-graph write', as if the `--changed-paths` option was

t/helper/test-read-graph.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ int cmd__read_graph(int argc, const char **argv)
3333
printf(" oid_lookup");
3434
if (graph->chunk_commit_data)
3535
printf(" commit_metadata");
36+
if (graph->chunk_generation_data)
37+
printf(" generation_data");
38+
if (graph->chunk_generation_data_overflow)
39+
printf(" generation_data_overflow");
3640
if (graph->chunk_extra_edges)
3741
printf(" extra_edges");
3842
if (graph->chunk_bloom_indexes)

t/t4216-log-bloom.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
4040
'
4141

4242
graph_read_expect () {
43-
NUM_CHUNKS=5
43+
NUM_CHUNKS=6
4444
cat >expect <<- EOF
4545
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
4646
num_commits: $1
47-
chunks: oid_fanout oid_lookup commit_metadata bloom_indexes bloom_data
47+
chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
4848
EOF
4949
test-tool read-graph >actual &&
5050
test_cmp expect actual

t/t5318-commit-graph.sh

Lines changed: 57 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ graph_git_behavior 'no graph' full commits/3 commits/1
7676
graph_read_expect() {
7777
OPTIONAL=""
7878
NUM_CHUNKS=3
79-
if test ! -z $2
79+
if test ! -z "$2"
8080
then
8181
OPTIONAL=" $2"
8282
NUM_CHUNKS=$((3 + $(echo "$2" | wc -w)))
@@ -103,14 +103,14 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' '
103103
# valid commit and tree OID
104104
git rev-parse HEAD HEAD^{tree} >in &&
105105
git commit-graph write --stdin-commits <in &&
106-
graph_read_expect 3
106+
graph_read_expect 3 generation_data
107107
'
108108

109109
test_expect_success 'write graph' '
110110
cd "$TRASH_DIRECTORY/full" &&
111111
git commit-graph write &&
112112
test_path_is_file $objdir/info/commit-graph &&
113-
graph_read_expect "3"
113+
graph_read_expect "3" generation_data
114114
'
115115

116116
test_expect_success POSIXPERM 'write graph has correct permissions' '
@@ -219,7 +219,7 @@ test_expect_success 'write graph with merges' '
219219
cd "$TRASH_DIRECTORY/full" &&
220220
git commit-graph write &&
221221
test_path_is_file $objdir/info/commit-graph &&
222-
graph_read_expect "10" "extra_edges"
222+
graph_read_expect "10" "generation_data extra_edges"
223223
'
224224

225225
graph_git_behavior 'merge 1 vs 2' full merge/1 merge/2
@@ -254,7 +254,7 @@ test_expect_success 'write graph with new commit' '
254254
cd "$TRASH_DIRECTORY/full" &&
255255
git commit-graph write &&
256256
test_path_is_file $objdir/info/commit-graph &&
257-
graph_read_expect "11" "extra_edges"
257+
graph_read_expect "11" "generation_data extra_edges"
258258
'
259259

260260
graph_git_behavior 'full graph, commit 8 vs merge 1' full commits/8 merge/1
@@ -264,7 +264,7 @@ test_expect_success 'write graph with nothing new' '
264264
cd "$TRASH_DIRECTORY/full" &&
265265
git commit-graph write &&
266266
test_path_is_file $objdir/info/commit-graph &&
267-
graph_read_expect "11" "extra_edges"
267+
graph_read_expect "11" "generation_data extra_edges"
268268
'
269269

270270
graph_git_behavior 'cleared graph, commit 8 vs merge 1' full commits/8 merge/1
@@ -274,7 +274,7 @@ test_expect_success 'build graph from latest pack with closure' '
274274
cd "$TRASH_DIRECTORY/full" &&
275275
cat new-idx | git commit-graph write --stdin-packs &&
276276
test_path_is_file $objdir/info/commit-graph &&
277-
graph_read_expect "9" "extra_edges"
277+
graph_read_expect "9" "generation_data extra_edges"
278278
'
279279

280280
graph_git_behavior 'graph from pack, commit 8 vs merge 1' full commits/8 merge/1
@@ -287,7 +287,7 @@ test_expect_success 'build graph from commits with closure' '
287287
git rev-parse merge/1 >>commits-in &&
288288
cat commits-in | git commit-graph write --stdin-commits &&
289289
test_path_is_file $objdir/info/commit-graph &&
290-
graph_read_expect "6"
290+
graph_read_expect "6" "generation_data"
291291
'
292292

293293
graph_git_behavior 'graph from commits, commit 8 vs merge 1' full commits/8 merge/1
@@ -297,7 +297,7 @@ test_expect_success 'build graph from commits with append' '
297297
cd "$TRASH_DIRECTORY/full" &&
298298
git rev-parse merge/3 | git commit-graph write --stdin-commits --append &&
299299
test_path_is_file $objdir/info/commit-graph &&
300-
graph_read_expect "10" "extra_edges"
300+
graph_read_expect "10" "generation_data extra_edges"
301301
'
302302

303303
graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1
@@ -307,7 +307,7 @@ test_expect_success 'build graph using --reachable' '
307307
cd "$TRASH_DIRECTORY/full" &&
308308
git commit-graph write --reachable &&
309309
test_path_is_file $objdir/info/commit-graph &&
310-
graph_read_expect "11" "extra_edges"
310+
graph_read_expect "11" "generation_data extra_edges"
311311
'
312312

313313
graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1
@@ -328,7 +328,7 @@ test_expect_success 'write graph in bare repo' '
328328
cd "$TRASH_DIRECTORY/bare" &&
329329
git commit-graph write &&
330330
test_path_is_file $baredir/info/commit-graph &&
331-
graph_read_expect "11" "extra_edges"
331+
graph_read_expect "11" "generation_data extra_edges"
332332
'
333333

334334
graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1
@@ -454,8 +454,9 @@ test_expect_success 'warn on improper hash version' '
454454

455455
test_expect_success 'git commit-graph verify' '
456456
cd "$TRASH_DIRECTORY/full" &&
457-
git rev-parse commits/8 | git commit-graph write --stdin-commits &&
458-
git commit-graph verify >output
457+
git rev-parse commits/8 | GIT_TEST_COMMIT_GRAPH_NO_GDAT=1 git commit-graph write --stdin-commits &&
458+
git commit-graph verify >output &&
459+
graph_read_expect 9 extra_edges
459460
'
460461

461462
NUM_COMMITS=9
@@ -741,4 +742,47 @@ test_expect_success 'corrupt commit-graph write (missing tree)' '
741742
)
742743
'
743744

745+
# Create a commit whose author and committer dates are both set to a given
# value, then tag it.
#   $1 - used for the file name ("$1.t"), the file content, the commit
#        message and the tag name
#   $2 - value assigned to GIT_COMMITTER_DATE and GIT_AUTHOR_DATE
# Every step is &&-chained so that a failure in any command (including
# "git commit") is reported through the helper's exit status instead of
# being masked by the final "git tag".
test_commit_with_date() {
	file="$1.t" &&
	echo "$1" >"$file" &&
	git add "$file" &&
	GIT_COMMITTER_DATE="$2" GIT_AUTHOR_DATE="$2" git commit -m "$1" &&
	git tag "$1"
}
752+
753+
test_expect_success 'overflow corrected commit date offset' '
754+
objdir=".git/objects" &&
755+
UNIX_EPOCH_ZERO="1970-01-01 00:00 +0000" &&
756+
FUTURE_DATE="@2147483646 +0000" &&
757+
test_oid_cache <<-EOF &&
758+
oid_version sha1:1
759+
oid_version sha256:2
760+
EOF
761+
cd "$TRASH_DIRECTORY" &&
762+
mkdir repo &&
763+
cd repo &&
764+
git init &&
765+
test_commit_with_date 1 "$UNIX_EPOCH_ZERO" &&
766+
test_commit 2 &&
767+
test_commit_with_date 3 "$UNIX_EPOCH_ZERO" &&
768+
git commit-graph write --reachable &&
769+
graph_read_expect 3 generation_data &&
770+
test_commit_with_date 4 "$FUTURE_DATE" &&
771+
test_commit 5 &&
772+
test_commit_with_date 6 "$UNIX_EPOCH_ZERO" &&
773+
git branch left &&
774+
git reset --hard 3 &&
775+
test_commit 7 &&
776+
test_commit_with_date 8 "$FUTURE_DATE" &&
777+
test_commit 9 &&
778+
git branch right &&
779+
git reset --hard 3 &&
780+
git merge left right &&
781+
git commit-graph write --reachable &&
782+
graph_read_expect 10 "generation_data generation_data_overflow" &&
783+
git commit-graph verify
784+
'
785+
786+
graph_git_behavior 'overflow corrected commit date offset' repo left right
787+
744788
test_done

0 commit comments

Comments
 (0)