Skip to content

Commit 0087a87

Browse files
derrickstoleegitster
authored andcommitted
commit-graph: persist existence of changed-paths
The changed-path Bloom filters were released in v2.27.0, but have a significant drawback. A user can opt-in to writing the changed-path filters using the "--changed-paths" option to "git commit-graph write" but the next write will drop the filters unless that option is specified. This becomes even more important when considering the interaction with gc.writeCommitGraph (on by default) or fetch.writeCommitGraph (part of features.experimental). These config options trigger commit-graph writes that the user did not signal, and hence there is no --changed-paths option available. Allow a user that opts-in to the changed-path filters to persist the property of "my commit-graph has changed-path filters" automatically. A user can drop filters using the --no-changed-paths option. In the process, we need to be extremely careful to match the Bloom filter settings as specified by the commit-graph. This will allow future versions of Git to customize these settings, and the version with this change will persist those settings as commit-graphs are rewritten on top. Use the trace2 API to signal the settings used during the write, and check that output in a test after manually adjusting the correct bytes in the commit-graph file. Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 9491974 commit 0087a87

File tree

5 files changed

+67
-6
lines changed

5 files changed

+67
-6
lines changed

Documentation/git-commit-graph.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ existing commit-graph file.
6060
With the `--changed-paths` option, compute and write information about the
6161
paths changed between a commit and it's first parent. This operation can
6262
take a while on large repositories. It provides significant performance gains
63-
for getting history of a directory or a file with `git log -- <path>`.
63+
for getting history of a directory or a file with `git log -- <path>`. If
64+
this option is given, future commit-graph writes will automatically assume
65+
that this option was intended. Use `--no-changed-paths` to stop storing this
66+
data.
6467
+
6568
With the `--split` option, write the commit-graph as a chain of multiple
6669
commit-graph files stored in `<dir>/info/commit-graphs`. The new commits

builtin/commit-graph.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ static int graph_write(int argc, const char **argv)
151151
};
152152

153153
opts.progress = isatty(2);
154+
opts.enable_changed_paths = -1;
154155
split_opts.size_multiple = 2;
155156
split_opts.max_commits = 0;
156157
split_opts.expire_time = 0;
@@ -171,7 +172,9 @@ static int graph_write(int argc, const char **argv)
171172
flags |= COMMIT_GRAPH_WRITE_SPLIT;
172173
if (opts.progress)
173174
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
174-
if (opts.enable_changed_paths ||
175+
if (!opts.enable_changed_paths)
176+
flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS;
177+
if (opts.enable_changed_paths == 1 ||
175178
git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
176179
flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
177180

commit-graph.c

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "progress.h"
1717
#include "bloom.h"
1818
#include "commit-slab.h"
19+
#include "json-writer.h"
20+
#include "trace2.h"
1921

2022
void git_test_write_commit_graph_or_die(void)
2123
{
@@ -1108,6 +1110,21 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
11081110
stop_progress(&progress);
11091111
}
11101112

1113+
static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
1114+
{
1115+
struct json_writer jw = JSON_WRITER_INIT;
1116+
1117+
jw_object_begin(&jw, 0);
1118+
jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
1119+
jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
1120+
jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
1121+
jw_end(&jw);
1122+
1123+
trace2_data_json("bloom", ctx->r, "settings", &jw);
1124+
1125+
jw_release(&jw);
1126+
}
1127+
11111128
static void write_graph_chunk_bloom_data(struct hashfile *f,
11121129
struct write_commit_graph_context *ctx)
11131130
{
@@ -1116,6 +1133,8 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
11161133
struct progress *progress = NULL;
11171134
int i = 0;
11181135

1136+
trace2_bloom_filter_settings(ctx);
1137+
11191138
if (ctx->report_progress)
11201139
progress = start_delayed_progress(
11211140
_("Writing changed paths Bloom filters data"),
@@ -1547,9 +1566,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
15471566
int num_chunks = 3;
15481567
uint64_t chunk_offset;
15491568
struct object_id file_hash;
1550-
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
1569+
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
15511570

1552-
ctx->bloom_settings = &bloom_settings;
1571+
if (!ctx->bloom_settings) {
1572+
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
1573+
bloom_settings.bits_per_entry);
1574+
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
1575+
bloom_settings.num_hashes);
1576+
ctx->bloom_settings = &bloom_settings;
1577+
}
15531578

15541579
if (ctx->split) {
15551580
struct strbuf tmp_file = STRBUF_INIT;
@@ -1974,9 +1999,23 @@ int write_commit_graph(struct object_directory *odb,
19741999
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
19752000
ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0;
19762001
ctx->split_opts = split_opts;
1977-
ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
19782002
ctx->total_bloom_filter_data_size = 0;
19792003

2004+
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
2005+
ctx->changed_paths = 1;
2006+
if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
2007+
struct commit_graph *g;
2008+
prepare_commit_graph_one(ctx->r, ctx->odb);
2009+
2010+
g = ctx->r->objects->commit_graph;
2011+
2012+
/* We have changed-paths already. Keep them in the next graph */
2013+
if (g && g->chunk_bloom_data) {
2014+
ctx->changed_paths = 1;
2015+
ctx->bloom_settings = g->bloom_filter_settings;
2016+
}
2017+
}
2018+
19802019
if (ctx->split) {
19812020
struct commit_graph *g;
19822021
prepare_commit_graph(ctx->r);

commit-graph.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ enum commit_graph_write_flags {
9696
/* Make sure that each OID in the input is a valid commit OID. */
9797
COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3),
9898
COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4),
99+
COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 5),
99100
};
100101

101102
struct split_commit_graph_opts {

t/t4216-log-bloom.sh

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters
126126
test_commit c14 A/anotherFile2 &&
127127
test_commit c15 A/B/anotherFile2 &&
128128
test_commit c16 A/B/C/anotherFile2 &&
129-
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split &&
129+
git commit-graph write --reachable --split --no-changed-paths &&
130130
test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
131131
'
132132

@@ -152,6 +152,21 @@ test_expect_success 'Use Bloom filters if they exist in the latest but not all c
152152
test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
153153
'
154154

155+
test_expect_success 'persist filter settings' '
156+
test_when_finished rm -rf .git/objects/info/commit-graph* &&
157+
rm -rf .git/objects/info/commit-graph* &&
158+
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
159+
GIT_TRACE2_EVENT_NESTING=5 \
160+
GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \
161+
GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \
162+
git commit-graph write --reachable --changed-paths &&
163+
grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt &&
164+
GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \
165+
GIT_TRACE2_EVENT_NESTING=5 \
166+
git commit-graph write --reachable --changed-paths &&
167+
grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt
168+
'
169+
155170
test_expect_success 'correctly report changes over limit' '
156171
git init 513changes &&
157172
(

0 commit comments

Comments
 (0)