Skip to content

Commit a0dc1a7

Browse files
committed
path-walk: add prune_all_uninteresting option
This option causes the path-walk API to act like the sparse tree-walk algorithm implemented by mark_trees_uninteresting_sparse() in list-objects.c. Starting from the commits marked as UNINTERESTING, their root trees and all objects reachable from those trees are UNINTERESTING, at least as we walk path-by-path. When we reach a path where all objects associated with that path are marked UNINTERESTING, then do not continue walking the children of that path. We need to be careful to pass the UNINTERESTING flag in a deep way on the UNINTERESTING objects before we start the path-walk, or else the depth-first search for the path-walk API may accidentally report some objects as interesting. Signed-off-by: Derrick Stolee <[email protected]>
1 parent d1af7aa commit a0dc1a7

File tree

6 files changed

+152
-37
lines changed

6 files changed

+152
-37
lines changed

Documentation/technical/api-path-walk.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ commits are emitted.
5555
While it is possible to walk only commits in this way, consumers would be
5656
better off using the revision walk API instead.
5757

58+
`prune_all_uninteresting`::
59+
By default, all reachable paths are emitted by the path-walk API.
60+
This option allows consumers to declare that they are not
61+
interested in paths where all included objects are marked with the
62+
`UNINTERESTING` flag. This requires using the `boundary` option in
63+
the revision walk so that the walk emits commits marked with the
64+
`UNINTERESTING` flag.
65+
5866
Examples
5967
--------
6068

builtin/pack-objects.c

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3147,6 +3147,33 @@ static int add_ref_tag(const char *tag UNUSED, const struct object_id *oid,
31473147
return 0;
31483148
}
31493149

3150+
static int should_attempt_deltas(struct object_entry *entry)
3151+
{
3152+
if (DELTA(entry))
3153+
return 0;
3154+
3155+
if (!entry->type_valid ||
3156+
oe_size_less_than(&to_pack, entry, 50))
3157+
return 0;
3158+
3159+
if (entry->no_try_delta)
3160+
return 0;
3161+
3162+
if (!entry->preferred_base) {
3163+
if (oe_type(entry) < 0)
3164+
die(_("unable to get type of object %s"),
3165+
oid_to_hex(&entry->idx.oid));
3166+
} else if (oe_type(entry) < 0) {
3167+
/*
3168+
* This object is not found, but we
3169+
* don't have to include it anyway.
3170+
*/
3171+
return 0;
3172+
}
3173+
3174+
return 1;
3175+
}
3176+
31503177
static void prepare_pack(int window, int depth)
31513178
{
31523179
struct object_entry **delta_list;
@@ -3177,33 +3204,11 @@ static void prepare_pack(int window, int depth)
31773204
for (i = 0; i < to_pack.nr_objects; i++) {
31783205
struct object_entry *entry = to_pack.objects + i;
31793206

3180-
if (DELTA(entry))
3181-
/* This happens if we decided to reuse existing
3182-
* delta from a pack. "reuse_delta &&" is implied.
3183-
*/
3184-
continue;
3185-
3186-
if (!entry->type_valid ||
3187-
oe_size_less_than(&to_pack, entry, 50))
3207+
if (!should_attempt_deltas(entry))
31883208
continue;
31893209

3190-
if (entry->no_try_delta)
3191-
continue;
3192-
3193-
if (!entry->preferred_base) {
3210+
if (!entry->preferred_base)
31943211
nr_deltas++;
3195-
if (oe_type(entry) < 0)
3196-
die(_("unable to get type of object %s"),
3197-
oid_to_hex(&entry->idx.oid));
3198-
} else {
3199-
if (oe_type(entry) < 0) {
3200-
/*
3201-
* This object is not found, but we
3202-
* don't have to include it anyway.
3203-
*/
3204-
continue;
3205-
}
3206-
}
32073212

32083213
delta_list[n++] = entry;
32093214
}

path-walk.c

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ struct type_and_oid_list
2222
{
2323
enum object_type type;
2424
struct oid_array oids;
25+
int maybe_interesting;
2526
};
2627

2728
#define TYPE_AND_OID_LIST_INIT { \
@@ -123,6 +124,9 @@ static int add_children(struct path_walk_context *ctx,
123124
list->type = type;
124125
strmap_put(&ctx->paths_to_lists, path.buf, list);
125126
string_list_append(&ctx->path_stack, path.buf);
127+
128+
if (!(o->flags & UNINTERESTING))
129+
list->maybe_interesting = 1;
126130
}
127131
oid_array_append(&list->oids, &entry.oid);
128132
}
@@ -145,6 +149,40 @@ static int walk_path(struct path_walk_context *ctx,
145149

146150
list = strmap_get(&ctx->paths_to_lists, path);
147151

152+
if (ctx->info->prune_all_uninteresting) {
153+
/*
154+
* This is true if all objects were UNINTERESTING
155+
* when added to the list.
156+
*/
157+
if (!list->maybe_interesting)
158+
return 0;
159+
160+
/*
161+
* But it's still possible that the objects were set
162+
* as UNINTERESTING after being added. Do a quick check.
163+
*/
164+
list->maybe_interesting = 0;
165+
for (size_t i = 0;
166+
!list->maybe_interesting && i < list->oids.nr;
167+
i++) {
168+
if (list->type == OBJ_TREE) {
169+
struct tree *t = lookup_tree(ctx->repo,
170+
&list->oids.oid[i]);
171+
if (t && !(t->object.flags & UNINTERESTING))
172+
list->maybe_interesting = 1;
173+
} else {
174+
struct blob *b = lookup_blob(ctx->repo,
175+
&list->oids.oid[i]);
176+
if (b && !(b->object.flags & UNINTERESTING))
177+
list->maybe_interesting = 1;
178+
}
179+
}
180+
181+
/* We have confirmed that all objects are UNINTERESTING. */
182+
if (!list->maybe_interesting)
183+
return 0;
184+
}
185+
148186
/* Evaluate function pointer on this data, if requested. */
149187
if ((list->type == OBJ_TREE && ctx->info->trees) ||
150188
(list->type == OBJ_BLOB && ctx->info->blobs))
@@ -187,7 +225,7 @@ static void clear_strmap(struct strmap *map)
187225
int walk_objects_by_path(struct path_walk_info *info)
188226
{
189227
const char *root_path = "";
190-
int ret = 0;
228+
int ret = 0, has_uninteresting = 0;
191229
size_t commits_nr = 0, paths_nr = 0;
192230
struct commit *c;
193231
struct type_and_oid_list *root_tree_list;
@@ -199,6 +237,7 @@ int walk_objects_by_path(struct path_walk_info *info)
199237
.path_stack = STRING_LIST_INIT_DUP,
200238
.paths_to_lists = STRMAP_INIT
201239
};
240+
struct oidset root_tree_set = OIDSET_INIT;
202241

203242
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
204243

@@ -211,6 +250,7 @@ int walk_objects_by_path(struct path_walk_info *info)
211250
/* Insert a single list for the root tree into the paths. */
212251
CALLOC_ARRAY(root_tree_list, 1);
213252
root_tree_list->type = OBJ_TREE;
253+
root_tree_list->maybe_interesting = 1;
214254
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
215255

216256
/*
@@ -301,11 +341,17 @@ int walk_objects_by_path(struct path_walk_info *info)
301341
oid = get_commit_tree_oid(c);
302342
t = lookup_tree(info->revs->repo, oid);
303343

304-
if (t)
344+
if (t) {
345+
oidset_insert(&root_tree_set, oid);
305346
oid_array_append(&root_tree_list->oids, oid);
306-
else
347+
} else {
307348
warning("could not find tree %s", oid_to_hex(oid));
349+
}
308350

351+
if (t && (c->object.flags & UNINTERESTING)) {
352+
t->object.flags |= UNINTERESTING;
353+
has_uninteresting = 1;
354+
}
309355
}
310356

311357
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
@@ -318,6 +364,21 @@ int walk_objects_by_path(struct path_walk_info *info)
318364
oid_array_clear(&commit_list->oids);
319365
free(commit_list);
320366

367+
/*
368+
* Before performing a DFS of our paths and emitting them as interesting,
369+
* do a full walk of the trees to distribute the UNINTERESTING bit. Use
370+
* the sparse algorithm if prune_all_uninteresting was set.
371+
*/
372+
if (has_uninteresting) {
373+
trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
374+
if (info->prune_all_uninteresting)
375+
mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
376+
else
377+
mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
378+
trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
379+
}
380+
oidset_clear(&root_tree_set);
381+
321382
string_list_append(&ctx.path_stack, root_path);
322383

323384
trace2_region_enter("path-walk", "path-walk", info->revs->repo);

path-walk.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ struct path_walk_info {
3838
int trees;
3939
int blobs;
4040
int tags;
41+
42+
/**
43+
* When 'prune_all_uninteresting' is set and a path has all objects
44+
* marked as UNINTERESTING, then the path-walk will not visit those
45+
* objects. It will not call path_fn on those objects and will not
46+
* walk the children of such trees.
47+
*/
48+
int prune_all_uninteresting;
4149
};
4250

4351
#define PATH_WALK_INFO_INIT { \

t/helper/test-path-walk.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,15 @@ static int emit_block(const char *path, struct oid_array *oids,
4949
BUG("we do not understand this type");
5050
}
5151

52-
for (size_t i = 0; i < oids->nr; i++)
53-
printf("%s:%s:%s\n", typestr, path, oid_to_hex(&oids->oid[i]));
52+
for (size_t i = 0; i < oids->nr; i++) {
53+
struct object *o = lookup_unknown_object(the_repository,
54+
&oids->oid[i]);
55+
printf("%s:%s:%s", typestr, path, oid_to_hex(&oids->oid[i]));
56+
57+
if (o->flags & UNINTERESTING)
58+
printf(":UNINTERESTING");
59+
printf("\n");
60+
}
5461

5562
return 0;
5663
}
@@ -75,6 +82,8 @@ int cmd__path_walk(int argc, const char **argv)
7582
info.commits = 0;
7683
if (!strcmp(argv[argi], "--no-tags"))
7784
info.tags = 0;
85+
if (!strcmp(argv[argi], "--prune"))
86+
info.prune_all_uninteresting = 1;
7887
if (!strcmp(argv[argi], "--"))
7988
break;
8089
}

t/t6601-path-walk.sh

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,19 +229,19 @@ test_expect_success 'topic, not base, boundary' '
229229
230230
cat >expect <<-EOF &&
231231
COMMIT::$(git rev-parse topic)
232-
COMMIT::$(git rev-parse base~1)
232+
COMMIT::$(git rev-parse base~1):UNINTERESTING
233233
commits:2
234234
TREE::$(git rev-parse topic^{tree})
235-
TREE::$(git rev-parse base~1^{tree})
236-
TREE:left/:$(git rev-parse base~1:left)
235+
TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
236+
TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
237237
TREE:right/:$(git rev-parse topic:right)
238-
TREE:right/:$(git rev-parse base~1:right)
238+
TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
239239
trees:5
240-
BLOB:a:$(git rev-parse base~1:a)
241-
BLOB:left/b:$(git rev-parse base~1:left/b)
242-
BLOB:right/c:$(git rev-parse base~1:right/c)
240+
BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
241+
BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
242+
BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
243243
BLOB:right/c:$(git rev-parse topic:right/c)
244-
BLOB:right/d:$(git rev-parse base~1:right/d)
244+
BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
245245
blobs:5
246246
tags:0
247247
EOF
@@ -252,4 +252,28 @@ test_expect_success 'topic, not base, boundary' '
252252
test_cmp expect.sorted out.sorted
253253
'
254254

255+
test_expect_success 'topic, not base, boundary with pruning' '
256+
test-tool path-walk --prune -- --boundary topic --not base >out &&
257+
258+
cat >expect <<-EOF &&
259+
COMMIT::$(git rev-parse topic)
260+
COMMIT::$(git rev-parse base~1):UNINTERESTING
261+
commits:2
262+
TREE::$(git rev-parse topic^{tree})
263+
TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
264+
TREE:right/:$(git rev-parse topic:right)
265+
TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
266+
trees:4
267+
BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
268+
BLOB:right/c:$(git rev-parse topic:right/c)
269+
blobs:2
270+
tags:0
271+
EOF
272+
273+
sort expect >expect.sorted &&
274+
sort out >out.sorted &&
275+
276+
test_cmp expect.sorted out.sorted
277+
'
278+
255279
test_done

0 commit comments

Comments
 (0)