Skip to content

Commit 3feb1ca

Browse files
derrickstoleeGit for Windows Build Agent
authored andcommitted
pack-objects: add --path-walk option
In order to more easily compute delta bases among objects that appear at the exact same path, add a --path-walk option to 'git pack-objects'. This option will use the path-walk API instead of the object walk given by the revision machinery. Since objects will be provided in batches representing a common path, those objects can be tested for delta bases immediately instead of waiting for a sort of the full object list by name-hash. This has multiple benefits, including avoiding collisions by name-hash. The objects marked as UNINTERESTING are included in these batches, so we are guaranteeing some locality to find good delta bases. After the individual passes are done on a per-path basis, the default name-hash is used to find other opportunistic delta bases that did not match exactly by the full path name. RFC TODO: It is important to note that this option is inherently incompatible with using a bitmap index. This walk probably also does not work with other advanced features, such as delta islands. Getting ahead of myself, this option compares well with --full-name-hash when the packfile is large enough, but also performs at least as well as the default in all cases that I've seen. RFC TODO: this should probably be recording the batch locations to another list so they could be processed in a second phase using threads. RFC TODO: list some examples of how this outperforms previous pack-objects strategies. (This is coming in later commits that include performance test changes.) Signed-off-by: Derrick Stolee <[email protected]>
1 parent 3b6e255 commit 3feb1ca

File tree

4 files changed

+167
-10
lines changed

4 files changed

+167
-10
lines changed

Documentation/git-pack-objects.adoc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ SYNOPSIS
1616
[--cruft] [--cruft-expiration=<time>]
1717
[--stdout [--filter=<filter-spec>] | <base-name>]
1818
[--shallow] [--keep-true-parents] [--[no-]sparse]
19-
[--name-hash-version=<n>] < <object-list>
19+
[--name-hash-version=<n>] [--path-walk] < <object-list>
2020

2121

2222
DESCRIPTION
@@ -375,6 +375,16 @@ many different directories. At the moment, this version is not allowed
375375
when writing reachability bitmap files with `--write-bitmap-index` and it
376376
will be automatically changed to version `1`.
377377
378+
--path-walk::
379+
By default, `git pack-objects` walks objects in an order that
380+
presents trees and blobs in an order unrelated to the path they
381+
appear relative to a commit's root tree. The `--path-walk` option
382+
enables a different walking algorithm that organizes trees and
383+
blobs by path. This has the potential to improve delta compression
384+
especially in the presence of filenames that cause collisions in
385+
Git's default name-hash algorithm. Due to changing how the objects
386+
are walked, this option is not compatible with `--delta-islands`,
387+
`--shallow`, or `--filter`.
378388
379389
DELTA ISLANDS
380390
-------------

Documentation/technical/api-path-walk.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,4 @@ Examples
7070
See example usages in:
7171
`t/helper/test-path-walk.c`,
7272
`builtin/backfill.c`
73+
`builtin/pack-objects.c`

builtin/pack-objects.c

Lines changed: 138 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
#include "promisor-remote.h"
4242
#include "pack-mtimes.h"
4343
#include "parse-options.h"
44+
#include "blob.h"
45+
#include "tree.h"
46+
#include "path-walk.h"
4447

4548
/*
4649
* Objects we are going to pack are collected in the `to_pack` structure.
@@ -217,6 +220,7 @@ static int delta_search_threads;
217220
static int pack_to_stdout;
218221
static int sparse;
219222
static int thin;
223+
static int path_walk;
220224
static int num_preferred_base;
221225
static struct progress *progress_state;
222226

@@ -4188,6 +4192,105 @@ static void mark_bitmap_preferred_tips(void)
41884192
}
41894193
}
41904194

4195+
static inline int is_oid_interesting(struct repository *repo,
4196+
struct object_id *oid)
4197+
{
4198+
struct object *o = lookup_object(repo, oid);
4199+
return o && !(o->flags & UNINTERESTING);
4200+
}
4201+
4202+
static int add_objects_by_path(const char *path,
4203+
struct oid_array *oids,
4204+
enum object_type type,
4205+
void *data)
4206+
{
4207+
struct object_entry **delta_list;
4208+
size_t oe_start = to_pack.nr_objects;
4209+
size_t oe_end;
4210+
unsigned int sub_list_size;
4211+
unsigned int *processed = data;
4212+
4213+
/*
4214+
* First, add all objects to the packing data, including the ones
4215+
* marked UNINTERESTING (translated to 'exclude') as they can be
4216+
* used as delta bases.
4217+
*/
4218+
for (size_t i = 0; i < oids->nr; i++) {
4219+
int exclude;
4220+
struct object_info oi = OBJECT_INFO_INIT;
4221+
struct object_id *oid = &oids->oid[i];
4222+
4223+
/* Skip objects that do not exist locally. */
4224+
if (exclude_promisor_objects &&
4225+
oid_object_info_extended(the_repository, oid, &oi,
4226+
OBJECT_INFO_FOR_PREFETCH) < 0)
4227+
continue;
4228+
4229+
exclude = !is_oid_interesting(the_repository, oid);
4230+
4231+
if (exclude && !thin)
4232+
continue;
4233+
4234+
add_object_entry(oid, type, path, exclude);
4235+
}
4236+
4237+
oe_end = to_pack.nr_objects;
4238+
4239+
/* We can skip delta calculations if it is a no-op. */
4240+
if (oe_end == oe_start || !window)
4241+
return 0;
4242+
4243+
sub_list_size = 0;
4244+
ALLOC_ARRAY(delta_list, oe_end - oe_start);
4245+
4246+
for (size_t i = 0; i < oe_end - oe_start; i++) {
4247+
struct object_entry *entry = to_pack.objects + oe_start + i;
4248+
4249+
if (!should_attempt_deltas(entry))
4250+
continue;
4251+
4252+
delta_list[sub_list_size++] = entry;
4253+
}
4254+
4255+
/*
4256+
* Find delta bases among this list of objects that all match the same
4257+
* path. This causes the delta compression to be interleaved in the
4258+
* object walk, which can lead to confusing progress indicators. This is
4259+
* also incompatible with threaded delta calculations. In the future,
4260+
* consider creating a list of regions in the full to_pack.objects array
4261+
* that could be picked up by the threaded delta computation.
4262+
*/
4263+
if (sub_list_size && window) {
4264+
QSORT(delta_list, sub_list_size, type_size_sort);
4265+
find_deltas(delta_list, &sub_list_size, window, depth, processed);
4266+
}
4267+
4268+
free(delta_list);
4269+
return 0;
4270+
}
4271+
4272+
static void get_object_list_path_walk(struct rev_info *revs)
4273+
{
4274+
struct path_walk_info info = PATH_WALK_INFO_INIT;
4275+
unsigned int processed = 0;
4276+
4277+
info.revs = revs;
4278+
info.path_fn = add_objects_by_path;
4279+
info.path_fn_data = &processed;
4280+
revs->tag_objects = 1;
4281+
4282+
/*
4283+
* Allow the --[no-]sparse option to be interesting here, if only
4284+
* for testing purposes. Paths with no interesting objects will not
4285+
* contribute to the resulting pack, but only create noisy preferred
4286+
* base objects.
4287+
*/
4288+
info.prune_all_uninteresting = sparse;
4289+
4290+
if (walk_objects_by_path(&info))
4291+
die(_("failed to pack objects via path-walk"));
4292+
}
4293+
41914294
static void get_object_list(struct rev_info *revs, int ac, const char **av)
41924295
{
41934296
struct setup_revision_opt s_r_opt = {
@@ -4234,7 +4337,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
42344337

42354338
warn_on_object_refname_ambiguity = save_warning;
42364339

4237-
if (use_bitmap_index && !get_object_list_from_bitmap(revs))
4340+
if (use_bitmap_index && !path_walk && !get_object_list_from_bitmap(revs))
42384341
return;
42394342

42404343
if (use_delta_islands)
@@ -4243,15 +4346,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
42434346
if (write_bitmap_index)
42444347
mark_bitmap_preferred_tips();
42454348

4246-
if (prepare_revision_walk(revs))
4247-
die(_("revision walk setup failed"));
4248-
mark_edges_uninteresting(revs, show_edge, sparse);
4249-
42504349
if (!fn_show_object)
42514350
fn_show_object = show_object;
4252-
traverse_commit_list(revs,
4253-
show_commit, fn_show_object,
4254-
NULL);
4351+
4352+
if (path_walk) {
4353+
get_object_list_path_walk(revs);
4354+
} else {
4355+
if (prepare_revision_walk(revs))
4356+
die(_("revision walk setup failed"));
4357+
mark_edges_uninteresting(revs, show_edge, sparse);
4358+
traverse_commit_list(revs,
4359+
show_commit, fn_show_object,
4360+
NULL);
4361+
}
42554362

42564363
if (unpack_unreachable_expiration) {
42574364
revs->ignore_missing_links = 1;
@@ -4461,6 +4568,8 @@ int cmd_pack_objects(int argc,
44614568
N_("use the sparse reachability algorithm")),
44624569
OPT_BOOL(0, "thin", &thin,
44634570
N_("create thin packs")),
4571+
OPT_BOOL(0, "path-walk", &path_walk,
4572+
N_("use the path-walk API to walk objects when possible")),
44644573
OPT_BOOL(0, "shallow", &shallow,
44654574
N_("create packs suitable for shallow fetches")),
44664575
OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
@@ -4546,7 +4655,27 @@ int cmd_pack_objects(int argc,
45464655
window = 0;
45474656

45484657
strvec_push(&rp, "pack-objects");
4549-
if (thin) {
4658+
4659+
if (path_walk && filter_options.choice) {
4660+
warning(_("cannot use --filter with --path-walk"));
4661+
path_walk = 0;
4662+
}
4663+
if (path_walk && use_delta_islands) {
4664+
warning(_("cannot use delta islands with --path-walk"));
4665+
path_walk = 0;
4666+
}
4667+
if (path_walk && shallow) {
4668+
warning(_("cannot use --shallow with --path-walk"));
4669+
path_walk = 0;
4670+
}
4671+
if (path_walk) {
4672+
strvec_push(&rp, "--boundary");
4673+
/*
4674+
* We must disable the bitmaps because we are removing
4675+
* the --objects / --objects-edge[-aggressive] options.
4676+
*/
4677+
use_bitmap_index = 0;
4678+
} else if (thin) {
45504679
use_internal_rev_list = 1;
45514680
strvec_push(&rp, shallow
45524681
? "--objects-edge-aggressive"

t/t5300-pack-object.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,4 +723,21 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat
723723
! test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err
724724
'
725725

726+
# Basic "repack everything" test
727+
test_expect_success '--path-walk pack everything' '
728+
git -C server rev-parse HEAD >in &&
729+
git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
730+
git -C server index-pack --stdin <out.pack
731+
'
732+
733+
# Basic "thin pack" test
734+
test_expect_success '--path-walk thin pack' '
735+
cat >in <<-EOF &&
736+
$(git -C server rev-parse HEAD)
737+
^$(git -C server rev-parse HEAD~2)
738+
EOF
739+
git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
740+
git -C server index-pack --fix-thin --stdin <out.pack
741+
'
742+
726743
test_done

0 commit comments

Comments
 (0)