Skip to content

Commit 2834bc2

Browse files
vmg authored and gitster committed
pack-objects: refactor the packing list
The hash table that stores the packing list for a given `pack-objects` run was tightly coupled to the pack-objects code.

In this commit, we refactor the hash table and the underlying storage array into a `packing_data` struct. The functionality for accessing and adding entries to the packing list is hence accessible from other parts of Git besides the `pack-objects` builtin.

This refactoring is a requirement for further patches in this series that will require accessing the commit packing list from outside of `pack-objects`.

The hash table implementation has been minimally altered: we now use table sizes which are always a power of two, to ensure a uniform index distribution in the array.

Signed-off-by: Vicent Marti <[email protected]>
Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 92e5c77 commit 2834bc2

File tree

4 files changed

+200
-135
lines changed

4 files changed

+200
-135
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,7 @@ LIB_H += notes-merge.h
694694
LIB_H += notes-utils.h
695695
LIB_H += notes.h
696696
LIB_H += object.h
697+
LIB_H += pack-objects.h
697698
LIB_H += pack-revindex.h
698699
LIB_H += pack.h
699700
LIB_H += parse-options.h
@@ -831,6 +832,7 @@ LIB_OBJS += notes-merge.o
831832
LIB_OBJS += notes-utils.o
832833
LIB_OBJS += object.o
833834
LIB_OBJS += pack-check.o
835+
LIB_OBJS += pack-objects.o
834836
LIB_OBJS += pack-revindex.o
835837
LIB_OBJS += pack-write.o
836838
LIB_OBJS += pager.o

builtin/pack-objects.c

Lines changed: 40 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "diff.h"
1515
#include "revision.h"
1616
#include "list-objects.h"
17+
#include "pack-objects.h"
1718
#include "progress.h"
1819
#include "refs.h"
1920
#include "streaming.h"
@@ -25,42 +26,15 @@ static const char *pack_usage[] = {
2526
NULL
2627
};
2728

28-
struct object_entry {
29-
struct pack_idx_entry idx;
30-
unsigned long size; /* uncompressed size */
31-
struct packed_git *in_pack; /* already in pack */
32-
off_t in_pack_offset;
33-
struct object_entry *delta; /* delta base object */
34-
struct object_entry *delta_child; /* deltified objects who bases me */
35-
struct object_entry *delta_sibling; /* other deltified objects who
36-
* uses the same base as me
37-
*/
38-
void *delta_data; /* cached delta (uncompressed) */
39-
unsigned long delta_size; /* delta data size (uncompressed) */
40-
unsigned long z_delta_size; /* delta data size (compressed) */
41-
enum object_type type;
42-
enum object_type in_pack_type; /* could be delta */
43-
uint32_t hash; /* name hint hash */
44-
unsigned char in_pack_header_size;
45-
unsigned preferred_base:1; /*
46-
* we do not pack this, but is available
47-
* to be used as the base object to delta
48-
* objects against.
49-
*/
50-
unsigned no_try_delta:1;
51-
unsigned tagged:1; /* near the very tip of refs */
52-
unsigned filled:1; /* assigned write-order */
53-
};
54-
5529
/*
56-
* Objects we are going to pack are collected in objects array (dynamically
57-
* expanded). nr_objects & nr_alloc controls this array. They are stored
58-
* in the order we see -- typically rev-list --objects order that gives us
59-
* nice "minimum seek" order.
30+
* Objects we are going to pack are collected in the `to_pack` structure.
31+
* It contains an array (dynamically expanded) of the object data, and a map
32+
* that can resolve SHA1s to their position in the array.
6033
*/
61-
static struct object_entry *objects;
34+
static struct packing_data to_pack;
35+
6236
static struct pack_idx_entry **written_list;
63-
static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
37+
static uint32_t nr_result, nr_written;
6438

6539
static int non_empty;
6640
static int reuse_delta = 1, reuse_object = 1;
@@ -89,22 +63,12 @@ static unsigned long cache_max_small_delta_size = 1000;
8963

9064
static unsigned long window_memory_limit = 0;
9165

92-
/*
93-
* The object names in objects array are hashed with this hashtable,
94-
* to help looking up the entry by object name.
95-
* This hashtable is built after all the objects are seen.
96-
*/
97-
static int *object_ix;
98-
static int object_ix_hashsz;
99-
static struct object_entry *locate_object_entry(const unsigned char *sha1);
100-
10166
/*
10267
* stats
10368
*/
10469
static uint32_t written, written_delta;
10570
static uint32_t reused, reused_delta;
10671

107-
10872
static void *get_delta(struct object_entry *entry)
10973
{
11074
unsigned long size, base_size, delta_size;
@@ -553,12 +517,12 @@ static int mark_tagged(const char *path, const unsigned char *sha1, int flag,
553517
void *cb_data)
554518
{
555519
unsigned char peeled[20];
556-
struct object_entry *entry = locate_object_entry(sha1);
520+
struct object_entry *entry = packlist_find(&to_pack, sha1, NULL);
557521

558522
if (entry)
559523
entry->tagged = 1;
560524
if (!peel_ref(path, peeled)) {
561-
entry = locate_object_entry(peeled);
525+
entry = packlist_find(&to_pack, peeled, NULL);
562526
if (entry)
563527
entry->tagged = 1;
564528
}
@@ -633,9 +597,10 @@ static struct object_entry **compute_write_order(void)
633597
{
634598
unsigned int i, wo_end, last_untagged;
635599

636-
struct object_entry **wo = xmalloc(nr_objects * sizeof(*wo));
600+
struct object_entry **wo = xmalloc(to_pack.nr_objects * sizeof(*wo));
601+
struct object_entry *objects = to_pack.objects;
637602

638-
for (i = 0; i < nr_objects; i++) {
603+
for (i = 0; i < to_pack.nr_objects; i++) {
639604
objects[i].tagged = 0;
640605
objects[i].filled = 0;
641606
objects[i].delta_child = NULL;
@@ -647,7 +612,7 @@ static struct object_entry **compute_write_order(void)
647612
* Make sure delta_sibling is sorted in the original
648613
* recency order.
649614
*/
650-
for (i = nr_objects; i > 0;) {
615+
for (i = to_pack.nr_objects; i > 0;) {
651616
struct object_entry *e = &objects[--i];
652617
if (!e->delta)
653618
continue;
@@ -665,7 +630,7 @@ static struct object_entry **compute_write_order(void)
665630
* Give the objects in the original recency order until
666631
* we see a tagged tip.
667632
*/
668-
for (i = wo_end = 0; i < nr_objects; i++) {
633+
for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
669634
if (objects[i].tagged)
670635
break;
671636
add_to_write_order(wo, &wo_end, &objects[i]);
@@ -675,15 +640,15 @@ static struct object_entry **compute_write_order(void)
675640
/*
676641
* Then fill all the tagged tips.
677642
*/
678-
for (; i < nr_objects; i++) {
643+
for (; i < to_pack.nr_objects; i++) {
679644
if (objects[i].tagged)
680645
add_to_write_order(wo, &wo_end, &objects[i]);
681646
}
682647

683648
/*
684649
* And then all remaining commits and tags.
685650
*/
686-
for (i = last_untagged; i < nr_objects; i++) {
651+
for (i = last_untagged; i < to_pack.nr_objects; i++) {
687652
if (objects[i].type != OBJ_COMMIT &&
688653
objects[i].type != OBJ_TAG)
689654
continue;
@@ -693,7 +658,7 @@ static struct object_entry **compute_write_order(void)
693658
/*
694659
* And then all the trees.
695660
*/
696-
for (i = last_untagged; i < nr_objects; i++) {
661+
for (i = last_untagged; i < to_pack.nr_objects; i++) {
697662
if (objects[i].type != OBJ_TREE)
698663
continue;
699664
add_to_write_order(wo, &wo_end, &objects[i]);
@@ -702,13 +667,13 @@ static struct object_entry **compute_write_order(void)
702667
/*
703668
* Finally all the rest in really tight order
704669
*/
705-
for (i = last_untagged; i < nr_objects; i++) {
670+
for (i = last_untagged; i < to_pack.nr_objects; i++) {
706671
if (!objects[i].filled)
707672
add_family_to_write_order(wo, &wo_end, &objects[i]);
708673
}
709674

710-
if (wo_end != nr_objects)
711-
die("ordered %u objects, expected %"PRIu32, wo_end, nr_objects);
675+
if (wo_end != to_pack.nr_objects)
676+
die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);
712677

713678
return wo;
714679
}
@@ -724,7 +689,7 @@ static void write_pack_file(void)
724689

725690
if (progress > pack_to_stdout)
726691
progress_state = start_progress("Writing objects", nr_result);
727-
written_list = xmalloc(nr_objects * sizeof(*written_list));
692+
written_list = xmalloc(to_pack.nr_objects * sizeof(*written_list));
728693
write_order = compute_write_order();
729694

730695
do {
@@ -740,7 +705,7 @@ static void write_pack_file(void)
740705
if (!offset)
741706
die_errno("unable to write pack header");
742707
nr_written = 0;
743-
for (; i < nr_objects; i++) {
708+
for (; i < to_pack.nr_objects; i++) {
744709
struct object_entry *e = write_order[i];
745710
if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
746711
break;
@@ -803,7 +768,7 @@ static void write_pack_file(void)
803768
written_list[j]->offset = (off_t)-1;
804769
}
805770
nr_remaining -= nr_written;
806-
} while (nr_remaining && i < nr_objects);
771+
} while (nr_remaining && i < to_pack.nr_objects);
807772

808773
free(written_list);
809774
free(write_order);
@@ -813,53 +778,6 @@ static void write_pack_file(void)
813778
written, nr_result);
814779
}
815780

816-
static int locate_object_entry_hash(const unsigned char *sha1)
817-
{
818-
int i;
819-
unsigned int ui;
820-
memcpy(&ui, sha1, sizeof(unsigned int));
821-
i = ui % object_ix_hashsz;
822-
while (0 < object_ix[i]) {
823-
if (!hashcmp(sha1, objects[object_ix[i] - 1].idx.sha1))
824-
return i;
825-
if (++i == object_ix_hashsz)
826-
i = 0;
827-
}
828-
return -1 - i;
829-
}
830-
831-
static struct object_entry *locate_object_entry(const unsigned char *sha1)
832-
{
833-
int i;
834-
835-
if (!object_ix_hashsz)
836-
return NULL;
837-
838-
i = locate_object_entry_hash(sha1);
839-
if (0 <= i)
840-
return &objects[object_ix[i]-1];
841-
return NULL;
842-
}
843-
844-
static void rehash_objects(void)
845-
{
846-
uint32_t i;
847-
struct object_entry *oe;
848-
849-
object_ix_hashsz = nr_objects * 3;
850-
if (object_ix_hashsz < 1024)
851-
object_ix_hashsz = 1024;
852-
object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
853-
memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
854-
for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
855-
int ix = locate_object_entry_hash(oe->idx.sha1);
856-
if (0 <= ix)
857-
continue;
858-
ix = -1 - ix;
859-
object_ix[ix] = i + 1;
860-
}
861-
}
862-
863781
static uint32_t name_hash(const char *name)
864782
{
865783
uint32_t c, hash = 0;
@@ -908,13 +826,12 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
908826
struct object_entry *entry;
909827
struct packed_git *p, *found_pack = NULL;
910828
off_t found_offset = 0;
911-
int ix;
912829
uint32_t hash = name_hash(name);
830+
uint32_t index_pos;
913831

914-
ix = nr_objects ? locate_object_entry_hash(sha1) : -1;
915-
if (ix >= 0) {
832+
entry = packlist_find(&to_pack, sha1, &index_pos);
833+
if (entry) {
916834
if (exclude) {
917-
entry = objects + object_ix[ix] - 1;
918835
if (!entry->preferred_base)
919836
nr_result--;
920837
entry->preferred_base = 1;
@@ -947,14 +864,7 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
947864
}
948865
}
949866

950-
if (nr_objects >= nr_alloc) {
951-
nr_alloc = (nr_alloc + 1024) * 3 / 2;
952-
objects = xrealloc(objects, nr_alloc * sizeof(*entry));
953-
}
954-
955-
entry = objects + nr_objects++;
956-
memset(entry, 0, sizeof(*entry));
957-
hashcpy(entry->idx.sha1, sha1);
867+
entry = packlist_alloc(&to_pack, sha1, index_pos);
958868
entry->hash = hash;
959869
if (type)
960870
entry->type = type;
@@ -967,12 +877,7 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
967877
entry->in_pack_offset = found_offset;
968878
}
969879

970-
if (object_ix_hashsz * 3 <= nr_objects * 4)
971-
rehash_objects();
972-
else
973-
object_ix[-1 - ix] = nr_objects;
974-
975-
display_progress(progress_state, nr_objects);
880+
display_progress(progress_state, to_pack.nr_objects);
976881

977882
if (name && no_try_delta(name))
978883
entry->no_try_delta = 1;
@@ -1329,7 +1234,7 @@ static void check_object(struct object_entry *entry)
13291234
break;
13301235
}
13311236

1332-
if (base_ref && (base_entry = locate_object_entry(base_ref))) {
1237+
if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
13331238
/*
13341239
* If base_ref was set above that means we wish to
13351240
* reuse delta data, and we even found that base
@@ -1403,12 +1308,12 @@ static void get_object_details(void)
14031308
uint32_t i;
14041309
struct object_entry **sorted_by_offset;
14051310

1406-
sorted_by_offset = xcalloc(nr_objects, sizeof(struct object_entry *));
1407-
for (i = 0; i < nr_objects; i++)
1408-
sorted_by_offset[i] = objects + i;
1409-
qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
1311+
sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
1312+
for (i = 0; i < to_pack.nr_objects; i++)
1313+
sorted_by_offset[i] = to_pack.objects + i;
1314+
qsort(sorted_by_offset, to_pack.nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
14101315

1411-
for (i = 0; i < nr_objects; i++) {
1316+
for (i = 0; i < to_pack.nr_objects; i++) {
14121317
struct object_entry *entry = sorted_by_offset[i];
14131318
check_object(entry);
14141319
if (big_file_threshold < entry->size)
@@ -2034,7 +1939,7 @@ static int add_ref_tag(const char *path, const unsigned char *sha1, int flag, vo
20341939

20351940
if (!prefixcmp(path, "refs/tags/") && /* is a tag? */
20361941
!peel_ref(path, peeled) && /* peelable? */
2037-
locate_object_entry(peeled)) /* object packed? */
1942+
packlist_find(&to_pack, peeled, NULL)) /* object packed? */
20381943
add_object_entry(sha1, OBJ_TAG, NULL, 0);
20391944
return 0;
20401945
}
@@ -2057,14 +1962,14 @@ static void prepare_pack(int window, int depth)
20571962
if (!pack_to_stdout)
20581963
do_check_packed_object_crc = 1;
20591964

2060-
if (!nr_objects || !window || !depth)
1965+
if (!to_pack.nr_objects || !window || !depth)
20611966
return;
20621967

2063-
delta_list = xmalloc(nr_objects * sizeof(*delta_list));
1968+
delta_list = xmalloc(to_pack.nr_objects * sizeof(*delta_list));
20641969
nr_deltas = n = 0;
20651970

2066-
for (i = 0; i < nr_objects; i++) {
2067-
struct object_entry *entry = objects + i;
1971+
for (i = 0; i < to_pack.nr_objects; i++) {
1972+
struct object_entry *entry = to_pack.objects + i;
20681973

20691974
if (entry->delta)
20701975
/* This happens if we decided to reuse existing
@@ -2342,7 +2247,7 @@ static void loosen_unused_packed_objects(struct rev_info *revs)
23422247

23432248
for (i = 0; i < p->num_objects; i++) {
23442249
sha1 = nth_packed_object_sha1(p, i);
2345-
if (!locate_object_entry(sha1) &&
2250+
if (!packlist_find(&to_pack, sha1, NULL) &&
23462251
!has_sha1_pack_kept_or_nonlocal(sha1))
23472252
if (force_object_loose(sha1, p->mtime))
23482253
die("unable to force loose object");

0 commit comments

Comments
 (0)