Skip to content

Commit bcaaf97

Browse files
committed
Merge branch 'tb/pack-revindex-api'
Abstract accesses to in-core revindex that allows enumerating objects stored in a packfile in the order they appear in the pack, in preparation for introducing an on-disk precomputed revindex. * tb/pack-revindex-api: (21 commits) for_each_object_in_pack(): clarify pack vs index ordering pack-revindex.c: avoid direct revindex access in 'offset_to_pack_pos()' pack-revindex: hide the definition of 'revindex_entry' pack-revindex: remove unused 'find_revindex_position()' pack-revindex: remove unused 'find_pack_revindex()' builtin/gc.c: guess the size of the revindex for_each_object_in_pack(): convert to new revindex API unpack_entry(): convert to new revindex API packed_object_info(): convert to new revindex API retry_bad_packed_offset(): convert to new revindex API get_delta_base_oid(): convert to new revindex API rebuild_existing_bitmaps(): convert to new revindex API try_partial_reuse(): convert to new revindex API get_size_by_pos(): convert to new revindex API show_objects_for_type(): convert to new revindex API bitmap_position_packfile(): convert to new revindex API check_object(): convert to new revindex API write_reused_pack_verbatim(): convert to new revindex API write_reused_pack_one(): convert to new revindex API write_reuse_object(): convert to new revindex API ...
2 parents 381dac2 + 779412b commit bcaaf97

File tree

6 files changed

+185
-85
lines changed

6 files changed

+185
-85
lines changed

builtin/gc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ static uint64_t estimate_repack_memory(struct packed_git *pack)
301301
/* and then obj_hash[], underestimated in fact */
302302
heap += sizeof(struct object *) * nr_objects;
303303
/* revindex is used also */
304-
heap += sizeof(struct revindex_entry) * nr_objects;
304+
heap += (sizeof(off_t) + sizeof(uint32_t)) * nr_objects;
305305
/*
306306
* read_sha1_file() (either at delta calculation phase, or
307307
* writing phase) also fills up the delta base cache

builtin/pack-objects.c

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
419419
{
420420
struct packed_git *p = IN_PACK(entry);
421421
struct pack_window *w_curs = NULL;
422-
struct revindex_entry *revidx;
422+
uint32_t pos;
423423
off_t offset;
424424
enum object_type type = oe_type(entry);
425425
off_t datalen;
@@ -436,10 +436,15 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
436436
type, entry_size);
437437

438438
offset = entry->in_pack_offset;
439-
revidx = find_pack_revindex(p, offset);
440-
datalen = revidx[1].offset - offset;
439+
if (offset_to_pack_pos(p, offset, &pos) < 0)
440+
die(_("write_reuse_object: could not locate %s, expected at "
441+
"offset %"PRIuMAX" in pack %s"),
442+
oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
443+
p->pack_name);
444+
datalen = pack_pos_to_offset(p, pos + 1) - offset;
441445
if (!pack_to_stdout && p->index_version > 1 &&
442-
check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
446+
check_pack_crc(p, &w_curs, offset, datalen,
447+
pack_pos_to_index(p, pos))) {
443448
error(_("bad packed object CRC for %s"),
444449
oid_to_hex(&entry->idx.oid));
445450
unuse_pack(&w_curs);
@@ -863,8 +868,8 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out,
863868
enum object_type type;
864869
unsigned long size;
865870

866-
offset = reuse_packfile->revindex[pos].offset;
867-
next = reuse_packfile->revindex[pos + 1].offset;
871+
offset = pack_pos_to_offset(reuse_packfile, pos);
872+
next = pack_pos_to_offset(reuse_packfile, pos + 1);
868873

869874
record_reused_object(offset, offset - hashfile_total(out));
870875

@@ -884,11 +889,17 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out,
884889

885890
/* Convert to REF_DELTA if we must... */
886891
if (!allow_ofs_delta) {
887-
int base_pos = find_revindex_position(reuse_packfile, base_offset);
892+
uint32_t base_pos;
888893
struct object_id base_oid;
889894

895+
if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
896+
die(_("expected object at offset %"PRIuMAX" "
897+
"in pack %s"),
898+
(uintmax_t)base_offset,
899+
reuse_packfile->pack_name);
900+
890901
nth_packed_object_id(&base_oid, reuse_packfile,
891-
reuse_packfile->revindex[base_pos].nr);
902+
pack_pos_to_index(reuse_packfile, base_pos));
892903

893904
len = encode_in_pack_object_header(header, sizeof(header),
894905
OBJ_REF_DELTA, size);
@@ -941,7 +952,7 @@ static size_t write_reused_pack_verbatim(struct hashfile *out,
941952
off_t to_write;
942953

943954
written = (pos * BITS_IN_EWORD);
944-
to_write = reuse_packfile->revindex[written].offset
955+
to_write = pack_pos_to_offset(reuse_packfile, written)
945956
- sizeof(struct pack_header);
946957

947958
/* We're recording one chunk, not one object. */
@@ -1806,11 +1817,11 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
18061817
goto give_up;
18071818
}
18081819
if (reuse_delta && !entry->preferred_base) {
1809-
struct revindex_entry *revidx;
1810-
revidx = find_pack_revindex(p, ofs);
1811-
if (!revidx)
1820+
uint32_t pos;
1821+
if (offset_to_pack_pos(p, ofs, &pos) < 0)
18121822
goto give_up;
1813-
if (!nth_packed_object_id(&base_ref, p, revidx->nr))
1823+
if (!nth_packed_object_id(&base_ref, p,
1824+
pack_pos_to_index(p, pos)))
18141825
have_base = 1;
18151826
}
18161827
entry->in_pack_header_size = used + used_0;

pack-bitmap.c

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -407,11 +407,14 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,
407407
static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git,
408408
const struct object_id *oid)
409409
{
410+
uint32_t pos;
410411
off_t offset = find_pack_entry_one(oid->hash, bitmap_git->pack);
411412
if (!offset)
412413
return -1;
413414

414-
return find_revindex_position(bitmap_git->pack, offset);
415+
if (offset_to_pack_pos(bitmap_git->pack, offset, &pos) < 0)
416+
return -1;
417+
return pos;
415418
}
416419

417420
static int bitmap_position(struct bitmap_index *bitmap_git,
@@ -708,21 +711,22 @@ static void show_objects_for_type(
708711

709712
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
710713
struct object_id oid;
711-
struct revindex_entry *entry;
712-
uint32_t hash = 0;
714+
uint32_t hash = 0, index_pos;
715+
off_t ofs;
713716

714717
if ((word >> offset) == 0)
715718
break;
716719

717720
offset += ewah_bit_ctz64(word >> offset);
718721

719-
entry = &bitmap_git->pack->revindex[pos + offset];
720-
nth_packed_object_id(&oid, bitmap_git->pack, entry->nr);
722+
index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
723+
ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
724+
nth_packed_object_id(&oid, bitmap_git->pack, index_pos);
721725

722726
if (bitmap_git->hashes)
723-
hash = get_be32(bitmap_git->hashes + entry->nr);
727+
hash = get_be32(bitmap_git->hashes + index_pos);
724728

725-
show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset);
729+
show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs);
726730
}
727731
}
728732
}
@@ -831,11 +835,11 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
831835
oi.sizep = &size;
832836

833837
if (pos < pack->num_objects) {
834-
struct revindex_entry *entry = &pack->revindex[pos];
835-
if (packed_object_info(the_repository, pack,
836-
entry->offset, &oi) < 0) {
838+
off_t ofs = pack_pos_to_offset(pack, pos);
839+
if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
837840
struct object_id oid;
838-
nth_packed_object_id(&oid, pack, entry->nr);
841+
nth_packed_object_id(&oid, pack,
842+
pack_pos_to_index(pack, pos));
839843
die(_("unable to get size of %s"), oid_to_hex(&oid));
840844
}
841845
} else {
@@ -1065,23 +1069,21 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
10651069
struct bitmap *reuse,
10661070
struct pack_window **w_curs)
10671071
{
1068-
struct revindex_entry *revidx;
1069-
off_t offset;
1072+
off_t offset, header;
10701073
enum object_type type;
10711074
unsigned long size;
10721075

10731076
if (pos >= bitmap_git->pack->num_objects)
10741077
return; /* not actually in the pack */
10751078

1076-
revidx = &bitmap_git->pack->revindex[pos];
1077-
offset = revidx->offset;
1079+
offset = header = pack_pos_to_offset(bitmap_git->pack, pos);
10781080
type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
10791081
if (type < 0)
10801082
return; /* broken packfile, punt */
10811083

10821084
if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
10831085
off_t base_offset;
1084-
int base_pos;
1086+
uint32_t base_pos;
10851087

10861088
/*
10871089
* Find the position of the base object so we can look it up
@@ -1092,11 +1094,10 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
10921094
* more detail.
10931095
*/
10941096
base_offset = get_delta_base(bitmap_git->pack, w_curs,
1095-
&offset, type, revidx->offset);
1097+
&offset, type, header);
10961098
if (!base_offset)
10971099
return;
1098-
base_pos = find_revindex_position(bitmap_git->pack, base_offset);
1099-
if (base_pos < 0)
1100+
if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0)
11001101
return;
11011102

11021103
/*
@@ -1391,11 +1392,10 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
13911392

13921393
for (i = 0; i < num_objects; ++i) {
13931394
struct object_id oid;
1394-
struct revindex_entry *entry;
13951395
struct object_entry *oe;
13961396

1397-
entry = &bitmap_git->pack->revindex[i];
1398-
nth_packed_object_id(&oid, bitmap_git->pack, entry->nr);
1397+
nth_packed_object_id(&oid, bitmap_git->pack,
1398+
pack_pos_to_index(bitmap_git->pack, i));
13991399
oe = packlist_find(mapping, &oid);
14001400

14011401
if (oe)

pack-revindex.c

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
#include "object-store.h"
44
#include "packfile.h"
55

6+
struct revindex_entry {
7+
off_t offset;
8+
unsigned int nr;
9+
};
10+
611
/*
712
* Pack index for existing packs give us easy access to the offsets into
813
* corresponding pack file where each object's data starts, but the entries
@@ -169,17 +174,24 @@ int load_pack_revindex(struct packed_git *p)
169174
return 0;
170175
}
171176

172-
int find_revindex_position(struct packed_git *p, off_t ofs)
177+
int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
173178
{
174-
int lo = 0;
175-
int hi = p->num_objects + 1;
176-
const struct revindex_entry *revindex = p->revindex;
179+
unsigned lo, hi;
180+
181+
if (load_pack_revindex(p) < 0)
182+
return -1;
183+
184+
lo = 0;
185+
hi = p->num_objects + 1;
177186

178187
do {
179188
const unsigned mi = lo + (hi - lo) / 2;
180-
if (revindex[mi].offset == ofs) {
181-
return mi;
182-
} else if (ofs < revindex[mi].offset)
189+
off_t got = pack_pos_to_offset(p, mi);
190+
191+
if (got == ofs) {
192+
*pos = mi;
193+
return 0;
194+
} else if (ofs < got)
183195
hi = mi;
184196
else
185197
lo = mi + 1;
@@ -189,17 +201,20 @@ int find_revindex_position(struct packed_git *p, off_t ofs)
189201
return -1;
190202
}
191203

192-
struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs)
204+
uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos)
193205
{
194-
int pos;
195-
196-
if (load_pack_revindex(p))
197-
return NULL;
198-
199-
pos = find_revindex_position(p, ofs);
200-
201-
if (pos < 0)
202-
return NULL;
206+
if (!p->revindex)
207+
BUG("pack_pos_to_index: reverse index not yet loaded");
208+
if (p->num_objects <= pos)
209+
BUG("pack_pos_to_index: out-of-bounds object at %"PRIu32, pos);
210+
return p->revindex[pos].nr;
211+
}
203212

204-
return p->revindex + pos;
213+
off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)
214+
{
215+
if (!p->revindex)
216+
BUG("pack_pos_to_index: reverse index not yet loaded");
217+
if (p->num_objects < pos)
218+
BUG("pack_pos_to_offset: out-of-bounds object at %"PRIu32, pos);
219+
return p->revindex[pos].offset;
205220
}

pack-revindex.h

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,62 @@
11
#ifndef PACK_REVINDEX_H
22
#define PACK_REVINDEX_H
33

4-
struct packed_git;
4+
/**
5+
* A revindex allows converting efficiently between three properties
6+
* of an object within a pack:
7+
*
8+
* - index position: the numeric position within the list of sorted object ids
9+
* found in the .idx file
10+
*
11+
* - pack position: the numeric position within the list of objects in their
12+
* order within the actual .pack file (i.e., 0 is the first object in the
13+
* .pack, 1 is the second, and so on)
14+
*
15+
* - offset: the byte offset within the .pack file at which the object contents
16+
* can be found
17+
*/
518

6-
struct revindex_entry {
7-
off_t offset;
8-
unsigned int nr;
9-
};
19+
struct packed_git;
1020

21+
/*
22+
* load_pack_revindex populates the revindex's internal data-structures for the
23+
* given pack, returning zero on success and a negative value otherwise.
24+
*/
1125
int load_pack_revindex(struct packed_git *p);
12-
int find_revindex_position(struct packed_git *p, off_t ofs);
1326

14-
struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs);
27+
/*
28+
* offset_to_pack_pos converts an object offset to a pack position. This
29+
* function returns zero on success, and a negative number otherwise. The
30+
* parameter 'pos' is usable only on success.
31+
*
32+
* If the reverse index has not yet been loaded, this function loads it lazily,
33+
* and returns a negative number if an error was encountered.
34+
*
35+
* This function runs in time O(log N) with the number of objects in the pack.
36+
*/
37+
int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos);
38+
39+
/*
40+
* pack_pos_to_index converts the given pack-relative position 'pos' by
41+
* returning an index-relative position.
42+
*
43+
* If the reverse index has not yet been loaded, or the position is out of
44+
* bounds, this function aborts.
45+
*
46+
* This function runs in constant time.
47+
*/
48+
uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos);
49+
50+
/*
51+
* pack_pos_to_offset converts the given pack-relative position 'pos' into a
52+
* pack offset. For a pack with 'N' objects, asking for position 'N' will return
53+
* the total size (in bytes) of the pack.
54+
*
55+
* If the reverse index has not yet been loaded, or the position is out of
56+
* bounds, this function aborts.
57+
*
58+
* This function runs in constant time.
59+
*/
60+
off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos);
1561

1662
#endif

0 commit comments

Comments
 (0)