Skip to content

Commit f894081

Browse files
ttaylorrgitster
authored andcommitted
pack-revindex: read multi-pack reverse indexes
Implement reading for multi-pack reverse indexes, as described in the previous patch. Note that these functions don't yet have any callers, and won't until multi-pack reachability bitmaps are introduced in a later patch series. In the meantime, this patch implements some of the infrastructure necessary to support multi-pack bitmaps. There are three new functions exposed by the revindex API: - load_midx_revindex(): loads the reverse index corresponding to the given multi-pack index. - midx_to_pack_pos() and pack_pos_to_midx(): these convert between the multi-pack index and pseudo-pack order. load_midx_revindex() and pack_pos_to_midx() are both relatively straightforward. load_midx_revindex() needs a few functions to be exposed from the midx API. One to get the checksum of a midx, and another to get the .rev's filename. Similar to recent changes in the packed_git struct, three new fields are added to the multi_pack_index struct: one to keep track of the size, one to keep track of the mmap'd pointer, and another to point past the header and at the reverse index's data. pack_pos_to_midx() simply reads the corresponding entry out of the table. midx_to_pack_pos() is the trickiest, since it needs to find an object's position in the pseudo-pack order, but that order can only be recovered in the .rev file itself. This mapping can be implemented with a binary search, but note that the thing we're binary searching over isn't an array of values, but rather a permuted order of those values. So, when comparing two items, it's helpful to keep in mind the difference. Instead of a traditional binary search, where you are comparing two things directly, here we're comparing a (pack, offset) tuple with an index into the multi-pack index. That index describes another (pack, offset) tuple, and it is _those_ two tuples that are compared. Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent b25fd24 commit f894081

File tree

5 files changed

+199
-0
lines changed

5 files changed

+199
-0
lines changed

midx.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,22 @@ static uint8_t oid_version(void)
4747
}
4848
}
4949

50+
static const unsigned char *get_midx_checksum(struct multi_pack_index *m)
51+
{
52+
return m->data + m->data_len - the_hash_algo->rawsz;
53+
}
54+
5055
/*
 * Format the path "<object_dir>/pack/multi-pack-index".
 *
 * The string is produced by xstrfmt(); presumably it is heap-allocated and
 * the caller is expected to free() it -- confirm against xstrfmt().
 */
static char *get_midx_filename(const char *object_dir)
{
	char *path = xstrfmt("%s/pack/multi-pack-index", object_dir);

	return path;
}
5459

60+
char *get_midx_rev_filename(struct multi_pack_index *m)
61+
{
62+
return xstrfmt("%s/pack/multi-pack-index-%s.rev",
63+
m->object_dir, hash_to_hex(get_midx_checksum(m)));
64+
}
65+
5566
static int midx_read_oid_fanout(const unsigned char *chunk_start,
5667
size_t chunk_size, void *data)
5768
{

midx.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ struct multi_pack_index {
1515
const unsigned char *data;
1616
size_t data_len;
1717

18+
const uint32_t *revindex_data;
19+
const uint32_t *revindex_map;
20+
size_t revindex_len;
21+
1822
uint32_t signature;
1923
unsigned char version;
2024
unsigned char hash_len;
@@ -37,6 +41,8 @@ struct multi_pack_index {
3741

3842
#define MIDX_PROGRESS (1 << 0)
3943

44+
char *get_midx_rev_filename(struct multi_pack_index *m);
45+
4046
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
4147
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);
4248
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result);

pack-revindex.c

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "object-store.h"
44
#include "packfile.h"
55
#include "config.h"
6+
#include "midx.h"
67

78
struct revindex_entry {
89
off_t offset;
@@ -292,6 +293,43 @@ int load_pack_revindex(struct packed_git *p)
292293
return -1;
293294
}
294295

296+
int load_midx_revindex(struct multi_pack_index *m)
297+
{
298+
char *revindex_name;
299+
int ret;
300+
if (m->revindex_data)
301+
return 0;
302+
303+
revindex_name = get_midx_rev_filename(m);
304+
305+
ret = load_revindex_from_disk(revindex_name,
306+
m->num_objects,
307+
&m->revindex_map,
308+
&m->revindex_len);
309+
if (ret)
310+
goto cleanup;
311+
312+
m->revindex_data = (const uint32_t *)((const char *)m->revindex_map + RIDX_HEADER_SIZE);
313+
314+
cleanup:
315+
free(revindex_name);
316+
return ret;
317+
}
318+
319+
int close_midx_revindex(struct multi_pack_index *m)
320+
{
321+
if (!m || !m->revindex_map)
322+
return 0;
323+
324+
munmap((void*)m->revindex_map, m->revindex_len);
325+
326+
m->revindex_map = NULL;
327+
m->revindex_data = NULL;
328+
m->revindex_len = 0;
329+
330+
return 0;
331+
}
332+
295333
int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
296334
{
297335
unsigned lo, hi;
@@ -346,3 +384,91 @@ off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)
346384
else
347385
return nth_packed_object_offset(p, pack_pos_to_index(p, pos));
348386
}
387+
388+
uint32_t pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos)
389+
{
390+
if (!m->revindex_data)
391+
BUG("pack_pos_to_midx: reverse index not yet loaded");
392+
if (m->num_objects <= pos)
393+
BUG("pack_pos_to_midx: out-of-bounds object at %"PRIu32, pos);
394+
return get_be32(m->revindex_data + pos);
395+
}
396+
397+
/*
 * Search key handed to bsearch() by midx_to_pack_pos() and decoded by
 * midx_pack_order_cmp().
 */
struct midx_pack_key {
	/* Pack identifier and in-pack offset of the object being located. */
	uint32_t pack;
	off_t offset;

	/* Identifier of the pack that sorts first in pseudo-pack order. */
	uint32_t preferred_pack;
	/* MIDX whose reverse index is being searched. */
	struct multi_pack_index *midx;
};
404+
405+
static int midx_pack_order_cmp(const void *va, const void *vb)
406+
{
407+
const struct midx_pack_key *key = va;
408+
struct multi_pack_index *midx = key->midx;
409+
410+
uint32_t versus = pack_pos_to_midx(midx, (uint32_t*)vb - (const uint32_t *)midx->revindex_data);
411+
uint32_t versus_pack = nth_midxed_pack_int_id(midx, versus);
412+
off_t versus_offset;
413+
414+
uint32_t key_preferred = key->pack == key->preferred_pack;
415+
uint32_t versus_preferred = versus_pack == key->preferred_pack;
416+
417+
/*
418+
* First, compare the preferred-ness, noting that the preferred pack
419+
* comes first.
420+
*/
421+
if (key_preferred && !versus_preferred)
422+
return -1;
423+
else if (!key_preferred && versus_preferred)
424+
return 1;
425+
426+
/* Then, break ties first by comparing the pack IDs. */
427+
if (key->pack < versus_pack)
428+
return -1;
429+
else if (key->pack > versus_pack)
430+
return 1;
431+
432+
/* Finally, break ties by comparing offsets within a pack. */
433+
versus_offset = nth_midxed_offset(midx, versus);
434+
if (key->offset < versus_offset)
435+
return -1;
436+
else if (key->offset > versus_offset)
437+
return 1;
438+
439+
return 0;
440+
}
441+
442+
int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
443+
{
444+
struct midx_pack_key key;
445+
uint32_t *found;
446+
447+
if (!m->revindex_data)
448+
BUG("midx_to_pack_pos: reverse index not yet loaded");
449+
if (m->num_objects <= at)
450+
BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at);
451+
452+
key.pack = nth_midxed_pack_int_id(m, at);
453+
key.offset = nth_midxed_offset(m, at);
454+
key.midx = m;
455+
/*
456+
* The preferred pack sorts first, so determine its identifier by
457+
* looking at the first object in pseudo-pack order.
458+
*
459+
* Note that if no --preferred-pack is explicitly given when writing a
460+
* multi-pack index, then whichever pack has the lowest identifier
461+
* implicitly is preferred (and includes all its objects, since ties are
462+
* broken first by pack identifier).
463+
*/
464+
key.preferred_pack = nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0));
465+
466+
found = bsearch(&key, m->revindex_data, m->num_objects,
467+
sizeof(*m->revindex_data), midx_pack_order_cmp);
468+
469+
if (!found)
470+
return error("bad offset for revindex");
471+
472+
*pos = found - m->revindex_data;
473+
return 0;
474+
}

pack-revindex.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@
1414
*
1515
* - offset: the byte offset within the .pack file at which the object contents
1616
* can be found
17+
*
18+
* The revindex can also be used with a multi-pack index (MIDX). In this
19+
* setting:
20+
*
21+
* - index position refers to an object's numeric position within the MIDX
22+
*
23+
* - pack position refers to an object's position within a non-existent pack
24+
* described by the MIDX. The pack structure is described in
25+
* Documentation/technical/pack-format.txt.
26+
*
27+
* It is effectively a concatenation of all packs in the MIDX (ordered by
28+
* their numeric ID within the MIDX) in their original order within each
29+
* pack, removing duplicates, and placing the preferred pack (if any)
30+
* first.
1731
*/
1832

1933

@@ -24,6 +38,7 @@
2438
#define GIT_TEST_REV_INDEX_DIE_IN_MEMORY "GIT_TEST_REV_INDEX_DIE_IN_MEMORY"
2539

2640
struct packed_git;
41+
struct multi_pack_index;
2742

2843
/*
2944
* load_pack_revindex populates the revindex's internal data-structures for the
@@ -34,6 +49,22 @@ struct packed_git;
3449
*/
3550
int load_pack_revindex(struct packed_git *p);
3651

52+
/*
53+
* load_midx_revindex loads the '.rev' file corresponding to the given
54+
* multi-pack index by mmap-ing it and assigning pointers in the
55+
* multi_pack_index to point at it.
56+
*
57+
* A negative number is returned on error.
58+
*/
59+
int load_midx_revindex(struct multi_pack_index *m);
60+
61+
/*
62+
* Frees resources associated with a multi-pack reverse index.
63+
*
64+
* A negative number is returned on error.
65+
*/
66+
int close_midx_revindex(struct multi_pack_index *m);
67+
3768
/*
3869
* offset_to_pack_pos converts an object offset to a pack position. This
3970
* function returns zero on success, and a negative number otherwise. The
@@ -71,4 +102,26 @@ uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos);
71102
*/
72103
off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos);
73104

105+
/*
106+
* pack_pos_to_midx converts the object at position "pos" within the MIDX
107+
* pseudo-pack into a MIDX position.
108+
*
109+
* If the reverse index has not yet been loaded, or the position is out of
110+
* bounds, this function aborts.
111+
*
112+
* This function runs in constant time.
113+
*/
114+
uint32_t pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos);
115+
116+
/*
117+
* midx_to_pack_pos converts from the MIDX-relative position at "at" to the
118+
* corresponding pack position.
119+
*
120+
* If the reverse index has not yet been loaded, or the position is out of
121+
* bounds, this function aborts.
122+
*
123+
* This function runs in time O(log N) with the number of objects in the MIDX.
124+
*/
125+
int midx_to_pack_pos(struct multi_pack_index *midx, uint32_t at, uint32_t *pos);
126+
74127
#endif

packfile.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,9 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
862862

863863
if (!strcmp(file_name, "multi-pack-index"))
864864
return;
865+
if (starts_with(file_name, "multi-pack-index") &&
866+
ends_with(file_name, ".rev"))
867+
return;
865868
if (ends_with(file_name, ".idx") ||
866869
ends_with(file_name, ".rev") ||
867870
ends_with(file_name, ".pack") ||

0 commit comments

Comments
 (0)