Skip to content

Commit 28cd730

Browse files
Abhra303 authored and gitster committed
pack-bitmap: prepare to read lookup table extension
An earlier change taught Git to write the bitmap lookup table, but Git does not yet know how to parse it. Teach Git to parse the existing bitmap lookup table. Older versions of Git are not affected by this: those versions simply ignore the lookup table. Mentored-by: Taylor Blau <[email protected]> Co-Mentored-by: Kaartic Sivaraam <[email protected]> Signed-off-by: Abhradeep Chakraborty <[email protected]> Reviewed-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 76f14b7 commit 28cd730

File tree

3 files changed

+312
-9
lines changed

3 files changed

+312
-9
lines changed

pack-bitmap.c

Lines changed: 281 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ struct bitmap_index {
8383
/* The checksum of the packfile or MIDX; points into map. */
8484
const unsigned char *checksum;
8585

86+
/*
87+
* If not NULL, this points into the commit table extension
88+
* (within the memory mapped region `map`).
89+
*/
90+
unsigned char *table_lookup;
91+
8692
/*
8793
* Extended index.
8894
*
@@ -186,6 +192,16 @@ static int load_bitmap_header(struct bitmap_index *index)
186192
index->hashes = (void *)(index_end - cache_size);
187193
index_end -= cache_size;
188194
}
195+
196+
if (flags & BITMAP_OPT_LOOKUP_TABLE) {
197+
size_t table_size = st_mult(ntohl(header->entry_count),
198+
BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
199+
if (table_size > index_end - index->map - header_size)
200+
return error(_("corrupted bitmap index file (too short to fit lookup table)"));
201+
if (git_env_bool("GIT_TEST_READ_COMMIT_TABLE", 1))
202+
index->table_lookup = (void *)(index_end - table_size);
203+
index_end -= table_size;
204+
}
189205
}
190206

191207
index->entry_count = ntohl(header->entry_count);
@@ -212,9 +228,11 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index,
212228

213229
hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret);
214230

215-
/* a 0 return code means the insertion succeeded with no changes,
216-
* because the SHA1 already existed on the map. this is bad, there
217-
* shouldn't be duplicated commits in the index */
231+
/*
232+
* A 0 return code means the insertion succeeded with no changes,
233+
* because the SHA1 already existed on the map. This is bad, there
234+
* shouldn't be duplicated commits in the index.
235+
*/
218236
if (ret == 0) {
219237
error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid));
220238
return NULL;
@@ -482,7 +500,7 @@ static int load_bitmap(struct bitmap_index *bitmap_git)
482500
!(bitmap_git->tags = read_bitmap_1(bitmap_git)))
483501
goto failed;
484502

485-
if (load_bitmap_entries_v1(bitmap_git) < 0)
503+
if (!bitmap_git->table_lookup && load_bitmap_entries_v1(bitmap_git) < 0)
486504
goto failed;
487505

488506
return 0;
@@ -570,13 +588,256 @@ struct include_data {
570588
struct bitmap *seen;
571589
};
572590

591+
/*
 * Decoded (host byte order) form of one row of the bitmap lookup table.
 */
struct bitmap_lookup_table_triplet {
	/* Commit position, as stored in the first 4 bytes of the row. */
	uint32_t commit_pos;
	/* Position within the bitmap file at which this commit's bitmap entry starts. */
	uint64_t offset;
	/*
	 * Row of the lookup table describing the bitmap this one is
	 * XOR-ed against; 0xffffffff means there is no such bitmap.
	 */
	uint32_t xor_row;
};
596+
597+
struct bitmap_lookup_table_xor_item {
598+
struct object_id oid;
599+
uint64_t offset;
600+
};
601+
602+
/*
603+
* Given a `triplet` struct pointer and pointer `p`, this
604+
* function reads the triplet beginning at `p` into the struct.
605+
* Note that this function assumes that there is enough memory
606+
* left for filling the `triplet` struct from `p`.
607+
*/
608+
static int bitmap_lookup_table_get_triplet_by_pointer(struct bitmap_lookup_table_triplet *triplet,
609+
const unsigned char *p)
610+
{
611+
if (!triplet)
612+
return -1;
613+
614+
triplet->commit_pos = get_be32(p);
615+
p += sizeof(uint32_t);
616+
triplet->offset = get_be64(p);
617+
p += sizeof(uint64_t);
618+
triplet->xor_row = get_be32(p);
619+
return 0;
620+
}
621+
622+
/*
623+
* This function gets the raw triplet from `row`'th row in the
624+
* lookup table and fills that data to the `triplet`.
625+
*/
626+
static int bitmap_lookup_table_get_triplet(struct bitmap_index *bitmap_git,
627+
uint32_t pos,
628+
struct bitmap_lookup_table_triplet *triplet)
629+
{
630+
unsigned char *p = NULL;
631+
if (pos >= bitmap_git->entry_count)
632+
return error(_("corrupt bitmap lookup table: triplet position out of index"));
633+
634+
p = bitmap_git->table_lookup + st_mult(pos, BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
635+
636+
return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
637+
}
638+
639+
/*
 * bsearch() comparison callback for the lookup table.
 *
 * `commit_pos` points to the wanted commit position (a host-order
 * uint32_t); `table_entry` points to a raw row of the lookup table,
 * whose first 4 bytes hold that row's commit position in big-endian
 * order. Returns 1, -1 or 0 when the wanted position is respectively
 * greater than, less than or equal to the row's position.
 */
static int triplet_cmp(const void *commit_pos, const void *table_entry)
{
	const uint32_t wanted = *(const uint32_t *)commit_pos;
	const uint32_t have = get_be32(table_entry);

	/* Yields exactly 1, -1 or 0 without any risk of overflow. */
	return (wanted > have) - (wanted < have);
}
657+
658+
static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git,
659+
struct object_id *oid,
660+
uint32_t *result)
661+
{
662+
int found;
663+
664+
if (bitmap_is_midx(bitmap_git))
665+
found = bsearch_midx(oid, bitmap_git->midx, result);
666+
else
667+
found = bsearch_pack(oid, bitmap_git->pack, result);
668+
669+
return found;
670+
}
671+
672+
/*
673+
* `bsearch_triplet_by_pos` function searches for the raw triplet
674+
* having commit position same as `commit_pos` and fills `triplet`
675+
* object from the raw triplet. Returns 1 on success and 0 on
676+
* failure.
677+
*/
678+
static int bitmap_bsearch_triplet_by_pos(uint32_t commit_pos,
679+
struct bitmap_index *bitmap_git,
680+
struct bitmap_lookup_table_triplet *triplet)
681+
{
682+
unsigned char *p = bsearch(&commit_pos, bitmap_git->table_lookup, bitmap_git->entry_count,
683+
BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH, triplet_cmp);
684+
685+
if (!p)
686+
return -1;
687+
688+
return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
689+
}
690+
691+
static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_git,
692+
struct commit *commit)
693+
{
694+
uint32_t commit_pos, xor_row;
695+
uint64_t offset;
696+
int flags;
697+
struct bitmap_lookup_table_triplet triplet;
698+
struct object_id *oid = &commit->object.oid;
699+
struct ewah_bitmap *bitmap;
700+
struct stored_bitmap *xor_bitmap = NULL;
701+
const int bitmap_header_size = 6;
702+
static struct bitmap_lookup_table_xor_item *xor_items = NULL;
703+
static size_t xor_items_nr = 0, xor_items_alloc = 0;
704+
static int is_corrupt = 0;
705+
int xor_flags;
706+
khiter_t hash_pos;
707+
struct bitmap_lookup_table_xor_item *xor_item;
708+
709+
if (is_corrupt)
710+
return NULL;
711+
712+
if (!bitmap_bsearch_pos(bitmap_git, oid, &commit_pos))
713+
return NULL;
714+
715+
if (bitmap_bsearch_triplet_by_pos(commit_pos, bitmap_git, &triplet) < 0)
716+
return NULL;
717+
718+
xor_items_nr = 0;
719+
offset = triplet.offset;
720+
xor_row = triplet.xor_row;
721+
722+
while (xor_row != 0xffffffff) {
723+
ALLOC_GROW(xor_items, xor_items_nr + 1, xor_items_alloc);
724+
725+
if (xor_items_nr + 1 >= bitmap_git->entry_count) {
726+
error(_("corrupt bitmap lookup table: xor chain exceed entry count"));
727+
goto corrupt;
728+
}
729+
730+
if (bitmap_lookup_table_get_triplet(bitmap_git, xor_row, &triplet) < 0)
731+
goto corrupt;
732+
733+
xor_item = &xor_items[xor_items_nr];
734+
xor_item->offset = triplet.offset;
735+
736+
if (nth_bitmap_object_oid(bitmap_git, &xor_item->oid, triplet.commit_pos) < 0) {
737+
error(_("corrupt bitmap lookup table: commit index %u out of range"),
738+
triplet.commit_pos);
739+
goto corrupt;
740+
}
741+
742+
hash_pos = kh_get_oid_map(bitmap_git->bitmaps, xor_item->oid);
743+
744+
/*
745+
* If desired bitmap is already stored, we don't need
746+
* to iterate further. Because we know that bitmaps
747+
* that are needed to be parsed to parse this bitmap
748+
* has already been stored. So, assign this stored bitmap
749+
* to the xor_bitmap.
750+
*/
751+
if (hash_pos < kh_end(bitmap_git->bitmaps) &&
752+
(xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos)))
753+
break;
754+
xor_items_nr++;
755+
xor_row = triplet.xor_row;
756+
}
757+
758+
while (xor_items_nr) {
759+
xor_item = &xor_items[xor_items_nr - 1];
760+
bitmap_git->map_pos = xor_item->offset;
761+
if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
762+
error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
763+
oid_to_hex(&xor_item->oid));
764+
goto corrupt;
765+
}
766+
767+
bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
768+
xor_flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
769+
bitmap = read_bitmap_1(bitmap_git);
770+
771+
if (!bitmap)
772+
goto corrupt;
773+
774+
xor_bitmap = store_bitmap(bitmap_git, bitmap, &xor_item->oid, xor_bitmap, xor_flags);
775+
xor_items_nr--;
776+
}
777+
778+
bitmap_git->map_pos = offset;
779+
if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
780+
error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
781+
oid_to_hex(oid));
782+
goto corrupt;
783+
}
784+
785+
/*
786+
* Don't bother reading the commit's index position or its xor
787+
* offset:
788+
*
789+
* - The commit's index position is irrelevant to us, since
790+
* load_bitmap_entries_v1 only uses it to learn the object
791+
* id which is used to compute the hashmap's key. We already
792+
* have an object id, so no need to look it up again.
793+
*
794+
* - The xor_offset is unusable for us, since it specifies how
795+
* many entries previous to ours we should look at. This
796+
* makes sense when reading the bitmaps sequentially (as in
797+
* load_bitmap_entries_v1()), since we can keep track of
798+
* each bitmap as we read them.
799+
*
800+
* But it can't work for us, since the bitmap's don't have a
801+
* fixed size. So we learn the position of the xor'd bitmap
802+
* from the commit table (and resolve it to a bitmap in the
803+
* above if-statement).
804+
*
805+
* Instead, we can skip ahead and immediately read the flags and
806+
* ewah bitmap.
807+
*/
808+
bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
809+
flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
810+
bitmap = read_bitmap_1(bitmap_git);
811+
812+
if (!bitmap)
813+
goto corrupt;
814+
815+
return store_bitmap(bitmap_git, bitmap, oid, xor_bitmap, flags);
816+
817+
corrupt:
818+
free(xor_items);
819+
is_corrupt = 1;
820+
return NULL;
821+
}
822+
573823
struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
574824
struct commit *commit)
575825
{
576826
khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps,
577827
commit->object.oid);
578-
if (hash_pos >= kh_end(bitmap_git->bitmaps))
579-
return NULL;
828+
if (hash_pos >= kh_end(bitmap_git->bitmaps)) {
829+
struct stored_bitmap *bitmap = NULL;
830+
if (!bitmap_git->table_lookup)
831+
return NULL;
832+
833+
trace2_region_enter("pack-bitmap", "reading_lookup_table", the_repository);
834+
/* NEEDSWORK: cache misses aren't recorded */
835+
bitmap = lazy_bitmap_for_commit(bitmap_git, commit);
836+
trace2_region_leave("pack-bitmap", "reading_lookup_table", the_repository);
837+
if (!bitmap)
838+
return NULL;
839+
return lookup_stored_bitmap(bitmap);
840+
}
580841
return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos));
581842
}
582843

@@ -1712,8 +1973,10 @@ void test_bitmap_walk(struct rev_info *revs)
17121973
if (revs->pending.nr != 1)
17131974
die(_("you must specify exactly one commit to test"));
17141975

1715-
fprintf_ln(stderr, "Bitmap v%d test (%d entries loaded)",
1716-
bitmap_git->version, bitmap_git->entry_count);
1976+
fprintf_ln(stderr, "Bitmap v%d test (%d entries%s)",
1977+
bitmap_git->version,
1978+
bitmap_git->entry_count,
1979+
bitmap_git->table_lookup ? "" : " loaded");
17171980

17181981
root = revs->pending.objects[0].item;
17191982
bm = bitmap_for_commit(bitmap_git, (struct commit *)root);
@@ -1766,13 +2029,22 @@ void test_bitmap_walk(struct rev_info *revs)
17662029

17672030
int test_bitmap_commits(struct repository *r)
17682031
{
1769-
struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
17702032
struct object_id oid;
17712033
MAYBE_UNUSED void *value;
2034+
struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
17722035

17732036
if (!bitmap_git)
17742037
die(_("failed to load bitmap indexes"));
17752038

2039+
/*
2040+
* As this function is only used to print bitmap selected
2041+
* commits, we don't have to read the commit table.
2042+
*/
2043+
if (bitmap_git->table_lookup) {
2044+
if (load_bitmap_entries_v1(bitmap_git) < 0)
2045+
die(_("failed to load bitmap indexes"));
2046+
}
2047+
17762048
kh_foreach(bitmap_git->bitmaps, oid, value, {
17772049
printf_ln("%s", oid_to_hex(&oid));
17782050
});

pack-bitmap.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@ struct bitmap_disk_header {
2323

2424
#define NEEDS_BITMAP (1u<<22)
2525

26+
/*
27+
* The width in bytes of a single triplet in the lookup table
28+
* extension:
29+
* (commit_pos, offset, xor_row)
30+
*
31+
* whose fields are 32-, 64-, 32- bits wide, respectively.
32+
*/
33+
#define BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH (16)
34+
2635
enum pack_bitmap_opts {
2736
BITMAP_OPT_FULL_DAG = 0x1,
2837
BITMAP_OPT_HASH_CACHE = 0x4,

0 commit comments

Comments
 (0)