Skip to content

Commit 5ae18df

Browse files
jeffhostetler authored and gitster committed
midx: during verify group objects by packfile to speed verification
Teach `multi-pack-index verify` to sort the set of objects by packfile so that only one packfile needs to be open at a time. This is a performance improvement. Previously, objects were verified in OID order. This essentially requires all packfiles to be open at the same time. If the number of packfiles exceeds the open file limit, packfiles would be LRU-closed and re-opened many times. Signed-off-by: Jeff Hostetler <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 430efb8 commit 5ae18df

File tree

3 files changed

+49
-4
lines changed

3 files changed

+49
-4
lines changed

midx.c

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,20 @@ static void midx_report(const char *fmt, ...)
962962
va_end(ap);
963963
}
964964

965+
/*
 * Associates an object's position in the multi-pack-index (pos) with
 * the id of the packfile recorded for that object (pack_int_id), so
 * that an array of these can be sorted to group objects by packfile.
 */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort()-style comparator for struct pair_pos_vs_id.
 *
 * Orders entries by descending pack_int_id; entries with the same
 * pack_int_id compare equal.  Returns a positive value when _a has the
 * smaller id, a negative value when _a has the larger id, and 0 when
 * the ids match.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	/*
	 * Compare explicitly instead of returning the difference: the
	 * subtraction of two uint32_t values wraps around, and converting
	 * the result to int yields the wrong sign whenever the ids differ
	 * by more than INT_MAX.
	 */
	if (a->pack_int_id < b->pack_int_id)
		return 1;
	if (a->pack_int_id > b->pack_int_id)
		return -1;
	return 0;
}
978+
965979
/*
966980
* Limit calls to display_progress() for performance reasons.
967981
* The interval here was arbitrarily chosen.
@@ -976,6 +990,7 @@ static void midx_report(const char *fmt, ...)
976990

977991
int verify_midx_file(const char *object_dir)
978992
{
993+
struct pair_pos_vs_id *pairs = NULL;
979994
uint32_t i;
980995
struct progress *progress;
981996
struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
@@ -1019,16 +1034,42 @@ int verify_midx_file(const char *object_dir)
10191034
}
10201035
stop_progress(&progress);
10211036

1037+
/*
1038+
* Create an array mapping each object to its packfile id. Sort it
1039+
* to group the objects by packfile. Use this permutation to visit
1040+
* each of the objects and only require 1 packfile to be open at a
1041+
* time.
1042+
*/
1043+
ALLOC_ARRAY(pairs, m->num_objects);
1044+
for (i = 0; i < m->num_objects; i++) {
1045+
pairs[i].pos = i;
1046+
pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
1047+
}
1048+
1049+
progress = start_sparse_progress(_("Sorting objects by packfile"),
1050+
m->num_objects);
1051+
display_progress(progress, 0); /* TODO: Measure QSORT() progress */
1052+
QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
1053+
stop_progress(&progress);
1054+
10221055
progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
10231056
for (i = 0; i < m->num_objects; i++) {
10241057
struct object_id oid;
10251058
struct pack_entry e;
10261059
off_t m_offset, p_offset;
10271060

1028-
nth_midxed_object_oid(&oid, m, i);
1061+
if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
1062+
m->packs[pairs[i-1].pack_int_id])
1063+
{
1064+
close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
1065+
close_pack_index(m->packs[pairs[i-1].pack_int_id]);
1066+
}
1067+
1068+
nth_midxed_object_oid(&oid, m, pairs[i].pos);
1069+
10291070
if (!fill_midx_entry(&oid, &e, m)) {
10301071
midx_report(_("failed to load pack entry for oid[%d] = %s"),
1031-
i, oid_to_hex(&oid));
1072+
pairs[i].pos, oid_to_hex(&oid));
10321073
continue;
10331074
}
10341075

@@ -1043,11 +1084,13 @@ int verify_midx_file(const char *object_dir)
10431084

10441085
if (m_offset != p_offset)
10451086
midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1046-
i, oid_to_hex(&oid), m_offset, p_offset);
1087+
pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
10471088

10481089
midx_display_sparse_progress(progress, i + 1);
10491090
}
10501091
stop_progress(&progress);
10511092

1093+
free(pairs);
1094+
10521095
return verify_midx_error;
10531096
}

packfile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ void close_pack_windows(struct packed_git *p)
309309
}
310310
}
311311

312-
static int close_pack_fd(struct packed_git *p)
312+
int close_pack_fd(struct packed_git *p)
313313
{
314314
if (p->pack_fd < 0)
315315
return 0;

packfile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ extern int open_pack_index(struct packed_git *);
7676
*/
7777
extern void close_pack_index(struct packed_git *);
7878

79+
int close_pack_fd(struct packed_git *p);
80+
7981
extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
8082

8183
extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);

0 commit comments

Comments
 (0)