Skip to content

Commit 2ac6c93

Browse files
committed
Merge branch 'jh/midx-verify-too-many-packs' into next
"git multi-pack-index verify" did not scale well with the number of packfiles, which is being improved. * jh/midx-verify-too-many-packs: midx: during verify group objects by packfile to speed verification midx: add progress indicators in multi-pack-index verify trace2:data: add trace2 data to midx progress: add sparse mode to force 100% complete message
2 parents b0dfa10 + 5ae18df commit 2ac6c93

File tree

6 files changed

+118
-9
lines changed

6 files changed

+118
-9
lines changed

builtin/multi-pack-index.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "config.h"
44
#include "parse-options.h"
55
#include "midx.h"
6+
#include "trace2.h"
67

78
static char const * const builtin_multi_pack_index_usage[] = {
89
N_("git multi-pack-index [--object-dir=<dir>] (write|verify)"),
@@ -40,6 +41,8 @@ int cmd_multi_pack_index(int argc, const char **argv,
4041
return 1;
4142
}
4243

44+
trace2_cmd_mode(argv[0]);
45+
4346
if (!strcmp(argv[0], "write"))
4447
return write_midx_file(opts.object_dir);
4548
if (!strcmp(argv[0], "verify"))

midx.c

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "sha1-lookup.h"
99
#include "midx.h"
1010
#include "progress.h"
11+
#include "trace2.h"
1112

1213
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
1314
#define MIDX_VERSION 1
@@ -164,6 +165,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
164165
m->pack_names[i]);
165166
}
166167

168+
trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
169+
trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
170+
167171
return m;
168172

169173
cleanup_fail:
@@ -958,8 +962,35 @@ static void midx_report(const char *fmt, ...)
958962
va_end(ap);
959963
}
960964

965+
struct pair_pos_vs_id
966+
{
967+
uint32_t pos;
968+
uint32_t pack_int_id;
969+
};
970+
971+
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
972+
{
973+
struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a;
974+
struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b;
975+
976+
return b->pack_int_id - a->pack_int_id;
977+
}
978+
979+
/*
980+
* Limit calls to display_progress() for performance reasons.
981+
* The interval here was arbitrarily chosen.
982+
*/
983+
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
984+
#define midx_display_sparse_progress(progress, n) \
985+
do { \
986+
uint64_t _n = (n); \
987+
if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
988+
display_progress(progress, _n); \
989+
} while (0)
990+
961991
int verify_midx_file(const char *object_dir)
962992
{
993+
struct pair_pos_vs_id *pairs = NULL;
963994
uint32_t i;
964995
struct progress *progress;
965996
struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
@@ -968,10 +999,15 @@ int verify_midx_file(const char *object_dir)
968999
if (!m)
9691000
return 0;
9701001

1002+
progress = start_progress(_("Looking for referenced packfiles"),
1003+
m->num_packs);
9711004
for (i = 0; i < m->num_packs; i++) {
9721005
if (prepare_midx_pack(m, i))
9731006
midx_report("failed to load pack in position %d", i);
1007+
1008+
display_progress(progress, i + 1);
9741009
}
1010+
stop_progress(&progress);
9751011

9761012
for (i = 0; i < 255; i++) {
9771013
uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
@@ -982,6 +1018,8 @@ int verify_midx_file(const char *object_dir)
9821018
i, oid_fanout1, oid_fanout2, i + 1);
9831019
}
9841020

1021+
progress = start_sparse_progress(_("Verifying OID order in MIDX"),
1022+
m->num_objects - 1);
9851023
for (i = 0; i < m->num_objects - 1; i++) {
9861024
struct object_id oid1, oid2;
9871025

@@ -991,18 +1029,47 @@ int verify_midx_file(const char *object_dir)
9911029
if (oidcmp(&oid1, &oid2) >= 0)
9921030
midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
9931031
i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
1032+
1033+
midx_display_sparse_progress(progress, i + 1);
9941034
}
1035+
stop_progress(&progress);
9951036

996-
progress = start_progress(_("Verifying object offsets"), m->num_objects);
1037+
/*
1038+
* Create an array mapping each object to its packfile id. Sort it
1039+
* to group the objects by packfile. Use this permutation to visit
1040+
* each of the objects and only require 1 packfile to be open at a
1041+
* time.
1042+
*/
1043+
ALLOC_ARRAY(pairs, m->num_objects);
1044+
for (i = 0; i < m->num_objects; i++) {
1045+
pairs[i].pos = i;
1046+
pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
1047+
}
1048+
1049+
progress = start_sparse_progress(_("Sorting objects by packfile"),
1050+
m->num_objects);
1051+
display_progress(progress, 0); /* TODO: Measure QSORT() progress */
1052+
QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
1053+
stop_progress(&progress);
1054+
1055+
progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
9971056
for (i = 0; i < m->num_objects; i++) {
9981057
struct object_id oid;
9991058
struct pack_entry e;
10001059
off_t m_offset, p_offset;
10011060

1002-
nth_midxed_object_oid(&oid, m, i);
1061+
if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
1062+
m->packs[pairs[i-1].pack_int_id])
1063+
{
1064+
close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
1065+
close_pack_index(m->packs[pairs[i-1].pack_int_id]);
1066+
}
1067+
1068+
nth_midxed_object_oid(&oid, m, pairs[i].pos);
1069+
10031070
if (!fill_midx_entry(&oid, &e, m)) {
10041071
midx_report(_("failed to load pack entry for oid[%d] = %s"),
1005-
i, oid_to_hex(&oid));
1072+
pairs[i].pos, oid_to_hex(&oid));
10061073
continue;
10071074
}
10081075

@@ -1017,11 +1084,13 @@ int verify_midx_file(const char *object_dir)
10171084

10181085
if (m_offset != p_offset)
10191086
midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1020-
i, oid_to_hex(&oid), m_offset, p_offset);
1087+
pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
10211088

1022-
display_progress(progress, i + 1);
1089+
midx_display_sparse_progress(progress, i + 1);
10231090
}
10241091
stop_progress(&progress);
10251092

1093+
free(pairs);
1094+
10261095
return verify_midx_error;
10271096
}

packfile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ void close_pack_windows(struct packed_git *p)
309309
}
310310
}
311311

312-
static int close_pack_fd(struct packed_git *p)
312+
int close_pack_fd(struct packed_git *p)
313313
{
314314
if (p->pack_fd < 0)
315315
return 0;

packfile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ extern int open_pack_index(struct packed_git *);
7676
*/
7777
extern void close_pack_index(struct packed_git *);
7878

79+
int close_pack_fd(struct packed_git *p);
80+
7981
extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
8082

8183
extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);

progress.c

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct progress {
3434
uint64_t total;
3535
unsigned last_percent;
3636
unsigned delay;
37+
unsigned sparse;
3738
struct throughput *throughput;
3839
uint64_t start_ns;
3940
};
@@ -194,7 +195,7 @@ int display_progress(struct progress *progress, uint64_t n)
194195
}
195196

196197
static struct progress *start_progress_delay(const char *title, uint64_t total,
197-
unsigned delay)
198+
unsigned delay, unsigned sparse)
198199
{
199200
struct progress *progress = malloc(sizeof(*progress));
200201
if (!progress) {
@@ -208,6 +209,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total,
208209
progress->last_value = -1;
209210
progress->last_percent = -1;
210211
progress->delay = delay;
212+
progress->sparse = sparse;
211213
progress->throughput = NULL;
212214
progress->start_ns = getnanotime();
213215
set_progress_signal();
@@ -216,16 +218,46 @@ static struct progress *start_progress_delay(const char *title, uint64_t total,
216218

217219
struct progress *start_delayed_progress(const char *title, uint64_t total)
218220
{
219-
return start_progress_delay(title, total, 2);
221+
return start_progress_delay(title, total, 2, 0);
220222
}
221223

222224
struct progress *start_progress(const char *title, uint64_t total)
223225
{
224-
return start_progress_delay(title, total, 0);
226+
return start_progress_delay(title, total, 0, 0);
227+
}
228+
229+
/*
230+
* Here "sparse" means that the caller might use some sampling criteria to
231+
* decide when to call display_progress() rather than calling it for every
232+
* integer value in[0 .. total). In particular, the caller might not call
233+
* display_progress() for the last value in the range.
234+
*
235+
* When "sparse" is set, stop_progress() will automatically force the done
236+
* message to show 100%.
237+
*/
238+
struct progress *start_sparse_progress(const char *title, uint64_t total)
239+
{
240+
return start_progress_delay(title, total, 0, 1);
241+
}
242+
243+
struct progress *start_delayed_sparse_progress(const char *title,
244+
uint64_t total)
245+
{
246+
return start_progress_delay(title, total, 2, 1);
247+
}
248+
249+
static void finish_if_sparse(struct progress *progress)
250+
{
251+
if (progress &&
252+
progress->sparse &&
253+
progress->last_value != progress->total)
254+
display_progress(progress, progress->total);
225255
}
226256

227257
void stop_progress(struct progress **p_progress)
228258
{
259+
finish_if_sparse(*p_progress);
260+
229261
stop_progress_msg(p_progress, _("done"));
230262
}
231263

progress.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ struct progress;
66
void display_throughput(struct progress *progress, uint64_t total);
77
int display_progress(struct progress *progress, uint64_t n);
88
struct progress *start_progress(const char *title, uint64_t total);
9+
struct progress *start_sparse_progress(const char *title, uint64_t total);
910
struct progress *start_delayed_progress(const char *title, uint64_t total);
11+
struct progress *start_delayed_sparse_progress(const char *title,
12+
uint64_t total);
1013
void stop_progress(struct progress **progress);
1114
void stop_progress_msg(struct progress **progress, const char *msg);
1215

0 commit comments

Comments
 (0)