Skip to content

Commit 7fc3fc8

Browse files
committed
survey: summarize total sizes by object type
Now that we have explored objects by count, we can expand that a bit more to summarize the data for the on-disk and inflated size of those objects. This information is helpful for diagnosing not only why disk space (and perhaps clone or fetch times) is growing, but also why certain operations are slow because the inflated size of the abstract objects that must be processed is so large. Signed-off-by: Derrick Stolee <[email protected]>
1 parent 846c8a6 commit 7fc3fc8

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

builtin/survey.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,36 @@ struct survey_report_object_summary {
6060
size_t blobs_nr;
6161
};
6262

63+
/**
 * Size statistics accumulated for one category of objects.
 *
 * 'label' names the category in the report. 'nr' is the number of
 * objects counted for it, 'disk_size' the sum of their on-disk sizes
 * and 'inflated_size' the sum of their sizes after decompressing
 * (both resolving delta bases and undoing zlib). 'num_missing' counts
 * objects in the category whose information could not be looked up.
 */
struct survey_report_object_size_summary {
	char *label;
	size_t nr;
	size_t disk_size;
	size_t inflated_size;
	size_t num_missing;
};
75+
6376
/**
6477
* This struct contains all of the information that needs to be printed
6578
* at the end of the exploration of the repository and its references.
6679
*/
6780
struct survey_report {
6881
struct survey_report_ref_summary refs;
6982
struct survey_report_object_summary reachable_objects;
83+
84+
struct survey_report_object_size_summary *by_type;
7085
};
7186

87+
/*
 * Indexes into survey_report.by_type, one per reported object type.
 * REPORT_TYPE_COUNT must stay last: it is the number of entries and is
 * used to size the array and to bound iteration over it.
 */
enum {
	REPORT_TYPE_COMMIT = 0,
	REPORT_TYPE_TREE,
	REPORT_TYPE_BLOB,
	REPORT_TYPE_TAG,
	REPORT_TYPE_COUNT
};
92+
7293
struct survey_context {
7394
struct repository *repo;
7495

@@ -278,12 +299,41 @@ static void survey_report_plaintext_reachable_object_summary(struct survey_conte
278299
clear_table(&table);
279300
}
280301

302+
static void survey_report_object_sizes(const char *title,
303+
const char *categories,
304+
struct survey_report_object_size_summary *summary,
305+
size_t summary_nr)
306+
{
307+
struct survey_table table = SURVEY_TABLE_INIT;
308+
table.table_name = title;
309+
310+
strvec_push(&table.header, xstrdup(categories));
311+
strvec_push(&table.header, xstrdup(_("Count")));
312+
strvec_push(&table.header, xstrdup(_("Disk Size")));
313+
strvec_push(&table.header, xstrdup(_("Inflated Size")));
314+
315+
for (size_t i = 0; i < summary_nr; i++) {
316+
insert_table_rowv(&table, xstrdup(summary[i].label),
317+
xstrfmt("%"PRIuMAX, summary[i].nr),
318+
xstrfmt("%"PRIuMAX, summary[i].disk_size),
319+
xstrfmt("%"PRIuMAX, summary[i].inflated_size),
320+
NULL);
321+
}
322+
323+
print_table_plaintext(&table);
324+
clear_table(&table);
325+
}
326+
281327
static void survey_report_plaintext(struct survey_context *ctx)
282328
{
283329
printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree);
284330
printf("-----------------------------------------------------\n");
285331
survey_report_plaintext_refs(ctx);
286332
survey_report_plaintext_reachable_object_summary(ctx);
333+
survey_report_object_sizes(_("TOTAL OBJECT SIZES BY TYPE"),
334+
_("Object Type"),
335+
ctx->report.by_type,
336+
REPORT_TYPE_COUNT);
287337
}
288338

289339
/*
@@ -496,6 +546,68 @@ static void increment_object_counts(
496546
}
497547
}
498548

549+
static void increment_totals(struct survey_context *ctx,
550+
struct oid_array *oids,
551+
struct survey_report_object_size_summary *summary)
552+
{
553+
for (size_t i = 0; i < oids->nr; i++) {
554+
struct object_info oi = OBJECT_INFO_INIT;
555+
unsigned oi_flags = OBJECT_INFO_FOR_PREFETCH;
556+
unsigned long object_length = 0;
557+
off_t disk_sizep = 0;
558+
enum object_type type;
559+
560+
oi.typep = &type;
561+
oi.sizep = &object_length;
562+
oi.disk_sizep = &disk_sizep;
563+
564+
if (oid_object_info_extended(ctx->repo, &oids->oid[i],
565+
&oi, oi_flags) < 0) {
566+
summary->num_missing++;
567+
} else {
568+
summary->nr++;
569+
summary->disk_size += disk_sizep;
570+
summary->inflated_size += object_length;
571+
}
572+
}
573+
}
574+
575+
static void increment_object_totals(struct survey_context *ctx,
576+
struct oid_array *oids,
577+
enum object_type type)
578+
{
579+
struct survey_report_object_size_summary *total;
580+
struct survey_report_object_size_summary summary = { 0 };
581+
582+
increment_totals(ctx, oids, &summary);
583+
584+
switch (type) {
585+
case OBJ_COMMIT:
586+
total = &ctx->report.by_type[REPORT_TYPE_COMMIT];
587+
break;
588+
589+
case OBJ_TREE:
590+
total = &ctx->report.by_type[REPORT_TYPE_TREE];
591+
break;
592+
593+
case OBJ_BLOB:
594+
total = &ctx->report.by_type[REPORT_TYPE_BLOB];
595+
break;
596+
597+
case OBJ_TAG:
598+
total = &ctx->report.by_type[REPORT_TYPE_TAG];
599+
break;
600+
601+
default:
602+
BUG("No other type allowed");
603+
}
604+
605+
total->nr += summary.nr;
606+
total->disk_size += summary.disk_size;
607+
total->inflated_size += summary.inflated_size;
608+
total->num_missing += summary.num_missing;
609+
}
610+
499611
static int survey_objects_path_walk_fn(const char *path,
500612
struct oid_array *oids,
501613
enum object_type type,
@@ -505,10 +617,20 @@ static int survey_objects_path_walk_fn(const char *path,
505617

506618
increment_object_counts(&ctx->report.reachable_objects,
507619
type, oids->nr);
620+
increment_object_totals(ctx, oids, type);
508621

509622
return 0;
510623
}
511624

625+
static void initialize_report(struct survey_context *ctx)
626+
{
627+
CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
628+
ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
629+
ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
630+
ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
631+
ctx->report.by_type[REPORT_TYPE_TAG].label = xstrdup(_("Tags"));
632+
}
633+
512634
static void survey_phase_objects(struct survey_context *ctx)
513635
{
514636
struct rev_info revs = REV_INFO_INIT;
@@ -521,12 +643,15 @@ static void survey_phase_objects(struct survey_context *ctx)
521643
info.path_fn = survey_objects_path_walk_fn;
522644
info.path_fn_data = ctx;
523645

646+
initialize_report(ctx);
647+
524648
repo_init_revisions(ctx->repo, &revs, "");
525649
revs.tag_objects = 1;
526650

527651
for (size_t i = 0; i < ctx->ref_array.nr; i++) {
528652
struct ref_array_item *item = ctx->ref_array.items[i];
529653
add_pending_oid(&revs, NULL, &item->objectname, add_flags);
654+
display_progress(ctx->progress, ++(ctx->progress_nr));
530655
}
531656

532657
walk_objects_by_path(&info);

t/t8100-git-survey.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ test_expect_success 'git survey (default)' '
5050
Commits | 10
5151
Trees | 10
5252
Blobs | 10
53+
54+
TOTAL OBJECT SIZES BY TYPE
55+
===============================================
56+
Object Type | Count | Disk Size | Inflated Size
57+
------------+-------+-----------+--------------
58+
Commits | 10 | 1523 | 2153
59+
Trees | 10 | 495 | 1706
60+
Blobs | 10 | 191 | 101
61+
Tags | 4 | 510 | 528
5362
EOF
5463
5564
test_cmp expect out

0 commit comments

Comments
 (0)