Skip to content

Commit 018b9de

Browse files
peffgitster
authored andcommitted
pretty: lazy-load commit data when expanding user-format
When we expand a user-format, we try to avoid work that isn't necessary for the output. For instance, we don't bother parsing the commit header until we know we need the author, subject, etc. But we do always load the commit object's contents from disk, even if the format doesn't require it (e.g., just "%H"). Traditionally this didn't matter much, because we'd have loaded it as part of the traversal anyway, and we'd typically have those bytes attached to the commit struct (or these days, cached in a commit-slab). But when we have a commit-graph, we might easily get to the point of pretty-printing a commit without ever having looked at the actual object contents. We should push off that load (and reencoding) until we're certain that it's needed. I think the results of p4205 show the advantage pretty clearly (we serve parent and tree oids out of the commit struct itself, so they benefit as well): # using git.git as the test repo Test HEAD^ HEAD ---------------------------------------------------------------------- 4205.1: log with %H 0.40(0.39+0.01) 0.03(0.02+0.01) -92.5% 4205.2: log with %h 0.45(0.44+0.01) 0.09(0.09+0.00) -80.0% 4205.3: log with %T 0.40(0.39+0.00) 0.04(0.04+0.00) -90.0% 4205.4: log with %t 0.46(0.46+0.00) 0.09(0.08+0.01) -80.4% 4205.5: log with %P 0.39(0.39+0.00) 0.03(0.03+0.00) -92.3% 4205.6: log with %p 0.46(0.46+0.00) 0.10(0.09+0.00) -78.3% 4205.7: log with %h-%h-%h 0.52(0.51+0.01) 0.15(0.14+0.00) -71.2% 4205.8: log with %an-%ae-%s 0.42(0.41+0.00) 0.42(0.41+0.01) +0.0% # using linux.git as the test repo Test HEAD^ HEAD ---------------------------------------------------------------------- 4205.1: log with %H 7.12(6.97+0.14) 0.76(0.65+0.11) -89.3% 4205.2: log with %h 7.35(7.19+0.16) 1.30(1.19+0.11) -82.3% 4205.3: log with %T 7.58(7.42+0.15) 1.02(0.94+0.08) -86.5% 4205.4: log with %t 8.05(7.89+0.15) 1.55(1.41+0.13) -80.7% 4205.5: log with %P 7.12(7.01+0.10) 0.76(0.69+0.07) -89.3% 4205.6: log with %p 7.38(7.27+0.10) 1.32(1.20+0.12) -82.1% 4205.7: log with %h-%h-%h 7.81(7.67+0.13) 1.79(1.67+0.12) -77.1% 4205.8: log with %an-%ae-%s 7.90(7.74+0.15) 7.81(7.66+0.15) -1.1% I added the final test to show where we don't improve (the 1% there is just lucky noise), but also as a regression test to make sure we're not doing anything stupid like loading the commit multiple times when there are several placeholders that need it. Reported-by: Michael Haggerty <[email protected]> Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent e636282 commit 018b9de

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

pretty.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,7 @@ enum trunc_type {
783783
};
784784

785785
struct format_commit_context {
786+
struct repository *repository;
786787
const struct commit *commit;
787788
const struct pretty_print_context *pretty_ctx;
788789
unsigned commit_header_parsed:1;
@@ -1373,10 +1374,13 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
13731374
return 2;
13741375
}
13751376

1376-
13771377
/* For the rest we have to parse the commit header. */
1378-
if (!c->commit_header_parsed)
1378+
if (!c->commit_header_parsed) {
1379+
msg = c->message =
1380+
repo_logmsg_reencode(c->repository, commit,
1381+
&c->commit_encoding, "UTF-8");
13791382
parse_commit_header(c);
1383+
}
13801384

13811385
switch (placeholder[0]) {
13821386
case 'a': /* author ... */
@@ -1667,25 +1671,22 @@ void repo_format_commit_message(struct repository *r,
16671671
const struct pretty_print_context *pretty_ctx)
16681672
{
16691673
struct format_commit_context context = {
1674+
.repository = r,
16701675
.commit = commit,
16711676
.pretty_ctx = pretty_ctx,
16721677
.wrap_start = sb->len
16731678
};
16741679
const char *output_enc = pretty_ctx->output_encoding;
16751680
const char *utf8 = "UTF-8";
16761681

1677-
/*
1678-
* convert a commit message to UTF-8 first
1679-
* as far as 'format_commit_item' assumes it in UTF-8
1680-
*/
1681-
context.message = repo_logmsg_reencode(r, commit,
1682-
&context.commit_encoding,
1683-
utf8);
1684-
16851682
strbuf_expand(sb, format, format_commit_item, &context);
16861683
rewrap_message_tail(sb, &context, 0, 0, 0);
16871684

1688-
/* then convert a commit message to an actual output encoding */
1685+
/*
1686+
* Convert output to an actual output encoding; note that
1687+
* format_commit_item() will always use UTF-8, so we don't
1688+
* have to bother if that's what the output wants.
1689+
*/
16891690
if (output_enc) {
16901691
if (same_encoding(utf8, output_enc))
16911692
output_enc = NULL;

t/perf/p4205-log-pretty-formats.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ test_description='Tests the performance of various pretty format placeholders'
66

77
test_perf_default_repo
88

9-
for format in %H %h %T %t %P %p %h-%h-%h
9+
for format in %H %h %T %t %P %p %h-%h-%h %an-%ae-%s
1010
do
1111
test_perf "log with $format" "
1212
git log --format=\"$format\" >/dev/null

0 commit comments

Comments
 (0)