Skip to content

Commit 8518ff8

Browse files
Kirill Smelkovgitster
authored andcommitted
combine-diff: optimize combine_diff_path sets intersection
When generating combined diff, for each commit, we intersect diff paths from diff(parent_0,commit) to diff(parent_i,commit) comparing all paths pairs, i.e. doing it the quadratic way. That is correct, but could be optimized. Paths come from trees in sorted (= tree) order, and so does diff_tree() emits resulting paths in that order too. Now if we look at diffcore transformations, all of them, except diffcore_order, preserve resulting path ordering: - skip_stat_unmatch, grep, pickaxe, filter -- just skip elements -> order stays preserved - break -- just breaks diff for a path, adding path dup after the path -> order stays preserved - detect rename/copy -- resulting paths are emitted sorted (verified empirically) So only diffcore_order changes diff paths ordering. But diffcore_order meaning affects only presentation - i.e. only how to show the diff, so we could do all the internal computations without paths reordering, and order only resultant paths set. This is faster, since, if we know two paths sets are all ordered, their intersection could be done in linear time. This patch does just that. Timings for `git log --raw --no-abbrev --no-renames` without `-c` ("git log") and with `-c` ("git log -c") before and after the patch are as follows: linux.git v3.10..v3.11 log log -c before 1.9s 20.4s after 1.9s 16.6s navy.git (private repo) log log -c before 0.83s 15.6s after 0.83s 2.1s P.S. I think linux.git case is sped up not so much as the second one, since in navy.git, there are more exotic (subtree, etc) merges. P.P.S. My tracing showed that the rest of the time (16.6s vs 1.9s) is usually spent in computing huge diffs from commit to second parent. Will try to deal with it, if I'll have time. P.P.P.S. For combine_diff_path, ->len is not needed anymore - will remove it in the next noisy cleanup path, to maintain good signal/noise ratio here. Signed-off-by: Kirill Smelkov <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 91921ce commit 8518ff8

File tree

1 file changed

+73
-21
lines changed

1 file changed

+73
-21
lines changed

combine-diff.c

Lines changed: 73 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent)
1616
{
1717
struct diff_queue_struct *q = &diff_queued_diff;
18-
struct combine_diff_path *p;
19-
int i;
18+
struct combine_diff_path *p, *pprev, *ptmp;
19+
int i, cmp;
2020

2121
if (!n) {
2222
struct combine_diff_path *list = NULL, **tail = &list;
@@ -47,28 +47,43 @@ static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr,
4747
return list;
4848
}
4949

50-
for (p = curr; p; p = p->next) {
51-
int found = 0;
52-
if (!p->len)
50+
/*
51+
* NOTE paths are coming sorted here (= in tree order)
52+
*/
53+
54+
pprev = NULL;
55+
p = curr;
56+
i = 0;
57+
58+
while (1) {
59+
if (!p)
60+
break;
61+
62+
cmp = (i >= q->nr) ? -1
63+
: strcmp(p->path, q->queue[i]->two->path);
64+
if (cmp < 0) {
65+
if (pprev)
66+
pprev->next = p->next;
67+
ptmp = p;
68+
p = p->next;
69+
free(ptmp);
70+
if (curr == ptmp)
71+
curr = p;
5372
continue;
54-
for (i = 0; i < q->nr; i++) {
55-
const char *path;
56-
int len;
73+
}
5774

58-
if (diff_unmodified_pair(q->queue[i]))
59-
continue;
60-
path = q->queue[i]->two->path;
61-
len = strlen(path);
62-
if (len == p->len && !memcmp(path, p->path, len)) {
63-
found = 1;
64-
hashcpy(p->parent[n].sha1, q->queue[i]->one->sha1);
65-
p->parent[n].mode = q->queue[i]->one->mode;
66-
p->parent[n].status = q->queue[i]->status;
67-
break;
68-
}
75+
if (cmp > 0) {
76+
i++;
77+
continue;
6978
}
70-
if (!found)
71-
p->len = 0;
79+
80+
hashcpy(p->parent[n].sha1, q->queue[i]->one->sha1);
81+
p->parent[n].mode = q->queue[i]->one->mode;
82+
p->parent[n].status = q->queue[i]->status;
83+
84+
pprev = p;
85+
p = p->next;
86+
i++;
7287
}
7388
return curr;
7489
}
@@ -1295,6 +1310,13 @@ static void handle_combined_callback(struct diff_options *opt,
12951310
free(q.queue);
12961311
}
12971312

1313+
static const char *path_path(void *obj)
1314+
{
1315+
struct combine_diff_path *path = (struct combine_diff_path *)obj;
1316+
1317+
return path->path;
1318+
}
1319+
12981320
void diff_tree_combined(const unsigned char *sha1,
12991321
const struct sha1_array *parents,
13001322
int dense,
@@ -1310,6 +1332,8 @@ void diff_tree_combined(const unsigned char *sha1,
13101332
diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
13111333
DIFF_OPT_SET(&diffopts, RECURSIVE);
13121334
DIFF_OPT_CLR(&diffopts, ALLOW_EXTERNAL);
1335+
/* tell diff_tree to emit paths in sorted (=tree) order */
1336+
diffopts.orderfile = NULL;
13131337

13141338
show_log_first = !!rev->loginfo && !rev->no_commit_id;
13151339
needsep = 0;
@@ -1335,6 +1359,13 @@ void diff_tree_combined(const unsigned char *sha1,
13351359
printf("%s%c", diff_line_prefix(opt),
13361360
opt->line_termination);
13371361
}
1362+
1363+
/* if showing diff, show it in requested order */
1364+
if (diffopts.output_format != DIFF_FORMAT_NO_OUTPUT &&
1365+
opt->orderfile) {
1366+
diffcore_order(opt->orderfile);
1367+
}
1368+
13381369
diff_flush(&diffopts);
13391370
}
13401371

@@ -1343,6 +1374,27 @@ void diff_tree_combined(const unsigned char *sha1,
13431374
if (p->len)
13441375
num_paths++;
13451376
}
1377+
1378+
/* order paths according to diffcore_order */
1379+
if (opt->orderfile && num_paths) {
1380+
struct obj_order *o;
1381+
1382+
o = xmalloc(sizeof(*o) * num_paths);
1383+
for (i = 0, p = paths; p; p = p->next, i++)
1384+
o[i].obj = p;
1385+
order_objects(opt->orderfile, path_path, o, num_paths);
1386+
for (i = 0; i < num_paths - 1; i++) {
1387+
p = o[i].obj;
1388+
p->next = o[i+1].obj;
1389+
}
1390+
1391+
p = o[num_paths-1].obj;
1392+
p->next = NULL;
1393+
paths = o[0].obj;
1394+
free(o);
1395+
}
1396+
1397+
13461398
if (num_paths) {
13471399
if (opt->output_format & (DIFF_FORMAT_RAW |
13481400
DIFF_FORMAT_NAME |

0 commit comments

Comments
 (0)