Skip to content

Commit e29e114

Browse files
author
Junio C Hamano
committed
diffcore-delta: stop using deltifier for packing.
This switches the change estimation logic used by break, rename and copy detection from delta packing code to a more line oriented one. This way, the performance-density tradeoff by delta packing code can be made without worrying about breaking the rename detection. Signed-off-by: Junio C Hamano <[email protected]>
1 parent 6541675 commit e29e114

File tree

1 file changed

+113
-28
lines changed

1 file changed

+113
-28
lines changed

diffcore-delta.c

Lines changed: 113 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,128 @@
11
#include "cache.h"
22
#include "diff.h"
33
#include "diffcore.h"
4-
#include "delta.h"
5-
#include "count-delta.h"
6-
7-
static int diffcore_count_changes_1(void *src, unsigned long src_size,
8-
void *dst, unsigned long dst_size,
9-
unsigned long delta_limit,
10-
unsigned long *src_copied,
11-
unsigned long *literal_added)
12-
{
13-
void *delta;
14-
unsigned long delta_size;
15-
16-
delta = diff_delta(src, src_size,
17-
dst, dst_size,
18-
&delta_size, delta_limit);
19-
if (!delta)
20-
/* If delta_limit is exceeded, we have too much differences */
21-
return -1;
224

23-
/* Estimate the edit size by interpreting delta. */
24-
if (count_delta(delta, delta_size, src_copied, literal_added)) {
25-
free(delta);
26-
return -1;
5+
/*
 * Fingerprint of one "extended line" of a buffer.
 *
 * hash_lines() produces an array of these and closes it with an entry
 * whose bytes field is zero; consumers walk the array until they see
 * that marker.
 */
struct linehash {
	/* number of input bytes covered by the extended line;
	 * hash value computed by hash_extended_line() */
	unsigned long bytes, hash;
};
9+
10+
/*
 * Hash one "extended line" starting at *buf_p, looking at no more than
 * `left` bytes.
 *
 * An extended line is zero or more bytes that are not greater than ' '
 * (this includes LF, so leading blank lines are swallowed), followed by
 * one byte greater than ' ', followed by zero or more non-LF bytes, and
 * terminated by a LF or by running out of input.
 *
 * Only bytes greater than ' ' contribute to the hash, so lines that
 * differ merely in blanks and control characters hash the same.
 *
 * On return *buf_p points just past the consumed bytes.  If a NUL byte
 * is met, the input is treated as binary: *buf_p is set to NULL and 0
 * is returned.
 */
static unsigned long hash_extended_line(const unsigned char **buf_p,
					unsigned long left)
{
	const unsigned char *cursor = *buf_p;
	unsigned long digest = 0;
	int seen_text = 0;

	for (; left != 0; left--) {
		unsigned ch = *cursor++;

		if (ch == '\0') {
			/* binary data: report it through the cursor */
			*buf_p = NULL;
			return 0;
		}
		if (!seen_text) {
			/* still skipping the leading blank run */
			if (ch > ' ') {
				digest = ch;
				seen_text = 1;
			}
		} else if (ch == '\n') {
			/* the LF is consumed and ends the extended line */
			break;
		} else if (ch > ' ') {
			digest = digest * 11 + ch;
		}
	}

	*buf_p = cursor;
	return digest;
}
47+
48+
static int linehash_compare(const void *a_, const void *b_)
49+
{
50+
struct linehash *a = (struct linehash *) a_;
51+
struct linehash *b = (struct linehash *) b_;
52+
if (a->hash < b->hash) return -1;
53+
if (a->hash > b->hash) return 1;
2954
return 0;
3055
}
3156

57+
static struct linehash *hash_lines(const unsigned char *buf,
58+
unsigned long size)
59+
{
60+
const unsigned char *eobuf = buf + size;
61+
struct linehash *line = NULL;
62+
int alloc = 0, used = 0;
63+
64+
while (buf < eobuf) {
65+
const unsigned char *ptr = buf;
66+
unsigned long hash = hash_extended_line(&buf, eobuf-ptr);
67+
if (!buf) {
68+
free(line);
69+
return NULL;
70+
}
71+
if (alloc <= used) {
72+
alloc = alloc_nr(alloc);
73+
line = xrealloc(line, sizeof(*line) * alloc);
74+
}
75+
line[used].bytes = buf - ptr;
76+
line[used].hash = hash;
77+
used++;
78+
}
79+
qsort(line, used, sizeof(*line), linehash_compare);
80+
81+
/* Terminate the list */
82+
if (alloc <= used)
83+
line = xrealloc(line, sizeof(*line) * (used+1));
84+
line[used].bytes = line[used].hash = 0;
85+
return line;
86+
}
87+
3288
int diffcore_count_changes(void *src, unsigned long src_size,
3389
void *dst, unsigned long dst_size,
3490
unsigned long delta_limit,
3591
unsigned long *src_copied,
3692
unsigned long *literal_added)
3793
{
38-
return diffcore_count_changes_1(src, src_size,
39-
dst, dst_size,
40-
delta_limit,
41-
src_copied,
42-
literal_added);
94+
struct linehash *src_lines, *dst_lines;
95+
unsigned long sc, la;
96+
97+
src_lines = hash_lines(src, src_size);
98+
if (!src_lines)
99+
return -1;
100+
dst_lines = hash_lines(dst, dst_size);
101+
if (!dst_lines) {
102+
free(src_lines);
103+
return -1;
104+
}
105+
sc = la = 0;
106+
while (src_lines->bytes && dst_lines->bytes) {
107+
int cmp = linehash_compare(src_lines, dst_lines);
108+
if (!cmp) {
109+
sc += src_lines->bytes;
110+
src_lines++;
111+
dst_lines++;
112+
continue;
113+
}
114+
if (cmp < 0) {
115+
src_lines++;
116+
continue;
117+
}
118+
la += dst_lines->bytes;
119+
dst_lines++;
120+
}
121+
while (dst_lines->bytes) {
122+
la += dst_lines->bytes;
123+
dst_lines++;
124+
}
125+
*src_copied = sc;
126+
*literal_added = la;
127+
return 0;
43128
}

0 commit comments

Comments
 (0)