Skip to content

Commit 4388576

Browse files
Barret Rhoden authored and gitster committed
blame: use the fingerprint heuristic to match ignored lines
This commit integrates the fuzzy fingerprint heuristic into guess_line_blames().

We actually make two passes. The first pass uses the fuzzy algorithm to find a match within the current diff chunk. If that fails, the second pass searches the entire parent file for the best match.

For an example of scanning the entire parent for a match, consider:

    commit-a 30) #include <sys/header_a.h>
    commit-b 31) #include <header_b.h>
    commit-c 32) #include <header_c.h>

Then commit X alphabetizes them:

    commit-X 30) #include <header_b.h>
    commit-X 31) #include <header_c.h>
    commit-X 32) #include <sys/header_a.h>

If we just check the parent's chunk (i.e. the first pass), we'd get:

    commit-b 30) #include <header_b.h>
    commit-c 31) #include <header_c.h>
    commit-X 32) #include <sys/header_a.h>

That's because commit X actually consists of two chunks: one chunk is removing sys/header_a.h, then some context, and the second chunk is adding sys/header_a.h.

If we scan the entire parent file, we get:

    commit-b 30) #include <header_b.h>
    commit-c 31) #include <header_c.h>
    commit-a 32) #include <sys/header_a.h>

Signed-off-by: Barret Rhoden <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 170072f commit 4388576

File tree

2 files changed

+55
-8
lines changed

2 files changed

+55
-8
lines changed

blame.c

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -997,12 +997,19 @@ static void fill_origin_fingerprints(struct blame_origin *o, mmfile_t *file)
997997
return;
998998
o->num_lines = find_line_starts(&line_starts, o->file.ptr,
999999
o->file.size);
1000-
/* TODO: Will fill in fingerprints in a future commit */
1000+
o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines);
1001+
get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts,
1002+
0, o->num_lines);
10011003
free(line_starts);
10021004
}
10031005

10041006
/*
 * Release the line fingerprints cached on origin 'o', if any.
 *
 * When o->fingerprints is set, the fingerprints are handed to
 * free_line_fingerprints() together with o->num_lines, o->num_lines is
 * reset to 0, and the pointer is cleared with FREE_AND_NULL().  When
 * o->fingerprints is NULL, this function does nothing.
 */
static void drop_origin_fingerprints(struct blame_origin *o)
10051007
{
1008+
if (o->fingerprints) {
1009+
free_line_fingerprints(o->fingerprints, o->num_lines);
1010+
o->num_lines = 0;
1011+
FREE_AND_NULL(o->fingerprints);
1012+
}
10061013
}
10071014

10081015
/*
@@ -1580,9 +1587,34 @@ static int are_lines_adjacent(struct blame_line_tracker *first,
15801587
first->s_lno + 1 == second->s_lno;
15811588
}
15821589

1590+
/*
 * Search a range of parent fingerprints for the best match to one target
 * line.
 *
 * p_fps:    fingerprints of the parent's lines
 * t_fps:    fingerprints of the target's lines
 * t_idx:    index into t_fps of the line being matched
 * from:     first parent index to examine
 * nr_lines: number of parent lines to examine, i.e. [from, from + nr_lines)
 *
 * Returns the index into p_fps of the most similar parent line, or -1 if
 * no line in the range reaches FINGERPRINT_FILE_THRESHOLD.  When several
 * parent lines share the best similarity, the one whose index is closest
 * to t_idx wins; if the distances are equal too, the later index wins.
 */
static int scan_parent_range(struct fingerprint *p_fps,
1591+
struct fingerprint *t_fps, int t_idx,
1592+
int from, int nr_lines)
1593+
{
1594+
int sim, p_idx;
1595+
/* Minimum similarity a parent line must reach to count as a match. */
#define FINGERPRINT_FILE_THRESHOLD 10
1596+
int best_sim_val = FINGERPRINT_FILE_THRESHOLD;
1597+
int best_sim_idx = -1;
1598+
1599+
for (p_idx = from; p_idx < from + nr_lines; p_idx++) {
1600+
sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]);
1601+
if (sim < best_sim_val)
1602+
continue;
1603+
/* Break ties with the closest-to-target line number */
1604+
if (sim == best_sim_val && best_sim_idx != -1 &&
1605+
abs(best_sim_idx - t_idx) < abs(p_idx - t_idx))
1606+
continue;
1607+
best_sim_val = sim;
1608+
best_sim_idx = p_idx;
1609+
}
1610+
return best_sim_idx;
1611+
}
1612+
15831613
/*
1584-
* This cheap heuristic assigns lines in the chunk to their relative location in
1585-
* the parent's chunk. Any additional lines are left with the target.
1614+
* The first pass checks the blame entry (from the target) against the parent's
1615+
* diff chunk. If that fails for a line, the second pass tries to match that
1616+
* line to any part of the parent file. That catches cases where a change was
1617+
* broken into two chunks by 'context.'
15861618
*/
15871619
static void guess_line_blames(struct blame_origin *parent,
15881620
struct blame_origin *target,
@@ -1591,18 +1623,30 @@ static void guess_line_blames(struct blame_origin *parent,
15911623
{
15921624
int i, best_idx, target_idx;
15931625
int parent_slno = tlno + offset;
1626+
int *fuzzy_matches;
15941627

1628+
fuzzy_matches = fuzzy_find_matching_lines(parent, target,
1629+
tlno, parent_slno, same,
1630+
parent_len);
15951631
for (i = 0; i < same - tlno; i++) {
15961632
target_idx = tlno + i;
1597-
best_idx = target_idx + offset;
1598-
if (best_idx < parent_slno + parent_len) {
1633+
if (fuzzy_matches && fuzzy_matches[i] >= 0) {
1634+
best_idx = fuzzy_matches[i];
1635+
} else {
1636+
best_idx = scan_parent_range(parent->fingerprints,
1637+
target->fingerprints,
1638+
target_idx, 0,
1639+
parent->num_lines);
1640+
}
1641+
if (best_idx >= 0) {
15991642
line_blames[i].is_parent = 1;
16001643
line_blames[i].s_lno = best_idx;
16011644
} else {
16021645
line_blames[i].is_parent = 0;
16031646
line_blames[i].s_lno = target_idx;
16041647
}
16051648
}
1649+
free(fuzzy_matches);
16061650
}
16071651

16081652
/*
@@ -2379,6 +2423,12 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
23792423
if (!porigin)
23802424
continue;
23812425
pass_blame_to_parent(sb, origin, porigin, 1);
2426+
/*
2427+
* Preemptively drop porigin so we can refresh the
2428+
* fingerprints if we use the parent again, which can
2429+
* occur if you ignore back-to-back commits.
2430+
*/
2431+
drop_origin_blob(porigin);
23822432
if (!origin->suspects)
23832433
goto finish;
23842434
}

t/t8014-blame-ignore-fuzzy.sh

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
test_description='git blame ignore fuzzy heuristic'
44
. ./test-lib.sh
55

6-
# short circuit until blame has the fuzzy capabilities
7-
test_done
8-
96
pick_author='s/^[0-9a-f^]* *(\([^ ]*\) .*/\1/'
107

118
# Each test is composed of 4 variables:

0 commit comments

Comments
 (0)