Skip to content

Commit 461cf59

Browse files
Linus Torvalds authored and Junio C Hamano committed
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop when you don't find the file" option to "git-rev-list". This patch does some of the work towards that: it removes the "parent" thing when the file disappears, so a "git annotate" could do something like git-rev-list --remove-empty --parents HEAD -- "$filename" and it would get a good graph that stops when the filename disappears (it's not perfect though: it won't remove all the uninteresting commits). It also simplifies the logic of finding tree differences a bit, at the cost of making it a tad less efficient. The old logic was two-phase: it would first simplify _only_ merges tree as it traversed the tree, and then simplify the linear parts of the remainder independently. That was pretty optimal from an efficiency standpoint because it avoids doing any comparisons that we can see are unnecessary, but it made it much harder to understand than it really needed to be. The new logic is a lot more straightforward, and compares the trees as it traverses the graph (i.e. everything is a single phase). That makes it much easier to stop graph traversal at any point where a file disappears. As an example, let's say that you have a git repository that has had a file called "A" some time in the past. That file gets renamed to B, and then gets renamed back again to A. The old "git-rev-list" would show two commits: the commit that renames B to A (because it changes A) _and_ as its parent the commit that renames A to B (because it changes A). With the new --remove-empty flag, git-rev-list will show just the commit that renames B to A as the "root" commit, and stop traversal there (because that's what you want for "annotate" - you want to stop there, and for every "root" commit you then separately see if it really is a new file, or if the path's history disappeared because it was renamed from some other file).
With this patch, you should be able to basically do a "poor man's 'git annotate'" with a fairly simple loop:

	push("HEAD", "$filename")
	while (revision,filename = pop()) {
		for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")

		pseudo-parents($i) = git-rev-list parents for that line

		if (pseudo-parents($i) is non-empty) {
			show diff of $i against pseudo-parents
			continue
		}

		/* See if the _real_ parents of $i had a rename */
		parent($i) = real-parent($i)
		if (find-rename in $parent($i)->$i)
			push $parent($i), "old-name"
	}

which should be doable in perl or something (doing stacks in shell is just too painful to be worth it, so I'm not going to do this).

Anybody want to try?

Linus
1 parent 6b94f1e commit 461cf59

File tree

1 file changed

+78
-58
lines changed

1 file changed

+78
-58
lines changed

rev-list.c

Lines changed: 78 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ static int stop_traversal = 0;
5454
static int topo_order = 0;
5555
static int no_merges = 0;
5656
static const char **paths = NULL;
57+
static int remove_empty_trees = 0;
5758

5859
static void show_commit(struct commit *commit)
5960
{
@@ -424,14 +425,33 @@ static void mark_edges_uninteresting(struct commit_list *list)
424425
}
425426
}
426427

427-
static int is_different = 0;
428+
#define TREE_SAME 0
429+
#define TREE_NEW 1
430+
#define TREE_DIFFERENT 2
431+
static int tree_difference = TREE_SAME;
428432

429433
static void file_add_remove(struct diff_options *options,
430434
int addremove, unsigned mode,
431435
const unsigned char *sha1,
432436
const char *base, const char *path)
433437
{
434-
is_different = 1;
438+
int diff = TREE_DIFFERENT;
439+
440+
/*
441+
* Is it an add of a new file? It means that
442+
* the old tree didn't have it at all, so we
443+
* will turn "TREE_SAME" -> "TREE_NEW", but
444+
* leave any "TREE_DIFFERENT" alone (and if
445+
* it already was "TREE_NEW", we'll keep it
446+
* "TREE_NEW" of course).
447+
*/
448+
if (addremove == '+') {
449+
diff = tree_difference;
450+
if (diff != TREE_SAME)
451+
return;
452+
diff = TREE_NEW;
453+
}
454+
tree_difference = diff;
435455
}
436456

437457
static void file_change(struct diff_options *options,
@@ -440,7 +460,7 @@ static void file_change(struct diff_options *options,
440460
const unsigned char *new_sha1,
441461
const char *base, const char *path)
442462
{
443-
is_different = 1;
463+
tree_difference = TREE_DIFFERENT;
444464
}
445465

446466
static struct diff_options diff_opt = {
@@ -449,12 +469,16 @@ static struct diff_options diff_opt = {
449469
.change = file_change,
450470
};
451471

452-
static int same_tree(struct tree *t1, struct tree *t2)
472+
static int compare_tree(struct tree *t1, struct tree *t2)
453473
{
454-
is_different = 0;
474+
if (!t1)
475+
return TREE_NEW;
476+
if (!t2)
477+
return TREE_DIFFERENT;
478+
tree_difference = TREE_SAME;
455479
if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
456-
return 0;
457-
return !is_different;
480+
return TREE_DIFFERENT;
481+
return tree_difference;
458482
}
459483

460484
static int same_tree_as_empty(struct tree *t1)
@@ -474,28 +498,55 @@ static int same_tree_as_empty(struct tree *t1)
474498
empty.buf = "";
475499
empty.size = 0;
476500

477-
is_different = 0;
501+
tree_difference = 0;
478502
retval = diff_tree(&empty, &real, "", &diff_opt);
479503
free(tree);
480504

481-
return retval >= 0 && !is_different;
505+
return retval >= 0 && !tree_difference;
482506
}
483507

484-
static struct commit *try_to_simplify_merge(struct commit *commit, struct commit_list *parent)
508+
static void try_to_simplify_commit(struct commit *commit)
485509
{
510+
struct commit_list **pp, *parent;
511+
486512
if (!commit->tree)
487-
return NULL;
513+
return;
488514

489-
while (parent) {
515+
if (!commit->parents) {
516+
if (!same_tree_as_empty(commit->tree))
517+
commit->object.flags |= TREECHANGE;
518+
return;
519+
}
520+
521+
pp = &commit->parents;
522+
while ((parent = *pp) != NULL) {
490523
struct commit *p = parent->item;
491-
parent = parent->next;
524+
525+
if (p->object.flags & UNINTERESTING) {
526+
pp = &parent->next;
527+
continue;
528+
}
529+
492530
parse_commit(p);
493-
if (!p->tree)
531+
switch (compare_tree(p->tree, commit->tree)) {
532+
case TREE_SAME:
533+
parent->next = NULL;
534+
commit->parents = parent;
535+
return;
536+
537+
case TREE_NEW:
538+
if (remove_empty_trees && same_tree_as_empty(p->tree)) {
539+
*pp = parent->next;
540+
continue;
541+
}
542+
/* fallthrough */
543+
case TREE_DIFFERENT:
544+
pp = &parent->next;
494545
continue;
495-
if (same_tree(commit->tree, p->tree))
496-
return p;
546+
}
547+
die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
497548
}
498-
return NULL;
549+
commit->object.flags |= TREECHANGE;
499550
}
500551

501552
static void add_parents_to_list(struct commit *commit, struct commit_list **list)
@@ -531,20 +582,14 @@ static void add_parents_to_list(struct commit *commit, struct commit_list **list
531582
}
532583

533584
/*
534-
* Ok, the commit wasn't uninteresting. If it
535-
* is a merge, try to find the parent that has
536-
* no differences in the path set if one exists.
585+
* Ok, the commit wasn't uninteresting. Try to
586+
* simplify the commit history and find the parent
587+
* that has no differences in the path set if one exists.
537588
*/
538-
if (paths && parent && parent->next) {
539-
struct commit *preferred;
540-
541-
preferred = try_to_simplify_merge(commit, parent);
542-
if (preferred) {
543-
parent->item = preferred;
544-
parent->next = NULL;
545-
}
546-
}
589+
if (paths)
590+
try_to_simplify_commit(commit);
547591

592+
parent = commit->parents;
548593
while (parent) {
549594
struct commit *p = parent->item;
550595

@@ -558,33 +603,6 @@ static void add_parents_to_list(struct commit *commit, struct commit_list **list
558603
}
559604
}
560605

561-
static void compress_list(struct commit_list *list)
562-
{
563-
while (list) {
564-
struct commit *commit = list->item;
565-
struct commit_list *parent = commit->parents;
566-
list = list->next;
567-
568-
if (!parent) {
569-
if (!same_tree_as_empty(commit->tree))
570-
commit->object.flags |= TREECHANGE;
571-
continue;
572-
}
573-
574-
/*
575-
* Exactly one parent? Check if it leaves the tree
576-
* unchanged
577-
*/
578-
if (!parent->next) {
579-
struct tree *t1 = commit->tree;
580-
struct tree *t2 = parent->item->tree;
581-
if (!t1 || !t2 || same_tree(t1, t2))
582-
continue;
583-
}
584-
commit->object.flags |= TREECHANGE;
585-
}
586-
}
587-
588606
static struct commit_list *limit_list(struct commit_list *list)
589607
{
590608
struct commit_list *newlist = NULL;
@@ -614,8 +632,6 @@ static struct commit_list *limit_list(struct commit_list *list)
614632
}
615633
if (tree_objects)
616634
mark_edges_uninteresting(newlist);
617-
if (paths && dense)
618-
compress_list(newlist);
619635
if (bisect_list)
620636
newlist = find_bisection(newlist);
621637
return newlist;
@@ -808,6 +824,10 @@ int main(int argc, const char **argv)
808824
dense = 0;
809825
continue;
810826
}
827+
if (!strcmp(arg, "--remove-empty")) {
828+
remove_empty_trees = 1;
829+
continue;
830+
}
811831
if (!strcmp(arg, "--")) {
812832
i++;
813833
break;

0 commit comments

Comments
 (0)