Skip to content

Commit 8713ab3

Browse files
spearce authored and Junio C Hamano committed
Improve git-describe performance by reducing revision listing.
My prior version of git-describe ran very slowly on even reasonably sized projects like git.git and linux.git, as it tended to identify a large number of possible tags and then needed to generate the revision list for each of those tags to sort them and select the best tag to describe the input commit. All we really need is the number of commits in the input revision which are not in the tag. We can generate these counts during the revision walking and tag matching loop by assigning a color to each tag and coloring the commits as we walk them. This limits us to identifying no more than 26 possible tags, as there is limited space available within the flags field of struct commit. The limitation of 26 possible tags is hopefully not going to be a problem in real usage, as most projects won't create 26 maintenance releases and merge them back into a development trunk after the development trunk was tagged with a release candidate tag. If that does occur, git-describe will start to revert to its old behavior of using the newer maintenance release tag to describe the development trunk, rather than the development trunk's own tag. The suggested workaround would be to retag the development trunk's tip. However, since even 26 possible tags can take a while to generate a description for on some projects, I'm defaulting the limit to 10 but offering the user --candidates to increase the number of possible matches if they need a more accurate result. I specifically chose 10 for the default as it seems unlikely projects will have more than 10 maintenance releases merged into a development trunk before retagging the development trunk, and it seems to perform about the same on linux.git as v1.4.4.4 git-describe. A large amount of debugging information was also added during the development of this change, so I've left it in to be toggled on with --debug. It may be useful to the end user to help them understand why git-describe took one particular tag over another.
Signed-off-by: Shawn O. Pearce <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 910c0d7 commit 8713ab3

File tree

2 files changed

+106
-49
lines changed

2 files changed

+106
-49
lines changed

Documentation/git-describe.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ OPTIONS
3535
Instead of using the default 8 hexadecimal digits as the
3636
abbreviated object name, use <n> digits.
3737

38+
--candidates=<n>::
39+
Instead of considering only the 10 most recent tags as
40+
candidates to describe the input committish, consider
41+
up to <n> candidates. Increasing <n> above 10 will take
42+
slightly longer but may produce a more accurate result.
43+
44+
--debug::
45+
Verbosely display information about the searching strategy
46+
being employed to standard error. The tag name will still
47+
be printed to standard out.
3848

3949
EXAMPLES
4050
--------
@@ -63,6 +73,26 @@ the output shows the reference path as well:
6373
[torvalds@g5 git]$ git describe --all HEAD^
6474
heads/lt/describe-g975b
6575

76+
SEARCH STRATEGY
77+
---------------
78+
79+
For each committish supplied, "git describe" will first look for
80+
a tag which tags exactly that commit. Annotated tags will always
81+
be preferred over lightweight tags, and tags with newer dates will
82+
always be preferred over tags with older dates. If an exact match
83+
is found, its name will be output and searching will stop.
84+
85+
If an exact match was not found, "git describe" will walk back
86+
through the commit history to locate an ancestor commit which
87+
has been tagged. The ancestor's tag will be output along with an
88+
abbreviation of the input committish's SHA1.
89+
90+
If multiple tags were found during the walk then the tag which
91+
has the fewest commits different from the input committish will be
92+
selected and output. Here "fewest commits different" means that
93+
the number of commits which would be shown by "git log tag..input"
94+
will be the smallest number of commits possible.
95+
6696

6797
Author
6898
------

builtin-describe.c

Lines changed: 76 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,19 @@
22
#include "commit.h"
33
#include "tag.h"
44
#include "refs.h"
5-
#include "diff.h"
6-
#include "diffcore.h"
7-
#include "revision.h"
85
#include "builtin.h"
96

7+
#define SEEN (1u<<0)
8+
#define MAX_TAGS (FLAG_BITS - 1)
9+
1010
static const char describe_usage[] =
1111
"git-describe [--all] [--tags] [--abbrev=<n>] <committish>*";
1212

13+
static int debug; /* Display lots of verbose info */
1314
static int all; /* Default to annotated tags only */
1415
static int tags; /* But allow any tags if --tags is specified */
1516
static int abbrev = DEFAULT_ABBREV;
17+
static int max_candidates = 10;
1618

1719
static unsigned int names[256], allocs[256];
1820
static struct commit_name {
@@ -115,19 +117,21 @@ static int compare_names(const void *_a, const void *_b)
115117
}
116118

117119
struct possible_tag {
118-
struct possible_tag *next;
119120
struct commit_name *name;
120121
unsigned long depth;
122+
unsigned flag_within;
121123
};
122124

123125
static void describe(const char *arg, int last_one)
124126
{
125127
unsigned char sha1[20];
126-
struct commit *cmit;
128+
struct commit *cmit, *gave_up_on = NULL;
127129
struct commit_list *list;
128130
static int initialized = 0;
129131
struct commit_name *n;
130-
struct possible_tag *all_matches, *min_match, *cur_match;
132+
struct possible_tag all_matches[MAX_TAGS], *min_match;
133+
unsigned int match_cnt = 0, annotated_cnt = 0, cur_match;
134+
unsigned long seen_commits = 0;
131135

132136
if (get_sha1(arg, sha1))
133137
die("Not a valid object name %s", arg);
@@ -150,71 +154,85 @@ static void describe(const char *arg, int last_one)
150154
return;
151155
}
152156

157+
if (debug)
158+
fprintf(stderr, "searching to describe %s\n", arg);
159+
153160
list = NULL;
154-
all_matches = NULL;
155-
cur_match = NULL;
161+
cmit->object.flags = SEEN;
156162
commit_list_insert(cmit, &list);
157163
while (list) {
158164
struct commit *c = pop_commit(&list);
159165
struct commit_list *parents = c->parents;
166+
seen_commits++;
160167
n = match(c);
161168
if (n) {
162-
struct possible_tag *p = xmalloc(sizeof(*p));
163-
p->name = n;
164-
p->next = NULL;
165-
if (cur_match)
166-
cur_match->next = p;
167-
else
168-
all_matches = p;
169-
cur_match = p;
170-
if (n->prio == 2)
171-
continue;
169+
if (match_cnt < max_candidates) {
170+
struct possible_tag *t = &all_matches[match_cnt++];
171+
t->name = n;
172+
t->depth = seen_commits - 1;
173+
t->flag_within = 1u << match_cnt;
174+
c->object.flags |= t->flag_within;
175+
if (n->prio == 2)
176+
annotated_cnt++;
177+
}
178+
else {
179+
gave_up_on = c;
180+
break;
181+
}
182+
}
183+
for (cur_match = 0; cur_match < match_cnt; cur_match++) {
184+
struct possible_tag *t = &all_matches[cur_match];
185+
if (!(c->object.flags & t->flag_within))
186+
t->depth++;
187+
}
188+
if (annotated_cnt && !list) {
189+
if (debug)
190+
fprintf(stderr, "finished search at %s\n",
191+
sha1_to_hex(c->object.sha1));
192+
break;
172193
}
173194
while (parents) {
174195
struct commit *p = parents->item;
175196
parse_commit(p);
176-
if (!(p->object.flags & SEEN)) {
177-
p->object.flags |= SEEN;
197+
if (!(p->object.flags & SEEN))
178198
insert_by_date(p, &list);
179-
}
199+
p->object.flags |= c->object.flags;
180200
parents = parents->next;
181201
}
182202
}
203+
free_commit_list(list);
183204

184-
if (!all_matches)
205+
if (!match_cnt)
185206
die("cannot describe '%s'", sha1_to_hex(cmit->object.sha1));
186207

187-
min_match = NULL;
188-
for (cur_match = all_matches; cur_match; cur_match = cur_match->next) {
189-
struct rev_info revs;
190-
struct commit *tagged = cur_match->name->commit;
191-
192-
clear_commit_marks(cmit, -1);
193-
init_revisions(&revs, NULL);
194-
tagged->object.flags |= UNINTERESTING;
195-
add_pending_object(&revs, &tagged->object, NULL);
196-
add_pending_object(&revs, &cmit->object, NULL);
197-
198-
prepare_revision_walk(&revs);
199-
cur_match->depth = 0;
200-
while ((!min_match || cur_match->depth < min_match->depth)
201-
&& get_revision(&revs))
202-
cur_match->depth++;
203-
if (!min_match || (cur_match->depth < min_match->depth
204-
&& cur_match->name->prio >= min_match->name->prio))
205-
min_match = cur_match;
206-
free_commit_list(revs.commits);
208+
min_match = &all_matches[0];
209+
for (cur_match = 1; cur_match < match_cnt; cur_match++) {
210+
struct possible_tag *t = &all_matches[cur_match];
211+
if (t->depth < min_match->depth
212+
&& t->name->prio >= min_match->name->prio)
213+
min_match = t;
214+
}
215+
if (debug) {
216+
for (cur_match = 0; cur_match < match_cnt; cur_match++) {
217+
struct possible_tag *t = &all_matches[cur_match];
218+
fprintf(stderr, " %c %8lu %s\n",
219+
min_match == t ? '*' : ' ',
220+
t->depth, t->name->path);
221+
}
222+
fprintf(stderr, "traversed %lu commits\n", seen_commits);
223+
if (gave_up_on) {
224+
fprintf(stderr,
225+
"more than %i tags found; listed %i most recent\n"
226+
"gave up search at %s\n",
227+
max_candidates, max_candidates,
228+
sha1_to_hex(gave_up_on->object.sha1));
229+
}
207230
}
208231
printf("%s-g%s\n", min_match->name->path,
209232
find_unique_abbrev(cmit->object.sha1, abbrev));
210233

211-
if (!last_one) {
212-
for (cur_match = all_matches; cur_match; cur_match = min_match) {
213-
min_match = cur_match->next;
214-
free(cur_match);
215-
}
216-
clear_commit_marks(cmit, SEEN);
217-
}
234+
if (!last_one)
235+
clear_commit_marks(cmit, -1);
218236
}
219237

220238
int cmd_describe(int argc, const char **argv, const char *prefix)
@@ -226,6 +244,8 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
226244

227245
if (*arg != '-')
228246
break;
247+
else if (!strcmp(arg, "--debug"))
248+
debug = 1;
229249
else if (!strcmp(arg, "--all"))
230250
all = 1;
231251
else if (!strcmp(arg, "--tags"))
@@ -235,6 +255,13 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
235255
if (abbrev < MINIMUM_ABBREV || 40 < abbrev)
236256
abbrev = DEFAULT_ABBREV;
237257
}
258+
else if (!strncmp(arg, "--candidates=", 13)) {
259+
max_candidates = strtoul(arg + 13, NULL, 10);
260+
if (max_candidates < 1)
261+
max_candidates = 1;
262+
else if (max_candidates > MAX_TAGS)
263+
max_candidates = MAX_TAGS;
264+
}
238265
else
239266
usage(describe_usage);
240267
}

0 commit comments

Comments
 (0)