Skip to content

Commit 27e1e22

Browse files
peff authored and gitster committed
prune: factor out loose-object directory traversal
Prune has to walk $GIT_DIR/objects/?? in order to find the set of loose objects to prune. Other parts of the code (e.g., count-objects) want to do the same. Let's factor it out into a reusable for_each-style function.

Note that this is not quite a straight code movement. The original code had strange behavior when it found a file of the form "[0-9a-f]{2}/.{38}" that did _not_ contain all hex digits. It executed a "break" from the loop, meaning that we stopped pruning in that directory (but still pruned other directories!). This was probably a bug; we do not want to process the file as an object, but we should keep going otherwise (and that is how the new code handles it).

We are also a little more careful with loose object directories which fail to open. The original code silently ignored any failures, but the new code will complain about any problems besides ENOENT.

Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 718ccc9 commit 27e1e22

File tree

3 files changed

+143
-61
lines changed

3 files changed

+143
-61
lines changed

builtin/prune.c

Lines changed: 26 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath)
3131
return 0;
3232
}
3333

34-
static int prune_object(const char *fullpath, const unsigned char *sha1)
34+
static int prune_object(const unsigned char *sha1, const char *fullpath,
35+
void *data)
3536
{
3637
struct stat st;
37-
if (lstat(fullpath, &st))
38-
return error("Could not stat '%s'", fullpath);
38+
39+
/*
40+
* Do we know about this object?
41+
* It must have been reachable
42+
*/
43+
if (lookup_object(sha1))
44+
return 0;
45+
46+
if (lstat(fullpath, &st)) {
47+
/* report errors, but do not stop pruning */
48+
error("Could not stat '%s'", fullpath);
49+
return 0;
50+
}
3951
if (st.st_mtime > expire)
4052
return 0;
4153
if (show_only || verbose) {
@@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1)
4860
return 0;
4961
}
5062

51-
static int prune_dir(int i, struct strbuf *path)
63+
static int prune_cruft(const char *basename, const char *path, void *data)
5264
{
53-
size_t baselen = path->len;
54-
DIR *dir = opendir(path->buf);
55-
struct dirent *de;
56-
57-
if (!dir)
58-
return 0;
59-
60-
while ((de = readdir(dir)) != NULL) {
61-
char name[100];
62-
unsigned char sha1[20];
63-
64-
if (is_dot_or_dotdot(de->d_name))
65-
continue;
66-
if (strlen(de->d_name) == 38) {
67-
sprintf(name, "%02x", i);
68-
memcpy(name+2, de->d_name, 39);
69-
if (get_sha1_hex(name, sha1) < 0)
70-
break;
71-
72-
/*
73-
* Do we know about this object?
74-
* It must have been reachable
75-
*/
76-
if (lookup_object(sha1))
77-
continue;
78-
79-
strbuf_addf(path, "/%s", de->d_name);
80-
prune_object(path->buf, sha1);
81-
strbuf_setlen(path, baselen);
82-
continue;
83-
}
84-
if (starts_with(de->d_name, "tmp_obj_")) {
85-
strbuf_addf(path, "/%s", de->d_name);
86-
prune_tmp_file(path->buf);
87-
strbuf_setlen(path, baselen);
88-
continue;
89-
}
90-
fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name);
91-
}
92-
closedir(dir);
93-
if (!show_only)
94-
rmdir(path->buf);
65+
if (starts_with(basename, "tmp_obj_"))
66+
prune_tmp_file(path);
67+
else
68+
fprintf(stderr, "bad sha1 file: %s\n", path);
9569
return 0;
9670
}
9771

98-
static void prune_object_dir(const char *path)
72+
static int prune_subdir(int nr, const char *path, void *data)
9973
{
100-
struct strbuf buf = STRBUF_INIT;
101-
size_t baselen;
102-
int i;
103-
104-
strbuf_addstr(&buf, path);
105-
strbuf_addch(&buf, '/');
106-
baselen = buf.len;
107-
108-
for (i = 0; i < 256; i++) {
109-
strbuf_addf(&buf, "%02x", i);
110-
prune_dir(i, &buf);
111-
strbuf_setlen(&buf, baselen);
112-
}
74+
if (!show_only)
75+
rmdir(path);
76+
return 0;
11377
}
11478

11579
/*
@@ -173,7 +137,8 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
173137

174138
mark_reachable_objects(&revs, 1, progress);
175139
stop_progress(&progress);
176-
prune_object_dir(get_object_directory());
140+
for_each_loose_file_in_objdir(get_object_directory(), prune_object,
141+
prune_cruft, prune_subdir, NULL);
177142

178143
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
179144
remove_temporary_files(get_object_directory());

cache.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,39 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig
12391239
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
12401240
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
12411241

1242+
/*
 * Walk the loose-object portion of the object directory "path",
 * reporting what is found through up to three callbacks:
 *
 *  - obj_cb runs once for every loose object found; it receives the
 *    object's sha1 and the path of its file.
 *
 *  - cruft_cb runs for every file that does not look like a loose
 *    object; it receives the file's basename and its path. Only the
 *    hashed directories "objects/[0-9a-f]{2}/" are examined, so an
 *    entry such as "objects/foobar" is never reported as cruft.
 *
 *  - subdir_cb runs for each top-level hashed subdirectory of the
 *    object directory (e.g., "$OBJDIR/f0"), after the entries inside
 *    that subdirectory have been processed; it receives the numeric
 *    value of the directory name and its path.
 *
 * The "data" pointer is handed to every callback unchanged. A NULL
 * callback is simply skipped. A callback that returns non-zero ends
 * the iteration, and that value becomes the return value of
 * for_each_loose_file_in_objdir().
 */
typedef int each_loose_object_fn(const unsigned char *sha1, const char *path, void *data);
typedef int each_loose_cruft_fn(const char *basename, const char *path, void *data);
typedef int each_loose_subdir_fn(int nr, const char *path, void *data);

int for_each_loose_file_in_objdir(const char *path,
				  each_loose_object_fn obj_cb,
				  each_loose_cruft_fn cruft_cb,
				  each_loose_subdir_fn subdir_cb,
				  void *data);
1274+
12421275
struct object_info {
12431276
/* Request */
12441277
enum object_type *typep;

sha1_file.c

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3264,3 +3264,87 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
32643264
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
32653265
typename(expect));
32663266
}
3267+
3268+
static int for_each_file_in_obj_subdir(int subdir_nr,
3269+
struct strbuf *path,
3270+
each_loose_object_fn obj_cb,
3271+
each_loose_cruft_fn cruft_cb,
3272+
each_loose_subdir_fn subdir_cb,
3273+
void *data)
3274+
{
3275+
size_t baselen = path->len;
3276+
DIR *dir = opendir(path->buf);
3277+
struct dirent *de;
3278+
int r = 0;
3279+
3280+
if (!dir) {
3281+
if (errno == ENOENT)
3282+
return 0;
3283+
return error("unable to open %s: %s", path->buf, strerror(errno));
3284+
}
3285+
3286+
while ((de = readdir(dir))) {
3287+
if (is_dot_or_dotdot(de->d_name))
3288+
continue;
3289+
3290+
strbuf_setlen(path, baselen);
3291+
strbuf_addf(path, "/%s", de->d_name);
3292+
3293+
if (strlen(de->d_name) == 38) {
3294+
char hex[41];
3295+
unsigned char sha1[20];
3296+
3297+
snprintf(hex, sizeof(hex), "%02x%s",
3298+
subdir_nr, de->d_name);
3299+
if (!get_sha1_hex(hex, sha1)) {
3300+
if (obj_cb) {
3301+
r = obj_cb(sha1, path->buf, data);
3302+
if (r)
3303+
break;
3304+
}
3305+
continue;
3306+
}
3307+
}
3308+
3309+
if (cruft_cb) {
3310+
r = cruft_cb(de->d_name, path->buf, data);
3311+
if (r)
3312+
break;
3313+
}
3314+
}
3315+
strbuf_setlen(path, baselen);
3316+
3317+
if (!r && subdir_cb)
3318+
r = subdir_cb(subdir_nr, path->buf, data);
3319+
3320+
closedir(dir);
3321+
return r;
3322+
}
3323+
3324+
int for_each_loose_file_in_objdir(const char *path,
3325+
each_loose_object_fn obj_cb,
3326+
each_loose_cruft_fn cruft_cb,
3327+
each_loose_subdir_fn subdir_cb,
3328+
void *data)
3329+
{
3330+
struct strbuf buf = STRBUF_INIT;
3331+
size_t baselen;
3332+
int r = 0;
3333+
int i;
3334+
3335+
strbuf_addstr(&buf, path);
3336+
strbuf_addch(&buf, '/');
3337+
baselen = buf.len;
3338+
3339+
for (i = 0; i < 256; i++) {
3340+
strbuf_addf(&buf, "%02x", i);
3341+
r = for_each_file_in_obj_subdir(i, &buf, obj_cb, cruft_cb,
3342+
subdir_cb, data);
3343+
strbuf_setlen(&buf, baselen);
3344+
if (r)
3345+
break;
3346+
}
3347+
3348+
strbuf_release(&buf);
3349+
return r;
3350+
}

0 commit comments

Comments
 (0)