Skip to content

Commit 159e7b0

Browse files
committed
fsck: detect gitmodules files
In preparation for performing fsck checks on .gitmodules files, this commit plumbs in the actual detection of the files. Note that unlike most other fsck checks, this cannot be a property of a single object: we must know that the object is found at a ".gitmodules" path at the root tree of a commit. Since the fsck code only sees one object at a time, we have to mark the related objects to fit the puzzle together. When we see a commit we mark its tree as a root tree, and when we see a root tree with a .gitmodules file, we mark the corresponding blob to be checked. In an ideal world, we'd check the objects in topological order: commits followed by trees followed by blobs. In that case we can avoid ever loading an object twice, since all markings would be complete by the time we get to the marked objects. And indeed, if we are checking a single packfile, this is the order in which Git will generally write the objects. But we can't count on that: 1. git-fsck may show us the objects in arbitrary order (loose objects are fed in sha1 order, but we may also have multiple packs, and we process each pack fully in sequence). 2. The type ordering is just what git-pack-objects happens to write now. The pack format does not require a specific order, and it's possible that future versions of Git (or a custom version trying to fool official Git's fsck checks!) may order it differently. 3. We may not even be fscking all of the relevant objects at once. Consider pushing with transfer.fsckObjects, where one push adds a blob at path "foo", and then a second push adds the same blob at path ".gitmodules". The blob is not part of the second push at all, but we need to mark and check it. So in the general case, we need to make up to three passes over the objects: once to make sure we've seen all commits, then once to cover any trees we might have missed, and then a final pass to cover any .gitmodules blobs we found in the second pass. 
We can simplify things a bit by loosening the requirement that we find .gitmodules only at root trees. Technically a file like "subdir/.gitmodules" is not parsed by Git, but it's not unreasonable for us to declare that Git is aware of all ".gitmodules" files and make them eligible for checking. That lets us drop the root-tree requirement, which eliminates one pass entirely. And it makes our worst case much better: instead of potentially queueing every root tree to be re-examined, the worst case is that we queue each unique .gitmodules blob for a second look. This patch just adds the boilerplate to find .gitmodules files. The actual content checks will come in a subsequent commit. Signed-off-by: Jeff King <peff@peff.net>
1 parent 7ac4f3a commit 159e7b0

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

fsck.c

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#include "utf8.h"
1111
#include "sha1-array.h"
1212
#include "decorate.h"
13+
#include "oidset.h"
14+
15+
/* IDs of tree entries whose name matched .gitmodules; walked by fsck_finish(). */
static struct oidset gitmodules_found = OIDSET_INIT;
16+
/*
 * IDs that fsck_finish() skips.
 * NOTE(review): no insertion into this set is visible in this hunk;
 * presumably it is filled once a blob has been checked -- confirm.
 */
static struct oidset gitmodules_done = OIDSET_INIT;
1317

1418
#define FSCK_FATAL -1
1519
#define FSCK_INFO -2
@@ -44,13 +48,16 @@
4448
FUNC(MISSING_TAG_ENTRY, ERROR) \
4549
FUNC(MISSING_TAG_OBJECT, ERROR) \
4650
FUNC(MISSING_TREE, ERROR) \
51+
FUNC(MISSING_TREE_OBJECT, ERROR) \
4752
FUNC(MISSING_TYPE, ERROR) \
4853
FUNC(MISSING_TYPE_ENTRY, ERROR) \
4954
FUNC(MULTIPLE_AUTHORS, ERROR) \
5055
FUNC(TAG_OBJECT_NOT_TAG, ERROR) \
5156
FUNC(TREE_NOT_SORTED, ERROR) \
5257
FUNC(UNKNOWN_TYPE, ERROR) \
5358
FUNC(ZERO_PADDED_DATE, ERROR) \
59+
FUNC(GITMODULES_MISSING, ERROR) \
60+
FUNC(GITMODULES_BLOB, ERROR) \
5461
/* warnings */ \
5562
FUNC(BAD_FILEMODE, WARN) \
5663
FUNC(EMPTY_NAME, WARN) \
@@ -563,6 +570,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
563570
has_dotdot |= !strcmp(name, "..");
564571
has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
565572
has_zero_pad |= *(char *)desc.buffer == '0';
573+
574+
if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
575+
oidset_insert(&gitmodules_found, oid);
576+
566577
if (update_tree_entry_gently(&desc)) {
567578
retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
568579
break;
@@ -936,3 +947,50 @@ int fsck_error_function(struct fsck_options *o,
936947
error("object %s: %s", describe_object(o, obj), message);
937948
return 1;
938949
}
950+
951+
int fsck_finish(struct fsck_options *options)
952+
{
953+
int ret = 0;
954+
struct oidset_iter iter;
955+
const struct object_id *oid;
956+
957+
oidset_iter_init(&gitmodules_found, &iter);
958+
while ((oid = oidset_iter_next(&iter))) {
959+
struct blob *blob;
960+
enum object_type type;
961+
unsigned long size;
962+
char *buf;
963+
964+
if (oidset_contains(&gitmodules_done, oid))
965+
continue;
966+
967+
blob = lookup_blob(oid);
968+
if (!blob) {
969+
ret |= report(options, &blob->object,
970+
FSCK_MSG_GITMODULES_BLOB,
971+
"non-blob found at .gitmodules");
972+
continue;
973+
}
974+
975+
buf = read_sha1_file(oid->hash, &type, &size);
976+
if (!buf) {
977+
ret |= report(options, &blob->object,
978+
FSCK_MSG_GITMODULES_MISSING,
979+
"unable to read .gitmodules blob");
980+
continue;
981+
}
982+
983+
if (type == OBJ_BLOB)
984+
ret |= fsck_blob(blob, buf, size, options);
985+
else
986+
ret |= report(options, &blob->object,
987+
FSCK_MSG_GITMODULES_BLOB,
988+
"non-blob found at .gitmodules");
989+
free(buf);
990+
}
991+
992+
993+
oidset_clear(&gitmodules_found);
994+
oidset_clear(&gitmodules_done);
995+
return ret;
996+
}

fsck.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
5353
int fsck_object(struct object *obj, void *data, unsigned long size,
5454
struct fsck_options *options);
5555

56+
/*
57+
* Some fsck checks are context-dependent, and may end up queued; run this
58+
* after completing all fsck_object() calls in order to resolve any remaining
59+
* checks.
*
* Returns the bitwise OR of the results of the queued checks (0 if none of
* them reported anything).
60+
*/
61+
int fsck_finish(struct fsck_options *options);
62+
5663
#endif

0 commit comments

Comments
 (0)