Skip to content

Commit 8eca0b4

Browse files
Nicolas Pitre authored and gitster committed
implement some resilience against pack corruptions
We should be able to fall back to loose objects or alternative packs when a pack becomes corrupted. This is especially true when an object exists in one pack only as a delta but its base object is corrupted. Currently there is no way to retrieve the former object even if the latter is available in another pack or loose.

This patch allows for a delta to be resolved (with a performance cost) using a base object from a source other than the pack where that delta is located. Same thing for non-delta objects: rather than failing outright, a search is made in other packs or among loose objects when the currently active pack has the object but it is corrupted.

Of course git will become extremely noisy with error messages when that happens. However, if the operation succeeds nevertheless, a simple 'git repack -a -f -d' will "fix" the corrupted repository given that all corrupted objects have a good duplicate somewhere in the object store, possibly manually copied from another source.

Signed-off-by: Nicolas Pitre <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 1f5c74f commit 8eca0b4

File tree

2 files changed

+78
-16
lines changed

2 files changed

+78
-16
lines changed

cache.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -641,6 +641,8 @@ extern struct packed_git {
641641
const void *index_data;
642642
size_t index_size;
643643
uint32_t num_objects;
644+
uint32_t num_bad_objects;
645+
unsigned char *bad_object_sha1;
644646
int index_version;
645647
time_t mtime;
646648
int pack_fd;

sha1_file.c

Lines changed: 76 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -818,6 +818,8 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local)
818818
p->index_data = NULL;
819819
p->index_size = 0;
820820
p->num_objects = 0;
821+
p->num_bad_objects = 0;
822+
p->bad_object_sha1 = NULL;
821823
p->pack_size = st.st_size;
822824
p->next = NULL;
823825
p->windows = NULL;
@@ -982,6 +984,18 @@ void reprepare_packed_git(void)
982984
prepare_packed_git();
983985
}
984986

987+
static void mark_bad_packed_object(struct packed_git *p,
988+
const unsigned char *sha1)
989+
{
990+
unsigned i;
991+
for (i = 0; i < p->num_bad_objects; i++)
992+
if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
993+
return;
994+
p->bad_object_sha1 = xrealloc(p->bad_object_sha1, 20 * (p->num_bad_objects + 1));
995+
hashcpy(p->bad_object_sha1 + 20 * p->num_bad_objects, sha1);
996+
p->num_bad_objects++;
997+
}
998+
985999
int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
9861000
{
9871001
unsigned char real_sha1[20];
@@ -1300,20 +1314,17 @@ static off_t get_delta_base(struct packed_git *p,
13001314
while (c & 128) {
13011315
base_offset += 1;
13021316
if (!base_offset || MSB(base_offset, 7))
1303-
die("offset value overflow for delta base object");
1317+
return 0; /* overflow */
13041318
c = base_info[used++];
13051319
base_offset = (base_offset << 7) + (c & 127);
13061320
}
13071321
base_offset = delta_obj_offset - base_offset;
13081322
if (base_offset >= delta_obj_offset)
1309-
die("delta base offset out of bound");
1323+
return 0; /* out of bound */
13101324
*curpos += used;
13111325
} else if (type == OBJ_REF_DELTA) {
13121326
/* The base entry _must_ be in the same pack */
13131327
base_offset = find_pack_entry_one(base_info, p);
1314-
if (!base_offset)
1315-
die("failed to find delta-pack base object %s",
1316-
sha1_to_hex(base_info));
13171328
*curpos += 20;
13181329
} else
13191330
die("I am totally screwed");
@@ -1406,6 +1417,9 @@ const char *packed_object_info_detail(struct packed_git *p,
14061417
return typename(type);
14071418
case OBJ_OFS_DELTA:
14081419
obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1420+
if (!obj_offset)
1421+
die("pack %s contains bad delta base reference of type %s",
1422+
p->pack_name, typename(type));
14091423
if (*delta_chain_length == 0) {
14101424
revidx = find_pack_revindex(p, obj_offset);
14111425
hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr));
@@ -1600,17 +1614,41 @@ static void *unpack_delta_entry(struct packed_git *p,
16001614
off_t base_offset;
16011615

16021616
base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset);
1617+
if (!base_offset) {
1618+
error("failed to validate delta base reference "
1619+
"at offset %"PRIuMAX" from %s",
1620+
(uintmax_t)curpos, p->pack_name);
1621+
return NULL;
1622+
}
16031623
base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0);
1604-
if (!base)
1605-
die("failed to read delta base object"
1606-
" at %"PRIuMAX" from %s",
1607-
(uintmax_t)base_offset, p->pack_name);
1624+
if (!base) {
1625+
/*
1626+
* We're probably in deep shit, but let's try to fetch
1627+
* the required base anyway from another pack or loose.
1628+
* This is costly but should happen only in the presence
1629+
* of a corrupted pack, and is better than failing outright.
1630+
*/
1631+
struct revindex_entry *revidx = find_pack_revindex(p, base_offset);
1632+
const unsigned char *base_sha1 =
1633+
nth_packed_object_sha1(p, revidx->nr);
1634+
error("failed to read delta base object %s"
1635+
" at offset %"PRIuMAX" from %s",
1636+
sha1_to_hex(base_sha1), (uintmax_t)base_offset,
1637+
p->pack_name);
1638+
mark_bad_packed_object(p, base_sha1);
1639+
base = read_sha1_file(base_sha1, type, &base_size);
1640+
if (!base)
1641+
return NULL;
1642+
}
16081643

16091644
delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size);
1610-
if (!delta_data)
1611-
die("failed to unpack compressed delta"
1612-
" at %"PRIuMAX" from %s",
1613-
(uintmax_t)curpos, p->pack_name);
1645+
if (!delta_data) {
1646+
error("failed to unpack compressed delta "
1647+
"at offset %"PRIuMAX" from %s",
1648+
(uintmax_t)curpos, p->pack_name);
1649+
free(base);
1650+
return NULL;
1651+
}
16141652
result = patch_delta(base, base_size,
16151653
delta_data, delta_size,
16161654
sizep);
@@ -1642,7 +1680,9 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
16421680
data = unpack_compressed_entry(p, &w_curs, curpos, *sizep);
16431681
break;
16441682
default:
1645-
die("unknown object type %i in %s", *type, p->pack_name);
1683+
data = NULL;
1684+
error("unknown object type %i at offset %"PRIuMAX" in %s",
1685+
*type, (uintmax_t)obj_offset, p->pack_name);
16461686
}
16471687
unuse_pack(&w_curs);
16481688
return data;
@@ -1788,6 +1828,13 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons
17881828
goto next;
17891829
}
17901830

1831+
if (p->num_bad_objects) {
1832+
unsigned i;
1833+
for (i = 0; i < p->num_bad_objects; i++)
1834+
if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1835+
goto next;
1836+
}
1837+
17911838
offset = find_pack_entry_one(sha1, p);
17921839
if (offset) {
17931840
/*
@@ -1872,11 +1919,24 @@ static void *read_packed_sha1(const unsigned char *sha1,
18721919
enum object_type *type, unsigned long *size)
18731920
{
18741921
struct pack_entry e;
1922+
void *data;
18751923

18761924
if (!find_pack_entry(sha1, &e, NULL))
18771925
return NULL;
1878-
else
1879-
return cache_or_unpack_entry(e.p, e.offset, size, type, 1);
1926+
data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
1927+
if (!data) {
1928+
/*
1929+
* We're probably in deep shit, but let's try to fetch
1930+
* the required object anyway from another pack or loose.
1931+
* This should happen only in the presence of a corrupted
1932+
* pack, and is better than failing outright.
1933+
*/
1934+
error("failed to read object %s at offset %"PRIuMAX" from %s",
1935+
sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
1936+
mark_bad_packed_object(e.p, sha1);
1937+
data = read_sha1_file(sha1, type, size);
1938+
}
1939+
return data;
18801940
}
18811941

18821942
/*

0 commit comments

Comments
 (0)