Skip to content

Commit 8b4c010

Browse files
jonathantanmy authored and gitster committed
sha1_file: support lazily fetching missing objects
Teach sha1_file to fetch objects from the remote configured in extensions.partialclone whenever an object is requested but missing.

The fetching of objects can be suppressed through a global variable. This is used by fsck and index-pack.

However, by default, such fetching is not suppressed. This is meant as a temporary measure to ensure that all Git commands work in such a situation. Future patches will update some commands to either tolerate missing objects (without fetching them) or be more efficient in fetching them.

In order to determine the code changes in sha1_file.c necessary, I investigated the following:

 (1) functions in sha1_file.c that take in a hash, without the caller needing to know how the object is stored (loose or packed)
 (2) functions in packfile.c (because I need to check callers that know about the loose/packed distinction and operate on both differently, and ensure that they can handle the concept of objects that are neither loose nor packed)

(1) is handled by the modification to sha1_object_info_extended().

For (2), I looked at for_each_packed_object and others.
For for_each_packed_object, the callers either already work or are fixed in this patch:

 - reachable - only to find recent objects
 - builtin/fsck - already knows about missing objects
 - builtin/cat-file - warning message added in this commit

Callers of the other functions do not need to be changed:

 - parse_pack_index
   - http - indirectly from http_get_info_packs
 - find_pack_entry_one
   - this searches a single pack that is provided as an argument; the caller already knows (through other means) that the sought object is in a specific pack
 - find_sha1_pack
   - fast-import - appears to be an optimization to not store a file if it is already in a pack
   - http-walker - to search through a struct alt_base
   - http-push - to search through remote packs
 - has_sha1_pack
   - builtin/fsck - already knows about promisor objects
   - builtin/count-objects - informational purposes only (check if loose object is also packed)
   - builtin/prune-packed - check if object to be pruned is packed (if not, don't prune it)
   - revision - used to exclude packed objects if requested by user
   - diff - just for optimization

Signed-off-by: Jonathan Tan <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 88e2f9e commit 8b4c010

File tree

8 files changed

+99
-8
lines changed

8 files changed

+99
-8
lines changed

builtin/cat-file.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,8 @@ static int batch_objects(struct batch_options *opt)
475475

476476
for_each_loose_object(batch_loose_object, &sa, 0);
477477
for_each_packed_object(batch_packed_object, &sa, 0);
478+
if (repository_format_partial_clone)
479+
warning("This repository has extensions.partialClone set. Some objects may not be loaded.");
478480

479481
cb.opt = opt;
480482
cb.expand = &data;

builtin/fetch-pack.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
5353
struct oid_array shallow = OID_ARRAY_INIT;
5454
struct string_list deepen_not = STRING_LIST_INIT_DUP;
5555

56+
fetch_if_missing = 0;
57+
5658
packet_trace_identity("fetch-pack");
5759

5860
memset(&args, 0, sizeof(args));

builtin/fsck.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
678678
int i;
679679
struct alternate_object_database *alt;
680680

681+
/* fsck knows how to handle missing promisor objects */
682+
fetch_if_missing = 0;
683+
681684
errors_found = 0;
682685
check_replace_refs = 0;
683686

builtin/index-pack.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,6 +1657,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
16571657
unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */
16581658
int report_end_of_input = 0;
16591659

1660+
/*
1661+
* index-pack never needs to fetch missing objects, since it only
1662+
* accesses the repo to do hash collision checks
1663+
*/
1664+
fetch_if_missing = 0;
1665+
16601666
if (argc == 2 && !strcmp(argv[1], "-h"))
16611667
usage(index_pack_usage);
16621668

cache.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,14 @@ struct object_info {
17271727
#define OBJECT_INFO_QUICK 8
17281728
extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags);
17291729

1730+
/*
1731+
* Set this to 0 to prevent sha1_object_info_extended() from fetching missing
1732+
* objects. This makes a difference only if extensions.partialClone is set.
1733+
*
1734+
* Its default value is 1.
1735+
*/
1736+
extern int fetch_if_missing;
1737+
17301738
/* Dumb servers support */
17311739
extern int update_server_info(int);
17321740

fetch-object.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ void fetch_object(const char *remote_name, const unsigned char *sha1)
1010
struct remote *remote;
1111
struct transport *transport;
1212
struct ref *ref;
13+
int original_fetch_if_missing = fetch_if_missing;
1314

15+
fetch_if_missing = 0;
1416
remote = remote_get(remote_name);
1517
if (!remote->url[0])
1618
die(_("Remote with no URL"));
@@ -21,4 +23,5 @@ void fetch_object(const char *remote_name, const unsigned char *sha1)
2123
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
2224
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
2325
transport_fetch_refs(transport, ref);
26+
fetch_if_missing = original_fetch_if_missing;
2427
}

sha1_file.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "mergesort.h"
3030
#include "quote.h"
3131
#include "packfile.h"
32+
#include "fetch-object.h"
3233

3334
const unsigned char null_sha1[GIT_MAX_RAWSZ];
3435
const struct object_id null_oid;
@@ -1144,6 +1145,8 @@ static int sha1_loose_object_info(const unsigned char *sha1,
11441145
return (status < 0) ? status : 0;
11451146
}
11461147

1148+
int fetch_if_missing = 1;
1149+
11471150
int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
11481151
{
11491152
static struct object_info blank_oi = OBJECT_INFO_INIT;
@@ -1152,6 +1155,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
11521155
const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ?
11531156
lookup_replace_object(sha1) :
11541157
sha1;
1158+
int already_retried = 0;
11551159

11561160
if (!oi)
11571161
oi = &blank_oi;
@@ -1176,19 +1180,32 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
11761180
}
11771181
}
11781182

1179-
if (!find_pack_entry(real, &e)) {
1183+
while (1) {
1184+
if (find_pack_entry(real, &e))
1185+
break;
1186+
11801187
/* Most likely it's a loose object. */
11811188
if (!sha1_loose_object_info(real, oi, flags))
11821189
return 0;
11831190

11841191
/* Not a loose object; someone else may have just packed it. */
1185-
if (flags & OBJECT_INFO_QUICK) {
1186-
return -1;
1187-
} else {
1188-
reprepare_packed_git();
1189-
if (!find_pack_entry(real, &e))
1190-
return -1;
1192+
reprepare_packed_git();
1193+
if (find_pack_entry(real, &e))
1194+
break;
1195+
1196+
/* Check if it is a missing object */
1197+
if (fetch_if_missing && repository_format_partial_clone &&
1198+
!already_retried) {
1199+
/*
1200+
* TODO Investigate having fetch_object() return
1201+
* TODO error/success and stopping the music here.
1202+
*/
1203+
fetch_object(repository_format_partial_clone, real);
1204+
already_retried = 1;
1205+
continue;
11911206
}
1207+
1208+
return -1;
11921209
}
11931210

11941211
if (oi == &blank_oi)
@@ -1197,7 +1214,6 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
11971214
* information below, so return early.
11981215
*/
11991216
return 0;
1200-
12011217
rtype = packed_object_info(e.p, e.offset, oi);
12021218
if (rtype < 0) {
12031219
mark_bad_packed_object(e.p, real);

t/t0410-partial-clone.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,4 +138,55 @@ test_expect_success 'missing CLI object, but promised, passes fsck' '
138138
git -C repo fsck "$A"
139139
'
140140

141+
test_expect_success 'fetching of missing objects' '
142+
rm -rf repo &&
143+
test_create_repo server &&
144+
test_commit -C server foo &&
145+
git -C server repack -a -d --write-bitmap-index &&
146+
147+
git clone "file://$(pwd)/server" repo &&
148+
HASH=$(git -C repo rev-parse foo) &&
149+
rm -rf repo/.git/objects/* &&
150+
151+
git -C repo config core.repositoryformatversion 1 &&
152+
git -C repo config extensions.partialclone "origin" &&
153+
git -C repo cat-file -p "$HASH" &&
154+
155+
# Ensure that the .promisor file is written, and check that its
156+
# associated packfile contains the object
157+
ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
158+
test_line_count = 1 promisorlist &&
159+
IDX=$(cat promisorlist | sed "s/promisor$/idx/") &&
160+
git verify-pack --verbose "$IDX" | grep "$HASH"
161+
'
162+
163+
LIB_HTTPD_PORT=12345 # default port, 410, cannot be used as non-root
164+
. "$TEST_DIRECTORY"/lib-httpd.sh
165+
start_httpd
166+
167+
test_expect_success 'fetching of missing objects from an HTTP server' '
168+
rm -rf repo &&
169+
SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
170+
test_create_repo "$SERVER" &&
171+
test_commit -C "$SERVER" foo &&
172+
git -C "$SERVER" repack -a -d --write-bitmap-index &&
173+
174+
git clone $HTTPD_URL/smart/server repo &&
175+
HASH=$(git -C repo rev-parse foo) &&
176+
rm -rf repo/.git/objects/* &&
177+
178+
git -C repo config core.repositoryformatversion 1 &&
179+
git -C repo config extensions.partialclone "origin" &&
180+
git -C repo cat-file -p "$HASH" &&
181+
182+
# Ensure that the .promisor file is written, and check that its
183+
# associated packfile contains the object
184+
ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
185+
test_line_count = 1 promisorlist &&
186+
IDX=$(cat promisorlist | sed "s/promisor$/idx/") &&
187+
git verify-pack --verbose "$IDX" | grep "$HASH"
188+
'
189+
190+
stop_httpd
191+
141192
test_done

0 commit comments

Comments
 (0)