Skip to content

Commit e8356e2

Browse files
bk2204gitster
authored and committed
fast-import: add options for rewriting submodules
When converting a repository using submodules from one hash algorithm to another, it is necessary to rewrite the submodules from the old algorithm to the new algorithm, since only references to submodules, not their contents, are written to the fast-export stream. Without rewriting the submodules, fast-import fails with an "Invalid dataref" error when encountering a submodule in another algorithm. Add a pair of options, --rewrite-submodules-from and --rewrite-submodules-to, that take a list of marks produced by fast-export and fast-import, respectively, when processing the submodule. Use these marks to map the submodule commits from the old algorithm to the new algorithm. We read marks into two corresponding struct mark_set objects and then perform a mapping from the old to the new using a hash table. This lets us reuse the same mark parsing code that is used elsewhere and allows us to efficiently read and match marks based on their ID, since mark files need not be sorted. Note that because we're using a khash table for the object IDs, and this table copies values of struct object_id instead of taking references to them, it's necessary to zero the struct object_id values that we use to insert and look up in the table. Otherwise, we would end up with SHA-1 values that don't match because of whatever stack garbage might be left in the unused area. Signed-off-by: brian m. carlson <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent f7529e6 commit e8356e2

File tree

3 files changed

+236
-5
lines changed

3 files changed

+236
-5
lines changed

Documentation/git-fast-import.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,26 @@ Locations of Marks Files
122122
Relative and non-relative marks may be combined by interweaving
123123
--(no-)relative-marks with the --(import|export)-marks= options.
124124

125+
Submodule Rewriting
126+
~~~~~~~~~~~~~~~~~~~
127+
128+
--rewrite-submodules-from=<name>:<file>::
129+
--rewrite-submodules-to=<name>:<file>::
130+
Rewrite the object IDs for the submodule specified by <name> from the values
131+
used in the from <file> to those used in the to <file>. The from marks should
132+
have been created by `git fast-export`, and the to marks should have been
133+
created by `git fast-import` when importing that same submodule.
134+
+
135+
<name> may be any arbitrary string not containing a colon character, but the
136+
same value must be used with both options when specifying corresponding marks.
137+
Multiple submodules may be specified with different values for <name>. It is an
138+
error not to use these options in corresponding pairs.
139+
+
140+
These options are primarily useful when converting a repository from one hash
141+
algorithm to another; without them, fast-import will fail if it encounters a
142+
submodule because it has no way of writing the object ID into the new hash
143+
algorithm.
144+
125145
Performance and Compression Tuning
126146
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127147

fast-import.c

Lines changed: 107 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "object-store.h"
1919
#include "mem-pool.h"
2020
#include "commit-reach.h"
21+
#include "khash.h"
2122

2223
#define PACK_ID_BITS 16
2324
#define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -53,6 +54,7 @@ struct object_entry_pool {
5354

5455
struct mark_set {
5556
union {
57+
struct object_id *oids[1024];
5658
struct object_entry *marked[1024];
5759
struct mark_set *sets[1024];
5860
} data;
@@ -225,6 +227,11 @@ static int allow_unsafe_features;
225227
/* Signal handling */
226228
static volatile sig_atomic_t checkpoint_requested;
227229

230+
/* Submodule marks */
231+
static struct string_list sub_marks_from = STRING_LIST_INIT_DUP;
232+
static struct string_list sub_marks_to = STRING_LIST_INIT_DUP;
233+
static kh_oid_map_t *sub_oid_map;
234+
228235
/* Where to write output of cat-blob commands */
229236
static int cat_blob_fd = STDOUT_FILENO;
230237

@@ -1731,6 +1738,11 @@ static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintm
17311738
insert_mark(s, mark, e);
17321739
}
17331740

1741+
static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
1742+
{
1743+
insert_mark(s, mark, xmemdupz(oid, sizeof(*oid)));
1744+
}
1745+
17341746
static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
17351747
{
17361748
char line[512];
@@ -1739,13 +1751,17 @@ static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inse
17391751
char *end;
17401752
struct object_id oid;
17411753

1754+
/* Ensure SHA-1 objects are padded with zeros. */
1755+
memset(oid.hash, 0, sizeof(oid.hash));
1756+
17421757
end = strchr(line, '\n');
17431758
if (line[0] != ':' || !end)
17441759
die("corrupt mark line: %s", line);
17451760
*end = 0;
17461761
mark = strtoumax(line + 1, &end, 10);
17471762
if (!mark || end == line + 1
1748-
|| *end != ' ' || get_oid_hex(end + 1, &oid))
1763+
|| *end != ' '
1764+
|| get_oid_hex_any(end + 1, &oid) == GIT_HASH_UNKNOWN)
17491765
die("corrupt mark line: %s", line);
17501766
inserter(s, &oid, mark);
17511767
}
@@ -2146,6 +2162,30 @@ static uintmax_t change_note_fanout(struct tree_entry *root,
21462162
return do_change_note_fanout(root, root, hex_oid, 0, path, 0, fanout);
21472163
}
21482164

2165+
static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end)
2166+
{
2167+
int algo;
2168+
khiter_t it;
2169+
2170+
/* Make SHA-1 object IDs have all-zero padding. */
2171+
memset(oid->hash, 0, sizeof(oid->hash));
2172+
2173+
algo = parse_oid_hex_any(hex, oid, end);
2174+
if (algo == GIT_HASH_UNKNOWN)
2175+
return -1;
2176+
2177+
it = kh_get_oid_map(sub_oid_map, *oid);
2178+
/* No such object? */
2179+
if (it == kh_end(sub_oid_map)) {
2180+
/* If we're using the same algorithm, pass it through. */
2181+
if (hash_algos[algo].format_id == the_hash_algo->format_id)
2182+
return 0;
2183+
return -1;
2184+
}
2185+
oidcpy(oid, kh_value(sub_oid_map, it));
2186+
return 0;
2187+
}
2188+
21492189
/*
21502190
* Given a pointer into a string, parse a mark reference:
21512191
*
@@ -2232,7 +2272,7 @@ static void file_change_m(const char *p, struct branch *b)
22322272
inline_data = 1;
22332273
oe = NULL; /* not used with inline_data, but makes gcc happy */
22342274
} else {
2235-
if (parse_oid_hex(p, &oid, &p))
2275+
if (parse_mapped_oid_hex(p, &oid, &p))
22362276
die("Invalid dataref: %s", command_buf.buf);
22372277
oe = find_object(&oid);
22382278
if (*p++ != ' ')
@@ -2406,7 +2446,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
24062446
inline_data = 1;
24072447
oe = NULL; /* not used with inline_data, but makes gcc happy */
24082448
} else {
2409-
if (parse_oid_hex(p, &oid, &p))
2449+
if (parse_mapped_oid_hex(p, &oid, &p))
24102450
die("Invalid dataref: %s", command_buf.buf);
24112451
oe = find_object(&oid);
24122452
if (*p++ != ' ')
@@ -2941,7 +2981,7 @@ static void parse_cat_blob(const char *p)
29412981
die("Unknown mark: %s", command_buf.buf);
29422982
oidcpy(&oid, &oe->idx.oid);
29432983
} else {
2944-
if (parse_oid_hex(p, &oid, &p))
2984+
if (parse_mapped_oid_hex(p, &oid, &p))
29452985
die("Invalid dataref: %s", command_buf.buf);
29462986
if (*p)
29472987
die("Garbage after SHA1: %s", command_buf.buf);
@@ -3005,6 +3045,42 @@ static struct object_entry *dereference(struct object_entry *oe,
30053045
return find_object(oid);
30063046
}
30073047

3048+
static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp)
3049+
{
3050+
struct object_id *fromoid = object;
3051+
struct object_id *tooid = find_mark(cbp, mark);
3052+
int ret;
3053+
khiter_t it;
3054+
3055+
it = kh_put_oid_map(sub_oid_map, *fromoid, &ret);
3056+
/* We've already seen this object. */
3057+
if (ret == 0)
3058+
return;
3059+
kh_value(sub_oid_map, it) = tooid;
3060+
}
3061+
3062+
static void build_mark_map_one(struct mark_set *from, struct mark_set *to)
3063+
{
3064+
for_each_mark(from, 0, insert_mapped_mark, to);
3065+
}
3066+
3067+
static void build_mark_map(struct string_list *from, struct string_list *to)
3068+
{
3069+
struct string_list_item *fromp, *top;
3070+
3071+
sub_oid_map = kh_init_oid_map();
3072+
3073+
for_each_string_list_item(fromp, from) {
3074+
top = string_list_lookup(to, fromp->string);
3075+
if (!fromp->util) {
3076+
die(_("Missing from marks for submodule '%s'"), fromp->string);
3077+
} else if (!top || !top->util) {
3078+
die(_("Missing to marks for submodule '%s'"), fromp->string);
3079+
}
3080+
build_mark_map_one(fromp->util, top->util);
3081+
}
3082+
}
3083+
30083084
static struct object_entry *parse_treeish_dataref(const char **p)
30093085
{
30103086
struct object_id oid;
@@ -3016,7 +3092,7 @@ static struct object_entry *parse_treeish_dataref(const char **p)
30163092
die("Unknown mark: %s", command_buf.buf);
30173093
oidcpy(&oid, &e->idx.oid);
30183094
} else { /* <sha1> */
3019-
if (parse_oid_hex(*p, &oid, p))
3095+
if (parse_mapped_oid_hex(*p, &oid, p))
30203096
die("Invalid dataref: %s", command_buf.buf);
30213097
e = find_object(&oid);
30223098
if (*(*p)++ != ' ')
@@ -3222,6 +3298,26 @@ static void option_export_pack_edges(const char *edges)
32223298
pack_edges = xfopen(edges, "a");
32233299
}
32243300

3301+
static void option_rewrite_submodules(const char *arg, struct string_list *list)
3302+
{
3303+
struct mark_set *ms;
3304+
FILE *fp;
3305+
char *s = xstrdup(arg);
3306+
char *f = strchr(s, ':');
3307+
if (!f)
3308+
die(_("Expected format name:filename for submodule rewrite option"));
3309+
*f = '\0';
3310+
f++;
3311+
ms = xcalloc(1, sizeof(*ms));
3312+
string_list_insert(list, s)->util = ms;
3313+
3314+
fp = fopen(f, "r");
3315+
if (!fp)
3316+
die_errno("cannot read '%s'", f);
3317+
read_mark_file(ms, fp, insert_oid_entry);
3318+
fclose(fp);
3319+
}
3320+
32253321
static int parse_one_option(const char *option)
32263322
{
32273323
if (skip_prefix(option, "max-pack-size=", &option)) {
@@ -3284,6 +3380,11 @@ static int parse_one_feature(const char *feature, int from_stream)
32843380
option_export_marks(arg);
32853381
} else if (!strcmp(feature, "alias")) {
32863382
; /* Don't die - this feature is supported */
3383+
} else if (skip_prefix(feature, "rewrite-submodules-to=", &arg)) {
3384+
option_rewrite_submodules(arg, &sub_marks_to);
3385+
} else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) {
3386+
option_rewrite_submodules(arg, &sub_marks_from);
3387+
} else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) {
32873388
} else if (!strcmp(feature, "get-mark")) {
32883389
; /* Don't die - this feature is supported */
32893390
} else if (!strcmp(feature, "cat-blob")) {
@@ -3389,6 +3490,7 @@ static void parse_argv(void)
33893490
seen_data_command = 1;
33903491
if (import_marks_file)
33913492
read_marks();
3493+
build_mark_map(&sub_marks_from, &sub_marks_to);
33923494
}
33933495

33943496
int cmd_main(int argc, const char **argv)

t/t9300-fast-import.sh

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3382,4 +3382,113 @@ test_expect_success 'X: handling encoding' '
33823382
git log -1 --format=%B encoding | grep $(printf "\317\200")
33833383
'
33843384

3385+
###
3386+
### series Y (submodules and hash algorithms)
3387+
###
3388+
3389+
cat >Y-sub-input <<\Y_INPUT_END
3390+
blob
3391+
mark :1
3392+
data 4
3393+
foo
3394+
3395+
reset refs/heads/master
3396+
commit refs/heads/master
3397+
mark :2
3398+
author Full Name <[email protected]> 1000000000 +0100
3399+
committer Full Name <[email protected]> 1000000000 +0100
3400+
data 24
3401+
Test submodule commit 1
3402+
M 100644 :1 file
3403+
3404+
blob
3405+
mark :3
3406+
data 8
3407+
foo
3408+
bar
3409+
3410+
commit refs/heads/master
3411+
mark :4
3412+
author Full Name <[email protected]> 1000000001 +0100
3413+
committer Full Name <[email protected]> 1000000001 +0100
3414+
data 24
3415+
Test submodule commit 2
3416+
from :2
3417+
M 100644 :3 file
3418+
Y_INPUT_END
3419+
3420+
# Note that the submodule object IDs are intentionally not translated.
3421+
cat >Y-main-input <<\Y_INPUT_END
3422+
blob
3423+
mark :1
3424+
data 4
3425+
foo
3426+
3427+
reset refs/heads/master
3428+
commit refs/heads/master
3429+
mark :2
3430+
author Full Name <[email protected]> 2000000000 +0100
3431+
committer Full Name <[email protected]> 2000000000 +0100
3432+
data 14
3433+
Test commit 1
3434+
M 100644 :1 file
3435+
3436+
blob
3437+
mark :3
3438+
data 73
3439+
[submodule "sub1"]
3440+
path = sub1
3441+
url = https://void.example.com/main.git
3442+
3443+
commit refs/heads/master
3444+
mark :4
3445+
author Full Name <[email protected]> 2000000001 +0100
3446+
committer Full Name <[email protected]> 2000000001 +0100
3447+
data 14
3448+
Test commit 2
3449+
from :2
3450+
M 100644 :3 .gitmodules
3451+
M 160000 0712c5be7cf681388e355ef47525aaf23aee1a6d sub1
3452+
3453+
blob
3454+
mark :5
3455+
data 8
3456+
foo
3457+
bar
3458+
3459+
commit refs/heads/master
3460+
mark :6
3461+
author Full Name <[email protected]> 2000000002 +0100
3462+
committer Full Name <[email protected]> 2000000002 +0100
3463+
data 14
3464+
Test commit 3
3465+
from :4
3466+
M 100644 :5 file
3467+
M 160000 ff729f5e62f72c0c3978207d9a80e5f3a65f14d7 sub1
3468+
Y_INPUT_END
3469+
3470+
cat >Y-marks <<\Y_INPUT_END
3471+
:2 0712c5be7cf681388e355ef47525aaf23aee1a6d
3472+
:4 ff729f5e62f72c0c3978207d9a80e5f3a65f14d7
3473+
Y_INPUT_END
3474+
3475+
test_expect_success 'Y: setup' '
3476+
test_oid_cache <<-EOF
3477+
Ymaster sha1:9afed2f9161ddf416c0a1863b8b0725b00070504
3478+
Ymaster sha256:c0a1010da1df187b2e287654793df01b464bd6f8e3f17fc1481a7dadf84caee3
3479+
EOF
3480+
'
3481+
3482+
test_expect_success 'Y: rewrite submodules' '
3483+
git init main1 &&
3484+
(
3485+
cd main1 &&
3486+
git init sub2 &&
3487+
git -C sub2 fast-import --export-marks=../sub2-marks <../Y-sub-input &&
3488+
git fast-import --rewrite-submodules-from=sub:../Y-marks \
3489+
--rewrite-submodules-to=sub:sub2-marks <../Y-main-input &&
3490+
test "$(git rev-parse master)" = "$(test_oid Ymaster)"
3491+
)
3492+
'
3493+
33853494
test_done

0 commit comments

Comments
 (0)