Skip to content

Commit 1eced8b

Browse files
committed
Merge branch 'bc/sha-256-part-1-of-4' into pu
SHA-256 transition continues. Tentatively I dropped the bits about signed tag, as it depended on a reverted change to the gpg interface API. * bc/sha-256-part-1-of-4: (22 commits) fast-import: add options for rewriting submodules fast-import: add a generic function to iterate over marks fast-import: make find_marks work on any mark set fast-import: add helper function for inserting mark object entries fast-import: permit reading multiple marks files commit: use expected signature header for SHA-256 worktree: allow repository version 1 init-db: move writing repo version into a function builtin/init-db: add environment variable for new repo hash builtin/init-db: allow specifying hash algorithm on command line setup: allow check_repository_format to read repository format t/helper: make repository tests hash independent t/helper: initialize repository if necessary t/helper/test-dump-split-index: initialize git repository t6300: make hash algorithm independent t6300: abstract away SHA-1-specific constants t: use hash-specific lookup tables to define test constants repository: require a build flag to use SHA-256 hex: add functions to parse hex object IDs in any algorithm hex: introduce parsing variants taking hash algorithms ...
2 parents 2b4bed0 + 1bdca81 commit 1eced8b

28 files changed

+623
-141
lines changed

Documentation/git-fast-import.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,26 @@ Locations of Marks Files
122122
Relative and non-relative marks may be combined by interweaving
123123
--(no-)relative-marks with the --(import|export)-marks= options.
124124

125+
Submodule Rewriting
126+
~~~~~~~~~~~~~~~~~~~
127+
128+
--rewrite-submodules-from=<name>:<file>::
129+
--rewrite-submodules-to=<name>:<file>::
130+
Rewrite the object IDs for the submodule specified by <name> from the values
131+
used in the from <file> to those used in the to <file>. The from marks should
132+
have been created by `git fast-export`, and the to marks should have been
133+
created by `git fast-import` when importing that same submodule.
134+
+
135+
<name> may be any arbitrary string not containing a colon character, but the
136+
same value must be used with both options when specifying corresponding marks.
137+
Multiple submodules may be specified with different values for <name>. It is an
138+
error not to use these options in corresponding pairs.
139+
+
140+
These options are primarily useful when converting a repository from one hash
141+
algorithm to another; without them, fast-import will fail if it encounters a
142+
submodule because it has no way of writing the object ID into the new hash
143+
algorithm.
144+
125145
Performance and Compression Tuning
126146
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127147

Documentation/git-init.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ SYNOPSIS
1010
--------
1111
[verse]
1212
'git init' [-q | --quiet] [--bare] [--template=<template_directory>]
13-
[--separate-git-dir <git dir>]
13+
[--separate-git-dir <git dir>] [--object-format=<format>]
1414
[--shared[=<permissions>]] [directory]
1515

1616

@@ -48,6 +48,11 @@ Only print error and warning messages; all other output will be suppressed.
4848
Create a bare repository. If `GIT_DIR` environment is not set, it is set to the
4949
current working directory.
5050

51+
--object-format=<format>::
52+
53+
Specify the given object format (hash algorithm) for the repository. The valid
54+
values are 'sha1' and (if enabled) 'sha256'. 'sha1' is the default.
55+
5156
--template=<template_directory>::
5257

5358
Specify the directory from which templates will be used. (See the "TEMPLATE

Documentation/git.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,12 @@ double-quotes and respecting backslash escapes. E.g., the value
493493
details. This variable has lower precedence than other path
494494
variables such as GIT_INDEX_FILE, GIT_OBJECT_DIRECTORY...
495495

496+
`GIT_DEFAULT_HASH`::
497+
If this variable is set, the default hash algorithm for new
498+
repositories will be set to this value. This value is currently
499+
ignored when cloning; the setting of the remote repository
500+
is used instead. The default is "sha1".
501+
496502
Git Commits
497503
~~~~~~~~~~~
498504
`GIT_AUTHOR_NAME`::

builtin/clone.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
11101110
}
11111111
}
11121112

1113-
init_db(git_dir, real_git_dir, option_template, INIT_DB_QUIET);
1113+
init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, INIT_DB_QUIET);
11141114

11151115
if (real_git_dir)
11161116
git_dir = real_git_dir;

builtin/commit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1667,7 +1667,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
16671667
}
16681668

16691669
if (amend) {
1670-
const char *exclude_gpgsig[2] = { "gpgsig", NULL };
1670+
const char *exclude_gpgsig[3] = { "gpgsig", "gpgsig-sha256", NULL };
16711671
extra = read_commit_extra_headers(current_head, exclude_gpgsig);
16721672
} else {
16731673
struct commit_extra_header **tail = &extra;

builtin/init-db.c

Lines changed: 65 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#define TEST_FILEMODE 1
2121
#endif
2222

23+
#define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH"
24+
2325
static int init_is_bare_repository = 0;
2426
static int init_shared_repository = -1;
2527
static const char *init_db_template_dir;
@@ -176,13 +178,36 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree)
176178
return 1;
177179
}
178180

181+
void initialize_repository_version(int hash_algo)
182+
{
183+
char repo_version_string[10];
184+
int repo_version = GIT_REPO_VERSION;
185+
186+
#ifndef ENABLE_SHA256
187+
if (hash_algo != GIT_HASH_SHA1)
188+
die(_("The hash algorithm %s is not supported in this build."), hash_algos[hash_algo].name);
189+
#endif
190+
191+
if (hash_algo != GIT_HASH_SHA1)
192+
repo_version = GIT_REPO_VERSION_READ;
193+
194+
/* This forces creation of new config file */
195+
xsnprintf(repo_version_string, sizeof(repo_version_string),
196+
"%d", repo_version);
197+
git_config_set("core.repositoryformatversion", repo_version_string);
198+
199+
if (hash_algo != GIT_HASH_SHA1)
200+
git_config_set("extensions.objectformat",
201+
hash_algos[hash_algo].name);
202+
}
203+
179204
static int create_default_files(const char *template_path,
180-
const char *original_git_dir)
205+
const char *original_git_dir,
206+
const struct repository_format *fmt)
181207
{
182208
struct stat st1;
183209
struct strbuf buf = STRBUF_INIT;
184210
char *path;
185-
char repo_version_string[10];
186211
char junk[2];
187212
int reinit;
188213
int filemode;
@@ -244,10 +269,7 @@ static int create_default_files(const char *template_path,
244269
exit(1);
245270
}
246271

247-
/* This forces creation of new config file */
248-
xsnprintf(repo_version_string, sizeof(repo_version_string),
249-
"%d", GIT_REPO_VERSION);
250-
git_config_set("core.repositoryformatversion", repo_version_string);
272+
initialize_repository_version(fmt->hash_algo);
251273

252274
/* Check filemode trustability */
253275
path = git_path_buf(&buf, "config");
@@ -340,12 +362,33 @@ static void separate_git_dir(const char *git_dir, const char *git_link)
340362
write_file(git_link, "gitdir: %s", git_dir);
341363
}
342364

365+
static void validate_hash_algorithm(struct repository_format *repo_fmt, int hash)
366+
{
367+
const char *env = getenv(GIT_DEFAULT_HASH_ENVIRONMENT);
368+
/*
369+
* If we already have an initialized repo, don't allow the user to
370+
* specify a different algorithm, as that could cause corruption.
371+
* Otherwise, if the user has specified one on the command line, use it.
372+
*/
373+
if (repo_fmt->version >= 0 && hash != GIT_HASH_UNKNOWN && hash != repo_fmt->hash_algo)
374+
die(_("attempt to reinitialize repository with different hash"));
375+
else if (hash != GIT_HASH_UNKNOWN)
376+
repo_fmt->hash_algo = hash;
377+
else if (env) {
378+
int env_algo = hash_algo_by_name(env);
379+
if (env_algo == GIT_HASH_UNKNOWN)
380+
die(_("unknown hash algorithm '%s'"), env);
381+
repo_fmt->hash_algo = env_algo;
382+
}
383+
}
384+
343385
int init_db(const char *git_dir, const char *real_git_dir,
344-
const char *template_dir, unsigned int flags)
386+
const char *template_dir, int hash, unsigned int flags)
345387
{
346388
int reinit;
347389
int exist_ok = flags & INIT_DB_EXIST_OK;
348390
char *original_git_dir = real_pathdup(git_dir, 1);
391+
struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT;
349392

350393
if (real_git_dir) {
351394
struct stat st;
@@ -378,9 +421,11 @@ int init_db(const char *git_dir, const char *real_git_dir,
378421
* config file, so this will not fail. What we are catching
379422
* is an attempt to reinitialize new repository with an old tool.
380423
*/
381-
check_repository_format();
424+
check_repository_format(&repo_fmt);
382425

383-
reinit = create_default_files(template_dir, original_git_dir);
426+
validate_hash_algorithm(&repo_fmt, hash);
427+
428+
reinit = create_default_files(template_dir, original_git_dir, &repo_fmt);
384429

385430
create_object_directory();
386431

@@ -482,6 +527,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
482527
const char *work_tree;
483528
const char *template_dir = NULL;
484529
unsigned int flags = 0;
530+
const char *object_format = NULL;
531+
int hash_algo = GIT_HASH_UNKNOWN;
485532
const struct option init_db_options[] = {
486533
OPT_STRING(0, "template", &template_dir, N_("template-directory"),
487534
N_("directory from which templates will be used")),
@@ -494,6 +541,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
494541
OPT_BIT('q', "quiet", &flags, N_("be quiet"), INIT_DB_QUIET),
495542
OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
496543
N_("separate git dir from working tree")),
544+
OPT_STRING(0, "object-format", &object_format, N_("hash"),
545+
N_("specify the hash algorithm to use")),
497546
OPT_END()
498547
};
499548

@@ -546,6 +595,12 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
546595
free(cwd);
547596
}
548597

598+
if (object_format) {
599+
hash_algo = hash_algo_by_name(object_format);
600+
if (hash_algo == GIT_HASH_UNKNOWN)
601+
die(_("unknown hash algorithm '%s'"), object_format);
602+
}
603+
549604
if (init_shared_repository != -1)
550605
set_shared_repository(init_shared_repository);
551606

@@ -597,5 +652,5 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
597652
UNLEAK(work_tree);
598653

599654
flags |= INIT_DB_EXIST_OK;
600-
return init_db(git_dir, real_git_dir, template_dir, flags);
655+
return init_db(git_dir, real_git_dir, template_dir, hash_algo, flags);
601656
}

builtin/pack-objects.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,7 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out,
880880
len = encode_in_pack_object_header(header, sizeof(header),
881881
OBJ_REF_DELTA, size);
882882
hashwrite(out, header, len);
883-
hashwrite(out, base_oid.hash, 20);
883+
hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
884884
copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
885885
return;
886886
}

cache.h

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,9 @@ int path_inside_repo(const char *prefix, const char *path);
627627
#define INIT_DB_EXIST_OK 0x0002
628628

629629
int init_db(const char *git_dir, const char *real_git_dir,
630-
const char *template_dir, unsigned int flags);
630+
const char *template_dir, int hash_algo,
631+
unsigned int flags);
632+
void initialize_repository_version(int hash_algo);
631633

632634
void sanitize_stdfds(void);
633635
int daemonize(void);
@@ -1086,8 +1088,10 @@ int verify_repository_format(const struct repository_format *format,
10861088
* and die if it is a version we don't understand. Generally one would
10871089
* set_git_dir() before calling this, and use it only for "are we in a valid
10881090
* repo?".
1091+
*
1092+
* If successful and fmt is not NULL, fill fmt with data.
10891093
*/
1090-
void check_repository_format(void);
1094+
void check_repository_format(struct repository_format *fmt);
10911095

10921096
#define MTIME_CHANGED 0x0001
10931097
#define CTIME_CHANGED 0x0002
@@ -1479,6 +1483,9 @@ int set_disambiguate_hint_config(const char *var, const char *value);
14791483
int get_sha1_hex(const char *hex, unsigned char *sha1);
14801484
int get_oid_hex(const char *hex, struct object_id *sha1);
14811485

1486+
/* Like get_oid_hex, but for an arbitrary hash algorithm. */
1487+
int get_oid_hex_algop(const char *hex, struct object_id *oid, const struct git_hash_algo *algop);
1488+
14821489
/*
14831490
* Read `len` pairs of hexadecimal digits from `hex` and write the
14841491
* values to `binary` as `len` bytes. Return 0 on success, or -1 if
@@ -1514,6 +1521,20 @@ char *oid_to_hex(const struct object_id *oid); /* same static buffer */
15141521
*/
15151522
int parse_oid_hex(const char *hex, struct object_id *oid, const char **end);
15161523

1524+
/* Like parse_oid_hex, but for an arbitrary hash algorithm. */
1525+
int parse_oid_hex_algop(const char *hex, struct object_id *oid, const char **end,
1526+
const struct git_hash_algo *algo);
1527+
1528+
1529+
/*
1530+
* These functions work like get_oid_hex and parse_oid_hex, but they will parse
1531+
* a hex value for any algorithm. The algorithm is detected based on the length
1532+
* and the algorithm in use is returned. If this is not a hex object ID in any
1533+
* algorithm, returns GIT_HASH_UNKNOWN.
1534+
*/
1535+
int get_oid_hex_any(const char *hex, struct object_id *oid);
1536+
int parse_oid_hex_any(const char *hex, struct object_id *oid, const char **end);
1537+
15171538
/*
15181539
* This reads short-hand syntax that not only evaluates to a commit
15191540
* object name, but also can act as if the end user spelled the name

commit.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -972,14 +972,22 @@ struct commit *get_fork_point(const char *refname, struct commit *commit)
972972
return ret;
973973
}
974974

975-
static const char gpg_sig_header[] = "gpgsig";
976-
static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1;
975+
/*
976+
* Indexed by hash algorithm identifier.
977+
*/
978+
static const char *gpg_sig_headers[] = {
979+
NULL,
980+
"gpgsig",
981+
"gpgsig-sha256",
982+
};
977983

978984
static int do_sign_commit(struct strbuf *buf, const char *keyid)
979985
{
980986
struct strbuf sig = STRBUF_INIT;
981987
int inspos, copypos;
982988
const char *eoh;
989+
const char *gpg_sig_header = gpg_sig_headers[hash_algo_by_ptr(the_hash_algo)];
990+
int gpg_sig_header_len = strlen(gpg_sig_header);
983991

984992
/* find the end of the header */
985993
eoh = strstr(buf->buf, "\n\n");
@@ -1021,6 +1029,8 @@ int parse_signed_commit(const struct commit *commit,
10211029
const char *buffer = get_commit_buffer(commit, &size);
10221030
int in_signature, saw_signature = -1;
10231031
const char *line, *tail;
1032+
const char *gpg_sig_header = gpg_sig_headers[hash_algo_by_ptr(the_hash_algo)];
1033+
int gpg_sig_header_len = strlen(gpg_sig_header);
10241034

10251035
line = buffer;
10261036
tail = buffer + size;
@@ -1067,11 +1077,17 @@ int remove_signature(struct strbuf *buf)
10671077

10681078
if (in_signature && line[0] == ' ')
10691079
sig_end = next;
1070-
else if (starts_with(line, gpg_sig_header) &&
1071-
line[gpg_sig_header_len] == ' ') {
1072-
sig_start = line;
1073-
sig_end = next;
1074-
in_signature = 1;
1080+
else if (starts_with(line, "gpgsig")) {
1081+
int i;
1082+
for (i = 1; i < GIT_HASH_NALGOS; i++) {
1083+
const char *p;
1084+
if (skip_prefix(line, gpg_sig_headers[i], &p) &&
1085+
*p == ' ') {
1086+
sig_start = line;
1087+
sig_end = next;
1088+
in_signature = 1;
1089+
}
1090+
}
10751091
} else {
10761092
if (*line == '\n')
10771093
/* dump the whole remainder of the buffer */

config.mak.dev

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ DEVELOPER_CFLAGS += -Wstrict-prototypes
1616
DEVELOPER_CFLAGS += -Wunused
1717
DEVELOPER_CFLAGS += -Wvla
1818

19+
DEVELOPER_CFLAGS += -DENABLE_SHA256
20+
1921
ifndef COMPILER_FEATURES
2022
COMPILER_FEATURES := $(shell ./detect-compiler $(CC))
2123
endif

csum-file.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo
157157
{
158158
hashflush(f);
159159
checkpoint->offset = f->total;
160-
checkpoint->ctx = f->ctx;
160+
the_hash_algo->clone_fn(&checkpoint->ctx, &f->ctx);
161161
}
162162

163163
int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)

0 commit comments

Comments
 (0)