Skip to content

Commit d42a2fb

Browse files
newren authored and gitster committed
fast-import: add new --date-format=raw-permissive format
There are multiple repositories in the wild with random, invalid timezones. Most notable is a commit from rails.git with a timezone of "+051800"[1]. A few searches will find other repos with that same invalid timezone as well. Further, Peff reports that GitHub relaxed their fsck checks in August 2011 to accept any timezone value[2], and there have been multiple reports to filter-repo about fast-import crashing while trying to import their existing repositories since they had timezone values such as "-7349423" and "-43455309"[3]. The existing check on timezone values inside fast-import may prove useful for people who are crafting fast-import input by hand or with a new script. For them, the check may help them avoid accidentally recording invalid dates. (Note that this check is rather simplistic and there are still several forms of invalid dates that fast-import does not check for: dates in the future, timezone values with minutes that are not divisible by 15, and timezone values with minutes that are 60 or greater.) While this simple check may have some value for those users, other users or tools will want to import existing repositories as-is. Provide a --date-format=raw-permissive format that will not error out on these otherwise invalid timezones so that such existing repositories can be imported. [1] rails/rails@4cf9497 [2] https://lore.kernel.org/git/[email protected]/ [3] newren/git-filter-repo#88 Signed-off-by: Elijah Newren <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent af6b65d commit d42a2fb

File tree

3 files changed

+57
-5
lines changed

3 files changed

+57
-5
lines changed

Documentation/git-fast-import.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,14 @@ by users who are located in the same location and time zone. In this
273273
case a reasonable offset from UTC could be assumed.
274274
+
275275
Unlike the `rfc2822` format, this format is very strict. Any
276-
variation in formatting will cause fast-import to reject the value.
276+
variation in formatting will cause fast-import to reject the value,
277+
and some sanity checks on the numeric values may also be performed.
278+
279+
`raw-permissive`::
280+
This is the same as `raw` except that no sanity checks on
281+
the numeric epoch and local offset are performed. This can
282+
be useful when trying to filter or import an existing history
283+
with e.g. bogus timezone values.
277284

278285
`rfc2822`::
279286
This is the standard email format as described by RFC 2822.

fast-import.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ struct hash_list {
121121

122122
typedef enum {
123123
WHENSPEC_RAW = 1,
124+
WHENSPEC_RAW_PERMISSIVE,
124125
WHENSPEC_RFC2822,
125126
WHENSPEC_NOW
126127
} whenspec_type;
@@ -1874,7 +1875,7 @@ static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res)
18741875
return 1;
18751876
}
18761877

1877-
static int validate_raw_date(const char *src, struct strbuf *result)
1878+
static int validate_raw_date(const char *src, struct strbuf *result, int strict)
18781879
{
18791880
const char *orig_src = src;
18801881
char *endp;
@@ -1883,7 +1884,11 @@ static int validate_raw_date(const char *src, struct strbuf *result)
18831884
errno = 0;
18841885

18851886
num = strtoul(src, &endp, 10);
1886-
/* NEEDSWORK: perhaps check for reasonable values? */
1887+
/*
1888+
* NEEDSWORK: perhaps check for reasonable values? For example, we
1889+
* could error on values representing times more than a
1890+
* day in the future.
1891+
*/
18871892
if (errno || endp == src || *endp != ' ')
18881893
return -1;
18891894

@@ -1892,7 +1897,13 @@ static int validate_raw_date(const char *src, struct strbuf *result)
18921897
return -1;
18931898

18941899
num = strtoul(src + 1, &endp, 10);
1895-
if (errno || endp == src + 1 || *endp || 1400 < num)
1900+
/*
1901+
* NEEDSWORK: check for brokenness other than num > 1400, such as
1902+
* (num % 100) >= 60, or ((num % 100) % 15) != 0 ?
1903+
*/
1904+
if (errno || endp == src + 1 || *endp || /* did not parse */
1905+
(strict && (1400 < num)) /* parsed a broken timezone */
1906+
)
18961907
return -1;
18971908

18981909
strbuf_addstr(result, orig_src);
@@ -1926,7 +1937,11 @@ static char *parse_ident(const char *buf)
19261937

19271938
switch (whenspec) {
19281939
case WHENSPEC_RAW:
1929-
if (validate_raw_date(ltgt, &ident) < 0)
1940+
if (validate_raw_date(ltgt, &ident, 1) < 0)
1941+
die("Invalid raw date \"%s\" in ident: %s", ltgt, buf);
1942+
break;
1943+
case WHENSPEC_RAW_PERMISSIVE:
1944+
if (validate_raw_date(ltgt, &ident, 0) < 0)
19301945
die("Invalid raw date \"%s\" in ident: %s", ltgt, buf);
19311946
break;
19321947
case WHENSPEC_RFC2822:
@@ -3161,6 +3176,8 @@ static void option_date_format(const char *fmt)
31613176
{
31623177
if (!strcmp(fmt, "raw"))
31633178
whenspec = WHENSPEC_RAW;
3179+
else if (!strcmp(fmt, "raw-permissive"))
3180+
whenspec = WHENSPEC_RAW_PERMISSIVE;
31643181
else if (!strcmp(fmt, "rfc2822"))
31653182
whenspec = WHENSPEC_RFC2822;
31663183
else if (!strcmp(fmt, "now"))

t/t9300-fast-import.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,34 @@ test_expect_success 'B: accept empty committer' '
410410
test -z "$out"
411411
'
412412

413+
test_expect_success 'B: reject invalid timezone' '
414+
cat >input <<-INPUT_END &&
415+
commit refs/heads/invalid-timezone
416+
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 1234567890 +051800
417+
data <<COMMIT
418+
empty commit
419+
COMMIT
420+
INPUT_END
421+
422+
test_when_finished "git update-ref -d refs/heads/invalid-timezone" &&
423+
test_must_fail git fast-import <input
424+
'
425+
426+
test_expect_success 'B: accept invalid timezone with raw-permissive' '
427+
cat >input <<-INPUT_END &&
428+
commit refs/heads/invalid-timezone
429+
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 1234567890 +051800
430+
data <<COMMIT
431+
empty commit
432+
COMMIT
433+
INPUT_END
434+
435+
git init invalid-timezone &&
436+
git -C invalid-timezone fast-import --date-format=raw-permissive <input &&
437+
git -C invalid-timezone cat-file -p invalid-timezone >out &&
438+
grep "1234567890 [+]051800" out
439+
'
440+
413441
test_expect_success 'B: accept and fixup committer with no name' '
414442
cat >input <<-INPUT_END &&
415443
commit refs/heads/empty-committer-2

0 commit comments

Comments
 (0)