@@ -280,6 +280,7 @@ struct recent_command
280
280
/* Configured limits on output */
281
281
static unsigned long max_depth = 10 ;
282
282
static off_t max_packsize = (1LL << 32 ) - 1 ;
283
+ static uintmax_t big_file_threshold = 512 * 1024 * 1024 ;
283
284
static int force_update ;
284
285
static int pack_compression_level = Z_DEFAULT_COMPRESSION ;
285
286
static int pack_compression_seen ;
@@ -1003,7 +1004,7 @@ static void cycle_packfile(void)
1003
1004
1004
1005
static size_t encode_header (
1005
1006
enum object_type type ,
1006
- size_t size ,
1007
+ uintmax_t size ,
1007
1008
unsigned char * hdr )
1008
1009
{
1009
1010
int n = 1 ;
@@ -1159,6 +1160,118 @@ static int store_object(
1159
1160
return 0 ;
1160
1161
}
1161
1162
1163
+ static void truncate_pack (off_t to )
1164
+ {
1165
+ if (ftruncate (pack_data -> pack_fd , to )
1166
+ || lseek (pack_data -> pack_fd , to , SEEK_SET ) != to )
1167
+ die_errno ("cannot truncate pack to skip duplicate" );
1168
+ pack_size = to ;
1169
+ }
1170
+
1171
+ static void stream_blob (uintmax_t len , unsigned char * sha1out , uintmax_t mark )
1172
+ {
1173
+ size_t in_sz = 64 * 1024 , out_sz = 64 * 1024 ;
1174
+ unsigned char * in_buf = xmalloc (in_sz );
1175
+ unsigned char * out_buf = xmalloc (out_sz );
1176
+ struct object_entry * e ;
1177
+ unsigned char sha1 [20 ];
1178
+ unsigned long hdrlen ;
1179
+ off_t offset ;
1180
+ git_SHA_CTX c ;
1181
+ z_stream s ;
1182
+ int status = Z_OK ;
1183
+
1184
+ /* Determine if we should auto-checkpoint. */
1185
+ if ((pack_size + 60 + len ) > max_packsize
1186
+ || (pack_size + 60 + len ) < pack_size )
1187
+ cycle_packfile ();
1188
+
1189
+ offset = pack_size ;
1190
+
1191
+ hdrlen = snprintf ((char * )out_buf , out_sz , "blob %" PRIuMAX , len ) + 1 ;
1192
+ if (out_sz <= hdrlen )
1193
+ die ("impossibly large object header" );
1194
+
1195
+ git_SHA1_Init (& c );
1196
+ git_SHA1_Update (& c , out_buf , hdrlen );
1197
+
1198
+ memset (& s , 0 , sizeof (s ));
1199
+ deflateInit (& s , pack_compression_level );
1200
+
1201
+ hdrlen = encode_header (OBJ_BLOB , len , out_buf );
1202
+ if (out_sz <= hdrlen )
1203
+ die ("impossibly large object header" );
1204
+
1205
+ s .next_out = out_buf + hdrlen ;
1206
+ s .avail_out = out_sz - hdrlen ;
1207
+
1208
+ while (status != Z_STREAM_END ) {
1209
+ if (0 < len && !s .avail_in ) {
1210
+ size_t cnt = in_sz < len ? in_sz : (size_t )len ;
1211
+ size_t n = fread (in_buf , 1 , cnt , stdin );
1212
+ if (!n && feof (stdin ))
1213
+ die ("EOF in data (%" PRIuMAX " bytes remaining)" , len );
1214
+
1215
+ git_SHA1_Update (& c , in_buf , n );
1216
+ s .next_in = in_buf ;
1217
+ s .avail_in = n ;
1218
+ len -= n ;
1219
+ }
1220
+
1221
+ status = deflate (& s , len ? 0 : Z_FINISH );
1222
+
1223
+ if (!s .avail_out || status == Z_STREAM_END ) {
1224
+ size_t n = s .next_out - out_buf ;
1225
+ write_or_die (pack_data -> pack_fd , out_buf , n );
1226
+ pack_size += n ;
1227
+ s .next_out = out_buf ;
1228
+ s .avail_out = out_sz ;
1229
+ }
1230
+
1231
+ switch (status ) {
1232
+ case Z_OK :
1233
+ case Z_BUF_ERROR :
1234
+ case Z_STREAM_END :
1235
+ continue ;
1236
+ default :
1237
+ die ("unexpected deflate failure: %d" , status );
1238
+ }
1239
+ }
1240
+ deflateEnd (& s );
1241
+ git_SHA1_Final (sha1 , & c );
1242
+
1243
+ if (sha1out )
1244
+ hashcpy (sha1out , sha1 );
1245
+
1246
+ e = insert_object (sha1 );
1247
+
1248
+ if (mark )
1249
+ insert_mark (mark , e );
1250
+
1251
+ if (e -> offset ) {
1252
+ duplicate_count_by_type [OBJ_BLOB ]++ ;
1253
+ truncate_pack (offset );
1254
+
1255
+ } else if (find_sha1_pack (sha1 , packed_git )) {
1256
+ e -> type = OBJ_BLOB ;
1257
+ e -> pack_id = MAX_PACK_ID ;
1258
+ e -> offset = 1 ; /* just not zero! */
1259
+ duplicate_count_by_type [OBJ_BLOB ]++ ;
1260
+ truncate_pack (offset );
1261
+
1262
+ } else {
1263
+ e -> depth = 0 ;
1264
+ e -> type = OBJ_BLOB ;
1265
+ e -> pack_id = pack_id ;
1266
+ e -> offset = offset ;
1267
+ object_count ++ ;
1268
+ object_count_by_type [OBJ_BLOB ]++ ;
1269
+ }
1270
+
1271
+ free (in_buf );
1272
+ free (out_buf );
1273
+ }
1274
+
1162
1275
/* All calls must be guarded by find_object() or find_mark() to
1163
1276
* ensure the 'struct object_entry' passed was written by this
1164
1277
* process instance. We unpack the entry by the offset, avoiding
@@ -1704,7 +1817,7 @@ static void parse_mark(void)
1704
1817
next_mark = 0 ;
1705
1818
}
1706
1819
1707
- static void parse_data (struct strbuf * sb )
1820
+ static int parse_data (struct strbuf * sb , uintmax_t limit , uintmax_t * len_res )
1708
1821
{
1709
1822
strbuf_reset (sb );
1710
1823
@@ -1728,9 +1841,15 @@ static void parse_data(struct strbuf *sb)
1728
1841
free (term );
1729
1842
}
1730
1843
else {
1731
- size_t n = 0 , length ;
1844
+ uintmax_t len = strtoumax (command_buf .buf + 5 , NULL , 10 );
1845
+ size_t n = 0 , length = (size_t )len ;
1732
1846
1733
- length = strtoul (command_buf .buf + 5 , NULL , 10 );
1847
+ if (limit && limit < len ) {
1848
+ * len_res = len ;
1849
+ return 0 ;
1850
+ }
1851
+ if (length < len )
1852
+ die ("data is too large to use in this context" );
1734
1853
1735
1854
while (n < length ) {
1736
1855
size_t s = strbuf_fread (sb , length - n , stdin );
@@ -1742,6 +1861,7 @@ static void parse_data(struct strbuf *sb)
1742
1861
}
1743
1862
1744
1863
skip_optional_lf ();
1864
+ return 1 ;
1745
1865
}
1746
1866
1747
1867
static int validate_raw_date (const char * src , char * result , int maxlen )
@@ -1806,14 +1926,32 @@ static char *parse_ident(const char *buf)
1806
1926
return ident ;
1807
1927
}
1808
1928
1809
- static void parse_new_blob (void )
1929
+ static void parse_and_store_blob (
1930
+ struct last_object * last ,
1931
+ unsigned char * sha1out ,
1932
+ uintmax_t mark )
1810
1933
{
1811
1934
static struct strbuf buf = STRBUF_INIT ;
1935
+ uintmax_t len ;
1812
1936
1937
+ if (parse_data (& buf , big_file_threshold , & len ))
1938
+ store_object (OBJ_BLOB , & buf , last , sha1out , mark );
1939
+ else {
1940
+ if (last ) {
1941
+ strbuf_release (& last -> data );
1942
+ last -> offset = 0 ;
1943
+ last -> depth = 0 ;
1944
+ }
1945
+ stream_blob (len , sha1out , mark );
1946
+ skip_optional_lf ();
1947
+ }
1948
+ }
1949
+
1950
+ static void parse_new_blob (void )
1951
+ {
1813
1952
read_next_command ();
1814
1953
parse_mark ();
1815
- parse_data (& buf );
1816
- store_object (OBJ_BLOB , & buf , & last_blob , NULL , next_mark );
1954
+ parse_and_store_blob (& last_blob , NULL , next_mark );
1817
1955
}
1818
1956
1819
1957
static void unload_one_branch (void )
@@ -1924,15 +2062,12 @@ static void file_change_m(struct branch *b)
1924
2062
* another repository.
1925
2063
*/
1926
2064
} else if (inline_data ) {
1927
- static struct strbuf buf = STRBUF_INIT ;
1928
-
1929
2065
if (p != uq .buf ) {
1930
2066
strbuf_addstr (& uq , p );
1931
2067
p = uq .buf ;
1932
2068
}
1933
2069
read_next_command ();
1934
- parse_data (& buf );
1935
- store_object (OBJ_BLOB , & buf , & last_blob , sha1 , 0 );
2070
+ parse_and_store_blob (& last_blob , sha1 , 0 );
1936
2071
} else if (oe ) {
1937
2072
if (oe -> type != OBJ_BLOB )
1938
2073
die ("Not a blob (actually a %s): %s" ,
@@ -2058,15 +2193,12 @@ static void note_change_n(struct branch *b)
2058
2193
die ("Invalid ref name or SHA1 expression: %s" , p );
2059
2194
2060
2195
if (inline_data ) {
2061
- static struct strbuf buf = STRBUF_INIT ;
2062
-
2063
2196
if (p != uq .buf ) {
2064
2197
strbuf_addstr (& uq , p );
2065
2198
p = uq .buf ;
2066
2199
}
2067
2200
read_next_command ();
2068
- parse_data (& buf );
2069
- store_object (OBJ_BLOB , & buf , & last_blob , sha1 , 0 );
2201
+ parse_and_store_blob (& last_blob , sha1 , 0 );
2070
2202
} else if (oe ) {
2071
2203
if (oe -> type != OBJ_BLOB )
2072
2204
die ("Not a blob (actually a %s): %s" ,
@@ -2232,7 +2364,7 @@ static void parse_new_commit(void)
2232
2364
}
2233
2365
if (!committer )
2234
2366
die ("Expected committer but didn't get one" );
2235
- parse_data (& msg );
2367
+ parse_data (& msg , 0 , NULL );
2236
2368
read_next_command ();
2237
2369
parse_from (b );
2238
2370
merge_list = parse_merge (& merge_count );
@@ -2353,7 +2485,7 @@ static void parse_new_tag(void)
2353
2485
tagger = NULL ;
2354
2486
2355
2487
/* tag payload/message */
2356
- parse_data (& msg );
2488
+ parse_data (& msg , 0 , NULL );
2357
2489
2358
2490
/* build the tag object */
2359
2491
strbuf_reset (& new_data );
@@ -2473,11 +2605,15 @@ static int git_pack_config(const char *k, const char *v, void *cb)
2473
2605
pack_compression_seen = 1 ;
2474
2606
return 0 ;
2475
2607
}
2608
+ if (!strcmp (k , "core.bigfilethreshold" )) {
2609
+ long n = git_config_int (k , v );
2610
+ big_file_threshold = 0 < n ? n : 0 ;
2611
+ }
2476
2612
return git_default_config (k , v , cb );
2477
2613
}
2478
2614
2479
2615
/* Usage line printed on bad command-line arguments; keep in sync with main(). */
static const char fast_import_usage[] =
"git fast-import [--date-format=f] [--max-pack-size=n] [--big-file-threshold=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file]";
2481
2617
2482
2618
int main (int argc , const char * * argv )
2483
2619
{
@@ -2518,7 +2654,12 @@ int main(int argc, const char **argv)
2518
2654
}
2519
2655
else if (!prefixcmp (a , "--max-pack-size=" ))
2520
2656
max_packsize = strtoumax (a + 16 , NULL , 0 ) * 1024 * 1024 ;
2521
- else if (!prefixcmp (a , "--depth=" )) {
2657
+ else if (!prefixcmp (a , "--big-file-threshold=" )) {
2658
+ unsigned long v ;
2659
+ if (!git_parse_ulong (a + 21 , & v ))
2660
+ usage (fast_import_usage );
2661
+ big_file_threshold = v ;
2662
+ } else if (!prefixcmp (a , "--depth=" )) {
2522
2663
max_depth = strtoul (a + 8 , NULL , 0 );
2523
2664
if (max_depth > MAX_DEPTH )
2524
2665
die ("--depth cannot exceed %u" , MAX_DEPTH );
0 commit comments