4
4
#include "lockfile.h"
5
5
#include "packfile.h"
6
6
#include "object-store.h"
7
+ #include "packfile.h"
7
8
#include "midx.h"
8
9
9
10
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
@@ -182,12 +183,21 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
182
183
packs -> list [packs -> nr ] = add_packed_git (full_path ,
183
184
full_path_len ,
184
185
0 );
186
+
185
187
if (!packs -> list [packs -> nr ]) {
186
188
warning (_ ("failed to add packfile '%s'" ),
187
189
full_path );
188
190
return ;
189
191
}
190
192
193
+ if (open_pack_index (packs -> list [packs -> nr ])) {
194
+ warning (_ ("failed to open pack-index '%s'" ),
195
+ full_path );
196
+ close_pack (packs -> list [packs -> nr ]);
197
+ FREE_AND_NULL (packs -> list [packs -> nr ]);
198
+ return ;
199
+ }
200
+
191
201
packs -> names [packs -> nr ] = xstrdup (file_name );
192
202
packs -> pack_name_concat_len += strlen (file_name ) + 1 ;
193
203
packs -> nr ++ ;
@@ -228,6 +238,119 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
228
238
free (pairs );
229
239
}
230
240
241
+ struct pack_midx_entry {
242
+ struct object_id oid ;
243
+ uint32_t pack_int_id ;
244
+ time_t pack_mtime ;
245
+ uint64_t offset ;
246
+ };
247
+
248
+ static int midx_oid_compare (const void * _a , const void * _b )
249
+ {
250
+ const struct pack_midx_entry * a = (const struct pack_midx_entry * )_a ;
251
+ const struct pack_midx_entry * b = (const struct pack_midx_entry * )_b ;
252
+ int cmp = oidcmp (& a -> oid , & b -> oid );
253
+
254
+ if (cmp )
255
+ return cmp ;
256
+
257
+ if (a -> pack_mtime > b -> pack_mtime )
258
+ return -1 ;
259
+ else if (a -> pack_mtime < b -> pack_mtime )
260
+ return 1 ;
261
+
262
+ return a -> pack_int_id - b -> pack_int_id ;
263
+ }
264
+
265
+ static void fill_pack_entry (uint32_t pack_int_id ,
266
+ struct packed_git * p ,
267
+ uint32_t cur_object ,
268
+ struct pack_midx_entry * entry )
269
+ {
270
+ if (!nth_packed_object_oid (& entry -> oid , p , cur_object ))
271
+ die (_ ("failed to locate object %d in packfile" ), cur_object );
272
+
273
+ entry -> pack_int_id = pack_int_id ;
274
+ entry -> pack_mtime = p -> mtime ;
275
+
276
+ entry -> offset = nth_packed_object_offset (p , cur_object );
277
+ }
278
+
279
+ /*
280
+ * It is possible to artificially get into a state where there are many
281
+ * duplicate copies of objects. That can create high memory pressure if
282
+ * we are to create a list of all objects before de-duplication. To reduce
283
+ * this memory pressure without a significant performance drop, automatically
284
+ * group objects by the first byte of their object id. Use the IDX fanout
285
+ * tables to group the data, copy to a local array, then sort.
286
+ *
287
+ * Copy only the de-duplicated entries (selected by most-recent modified time
288
+ * of a packfile containing the object).
289
+ */
290
+ static struct pack_midx_entry * get_sorted_entries (struct packed_git * * p ,
291
+ uint32_t * perm ,
292
+ uint32_t nr_packs ,
293
+ uint32_t * nr_objects )
294
+ {
295
+ uint32_t cur_fanout , cur_pack , cur_object ;
296
+ uint32_t alloc_fanout , alloc_objects , total_objects = 0 ;
297
+ struct pack_midx_entry * entries_by_fanout = NULL ;
298
+ struct pack_midx_entry * deduplicated_entries = NULL ;
299
+
300
+ for (cur_pack = 0 ; cur_pack < nr_packs ; cur_pack ++ )
301
+ total_objects += p [cur_pack ]-> num_objects ;
302
+
303
+ /*
304
+ * As we de-duplicate by fanout value, we expect the fanout
305
+ * slices to be evenly distributed, with some noise. Hence,
306
+ * allocate slightly more than one 256th.
307
+ */
308
+ alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16 ;
309
+
310
+ ALLOC_ARRAY (entries_by_fanout , alloc_fanout );
311
+ ALLOC_ARRAY (deduplicated_entries , alloc_objects );
312
+ * nr_objects = 0 ;
313
+
314
+ for (cur_fanout = 0 ; cur_fanout < 256 ; cur_fanout ++ ) {
315
+ uint32_t nr_fanout = 0 ;
316
+
317
+ for (cur_pack = 0 ; cur_pack < nr_packs ; cur_pack ++ ) {
318
+ uint32_t start = 0 , end ;
319
+
320
+ if (cur_fanout )
321
+ start = get_pack_fanout (p [cur_pack ], cur_fanout - 1 );
322
+ end = get_pack_fanout (p [cur_pack ], cur_fanout );
323
+
324
+ for (cur_object = start ; cur_object < end ; cur_object ++ ) {
325
+ ALLOC_GROW (entries_by_fanout , nr_fanout + 1 , alloc_fanout );
326
+ fill_pack_entry (perm [cur_pack ], p [cur_pack ], cur_object , & entries_by_fanout [nr_fanout ]);
327
+ nr_fanout ++ ;
328
+ }
329
+ }
330
+
331
+ QSORT (entries_by_fanout , nr_fanout , midx_oid_compare );
332
+
333
+ /*
334
+ * The batch is now sorted by OID and then mtime (descending).
335
+ * Take only the first duplicate.
336
+ */
337
+ for (cur_object = 0 ; cur_object < nr_fanout ; cur_object ++ ) {
338
+ if (cur_object && !oidcmp (& entries_by_fanout [cur_object - 1 ].oid ,
339
+ & entries_by_fanout [cur_object ].oid ))
340
+ continue ;
341
+
342
+ ALLOC_GROW (deduplicated_entries , * nr_objects + 1 , alloc_objects );
343
+ memcpy (& deduplicated_entries [* nr_objects ],
344
+ & entries_by_fanout [cur_object ],
345
+ sizeof (struct pack_midx_entry ));
346
+ (* nr_objects )++ ;
347
+ }
348
+ }
349
+
350
+ free (entries_by_fanout );
351
+ return deduplicated_entries ;
352
+ }
353
+
231
354
static size_t write_midx_pack_names (struct hashfile * f ,
232
355
char * * pack_names ,
233
356
uint32_t num_packs )
@@ -271,6 +394,8 @@ int write_midx_file(const char *object_dir)
271
394
uint64_t written = 0 ;
272
395
uint32_t chunk_ids [MIDX_MAX_CHUNKS + 1 ];
273
396
uint64_t chunk_offsets [MIDX_MAX_CHUNKS + 1 ];
397
+ uint32_t nr_entries ;
398
+ struct pack_midx_entry * entries = NULL ;
274
399
275
400
midx_name = get_midx_filename (object_dir );
276
401
if (safe_create_leading_directories (midx_name )) {
@@ -296,6 +421,8 @@ int write_midx_file(const char *object_dir)
296
421
ALLOC_ARRAY (pack_perm , packs .nr );
297
422
sort_packs_by_name (packs .names , packs .nr , pack_perm );
298
423
424
+ entries = get_sorted_entries (packs .list , pack_perm , packs .nr , & nr_entries );
425
+
299
426
hold_lock_file_for_update (& lk , midx_name , LOCK_DIE_ON_ERROR );
300
427
f = hashfd (lk .tempfile -> fd , lk .tempfile -> filename .buf );
301
428
FREE_AND_NULL (midx_name );
@@ -365,5 +492,6 @@ int write_midx_file(const char *object_dir)
365
492
366
493
free (packs .list );
367
494
free (packs .names );
495
+ free (entries );
368
496
return 0 ;
369
497
}
0 commit comments