Skip to content

Commit 0c54cda

Browse files
committed
Merge branch 'jk/for-each-object-iteration'
The API to iterate over all objects learned to optionally list objects in the order they appear in packfiles, which helps locality of access if the caller accesses these objects as they are enumerated.

* jk/for-each-object-iteration:
  - for_each_*_object: move declarations to object-store.h
  - cat-file: use a single strbuf for all output
  - cat-file: split batch "buf" into two variables
  - cat-file: use oidset check-and-insert
  - cat-file: support "unordered" output for --batch-all-objects
  - cat-file: rename batch_{loose,packed}_object callbacks
  - t1006: test cat-file --batch-all-objects with duplicates
  - for_each_packed_object: support iterating in pack-order
  - for_each_*_object: give more comprehensive docstrings
  - for_each_*_object: take flag arguments as enum
  - for_each_*_object: store flag definitions in a single location
2 parents 42a6274 + 0889aae commit 0c54cda

File tree

10 files changed

+218
-111
lines changed

10 files changed

+218
-111
lines changed

Documentation/git-cat-file.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,16 @@ OPTIONS
104104
buffering; this is much more efficient when invoking
105105
`--batch-check` on a large number of objects.
106106

107+
--unordered::
108+
When `--batch-all-objects` is in use, visit objects in an
109+
order which may be more efficient for accessing the object
110+
contents than hash order. The exact details of the order are
111+
unspecified, but if you do not require a specific order, this
112+
should generally result in faster output, especially with
113+
`--batch`. Note that `cat-file` will still show each object
114+
only once, even if it is stored multiple times in the
115+
repository.
116+
107117
--allow-unknown-type::
108118
Allow -s or -t to query broken/corrupt objects of unknown type.
109119

builtin/cat-file.c

Lines changed: 81 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ struct batch_options {
2121
int print_contents;
2222
int buffer_output;
2323
int all_objects;
24+
int unordered;
2425
int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
2526
const char *format;
2627
};
@@ -337,11 +338,11 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
337338
}
338339
}
339340

340-
static void batch_object_write(const char *obj_name, struct batch_options *opt,
341+
static void batch_object_write(const char *obj_name,
342+
struct strbuf *scratch,
343+
struct batch_options *opt,
341344
struct expand_data *data)
342345
{
343-
struct strbuf buf = STRBUF_INIT;
344-
345346
if (!data->skip_object_info &&
346347
oid_object_info_extended(the_repository, &data->oid, &data->info,
347348
OBJECT_INFO_LOOKUP_REPLACE) < 0) {
@@ -351,18 +352,20 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt,
351352
return;
352353
}
353354

354-
strbuf_expand(&buf, opt->format, expand_format, data);
355-
strbuf_addch(&buf, '\n');
356-
batch_write(opt, buf.buf, buf.len);
357-
strbuf_release(&buf);
355+
strbuf_reset(scratch);
356+
strbuf_expand(scratch, opt->format, expand_format, data);
357+
strbuf_addch(scratch, '\n');
358+
batch_write(opt, scratch->buf, scratch->len);
358359

359360
if (opt->print_contents) {
360361
print_object_or_die(opt, data);
361362
batch_write(opt, "\n", 1);
362363
}
363364
}
364365

365-
static void batch_one_object(const char *obj_name, struct batch_options *opt,
366+
static void batch_one_object(const char *obj_name,
367+
struct strbuf *scratch,
368+
struct batch_options *opt,
366369
struct expand_data *data)
367370
{
368371
struct object_context ctx;
@@ -404,42 +407,70 @@ static void batch_one_object(const char *obj_name, struct batch_options *opt,
404407
return;
405408
}
406409

407-
batch_object_write(obj_name, opt, data);
410+
batch_object_write(obj_name, scratch, opt, data);
408411
}
409412

410413
struct object_cb_data {
411414
struct batch_options *opt;
412415
struct expand_data *expand;
416+
struct oidset *seen;
417+
struct strbuf *scratch;
413418
};
414419

415420
static int batch_object_cb(const struct object_id *oid, void *vdata)
416421
{
417422
struct object_cb_data *data = vdata;
418423
oidcpy(&data->expand->oid, oid);
419-
batch_object_write(NULL, data->opt, data->expand);
424+
batch_object_write(NULL, data->scratch, data->opt, data->expand);
420425
return 0;
421426
}
422427

423-
static int batch_loose_object(const struct object_id *oid,
424-
const char *path,
425-
void *data)
428+
static int collect_loose_object(const struct object_id *oid,
429+
const char *path,
430+
void *data)
426431
{
427432
oid_array_append(data, oid);
428433
return 0;
429434
}
430435

431-
static int batch_packed_object(const struct object_id *oid,
432-
struct packed_git *pack,
433-
uint32_t pos,
434-
void *data)
436+
static int collect_packed_object(const struct object_id *oid,
437+
struct packed_git *pack,
438+
uint32_t pos,
439+
void *data)
435440
{
436441
oid_array_append(data, oid);
437442
return 0;
438443
}
439444

445+
static int batch_unordered_object(const struct object_id *oid, void *vdata)
446+
{
447+
struct object_cb_data *data = vdata;
448+
449+
if (oidset_insert(data->seen, oid))
450+
return 0;
451+
452+
return batch_object_cb(oid, data);
453+
}
454+
455+
static int batch_unordered_loose(const struct object_id *oid,
456+
const char *path,
457+
void *data)
458+
{
459+
return batch_unordered_object(oid, data);
460+
}
461+
462+
static int batch_unordered_packed(const struct object_id *oid,
463+
struct packed_git *pack,
464+
uint32_t pos,
465+
void *data)
466+
{
467+
return batch_unordered_object(oid, data);
468+
}
469+
440470
static int batch_objects(struct batch_options *opt)
441471
{
442-
struct strbuf buf = STRBUF_INIT;
472+
struct strbuf input = STRBUF_INIT;
473+
struct strbuf output = STRBUF_INIT;
443474
struct expand_data data;
444475
int save_warning;
445476
int retval = 0;
@@ -454,8 +485,9 @@ static int batch_objects(struct batch_options *opt)
454485
*/
455486
memset(&data, 0, sizeof(data));
456487
data.mark_query = 1;
457-
strbuf_expand(&buf, opt->format, expand_format, &data);
488+
strbuf_expand(&output, opt->format, expand_format, &data);
458489
data.mark_query = 0;
490+
strbuf_release(&output);
459491
if (opt->cmdmode)
460492
data.split_on_whitespace = 1;
461493

@@ -473,19 +505,37 @@ static int batch_objects(struct batch_options *opt)
473505
data.info.typep = &data.type;
474506

475507
if (opt->all_objects) {
476-
struct oid_array sa = OID_ARRAY_INIT;
477508
struct object_cb_data cb;
478509

479-
for_each_loose_object(batch_loose_object, &sa, 0);
480-
for_each_packed_object(batch_packed_object, &sa, 0);
481510
if (repository_format_partial_clone)
482511
warning("This repository has extensions.partialClone set. Some objects may not be loaded.");
483512

484513
cb.opt = opt;
485514
cb.expand = &data;
486-
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
515+
cb.scratch = &output;
516+
517+
if (opt->unordered) {
518+
struct oidset seen = OIDSET_INIT;
519+
520+
cb.seen = &seen;
521+
522+
for_each_loose_object(batch_unordered_loose, &cb, 0);
523+
for_each_packed_object(batch_unordered_packed, &cb,
524+
FOR_EACH_OBJECT_PACK_ORDER);
525+
526+
oidset_clear(&seen);
527+
} else {
528+
struct oid_array sa = OID_ARRAY_INIT;
529+
530+
for_each_loose_object(collect_loose_object, &sa, 0);
531+
for_each_packed_object(collect_packed_object, &sa, 0);
532+
533+
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
534+
535+
oid_array_clear(&sa);
536+
}
487537

488-
oid_array_clear(&sa);
538+
strbuf_release(&output);
489539
return 0;
490540
}
491541

@@ -499,25 +549,26 @@ static int batch_objects(struct batch_options *opt)
499549
save_warning = warn_on_object_refname_ambiguity;
500550
warn_on_object_refname_ambiguity = 0;
501551

502-
while (strbuf_getline(&buf, stdin) != EOF) {
552+
while (strbuf_getline(&input, stdin) != EOF) {
503553
if (data.split_on_whitespace) {
504554
/*
505555
* Split at first whitespace, tying off the beginning
506556
* of the string and saving the remainder (or NULL) in
507557
* data.rest.
508558
*/
509-
char *p = strpbrk(buf.buf, " \t");
559+
char *p = strpbrk(input.buf, " \t");
510560
if (p) {
511561
while (*p && strchr(" \t", *p))
512562
*p++ = '\0';
513563
}
514564
data.rest = p;
515565
}
516566

517-
batch_one_object(buf.buf, opt, &data);
567+
batch_one_object(input.buf, &output, opt, &data);
518568
}
519569

520-
strbuf_release(&buf);
570+
strbuf_release(&input);
571+
strbuf_release(&output);
521572
warn_on_object_refname_ambiguity = save_warning;
522573
return retval;
523574
}
@@ -586,6 +637,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
586637
N_("follow in-tree symlinks (used with --batch or --batch-check)")),
587638
OPT_BOOL(0, "batch-all-objects", &batch.all_objects,
588639
N_("show all objects with --batch or --batch-check")),
640+
OPT_BOOL(0, "unordered", &batch.unordered,
641+
N_("do not order --batch-all-objects output")),
589642
OPT_END()
590643
};
591644

builtin/prune-packed.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "progress.h"
44
#include "parse-options.h"
55
#include "packfile.h"
6+
#include "object-store.h"
67

78
static const char * const prune_packed_usage[] = {
89
N_("git prune-packed [-n | --dry-run] [-q | --quiet]"),

cache.h

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1575,62 +1575,6 @@ extern int odb_mkstemp(struct strbuf *temp_filename, const char *pattern);
15751575
*/
15761576
extern int odb_pack_keep(const char *name);
15771577

1578-
/*
1579-
* Iterate over the files in the loose-object parts of the object
1580-
* directory "path", triggering the following callbacks:
1581-
*
1582-
* - loose_object is called for each loose object we find.
1583-
*
1584-
* - loose_cruft is called for any files that do not appear to be
1585-
* loose objects. Note that we only look in the loose object
1586-
* directories "objects/[0-9a-f]{2}/", so we will not report
1587-
* "objects/foobar" as cruft.
1588-
*
1589-
* - loose_subdir is called for each top-level hashed subdirectory
1590-
* of the object directory (e.g., "$OBJDIR/f0"). It is called
1591-
* after the objects in the directory are processed.
1592-
*
1593-
* Any callback that is NULL will be ignored. Callbacks returning non-zero
1594-
* will end the iteration.
1595-
*
1596-
* In the "buf" variant, "path" is a strbuf which will also be used as a
1597-
* scratch buffer, but restored to its original contents before
1598-
* the function returns.
1599-
*/
1600-
typedef int each_loose_object_fn(const struct object_id *oid,
1601-
const char *path,
1602-
void *data);
1603-
typedef int each_loose_cruft_fn(const char *basename,
1604-
const char *path,
1605-
void *data);
1606-
typedef int each_loose_subdir_fn(unsigned int nr,
1607-
const char *path,
1608-
void *data);
1609-
int for_each_file_in_obj_subdir(unsigned int subdir_nr,
1610-
struct strbuf *path,
1611-
each_loose_object_fn obj_cb,
1612-
each_loose_cruft_fn cruft_cb,
1613-
each_loose_subdir_fn subdir_cb,
1614-
void *data);
1615-
int for_each_loose_file_in_objdir(const char *path,
1616-
each_loose_object_fn obj_cb,
1617-
each_loose_cruft_fn cruft_cb,
1618-
each_loose_subdir_fn subdir_cb,
1619-
void *data);
1620-
int for_each_loose_file_in_objdir_buf(struct strbuf *path,
1621-
each_loose_object_fn obj_cb,
1622-
each_loose_cruft_fn cruft_cb,
1623-
each_loose_subdir_fn subdir_cb,
1624-
void *data);
1625-
1626-
/*
1627-
* Iterate over loose objects in both the local
1628-
* repository and any alternates repositories (unless the
1629-
* LOCAL_ONLY flag is set).
1630-
*/
1631-
#define FOR_EACH_OBJECT_LOCAL_ONLY 0x1
1632-
extern int for_each_loose_object(each_loose_object_fn, void *, unsigned flags);
1633-
16341578
/*
16351579
* Set this to 0 to prevent sha1_object_info_extended() from fetching missing
16361580
* blobs. This has a difference only if extensions.partialClone is set.

commit-graph.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,7 @@ void write_commit_graph(const char *obj_dir,
730730
die(_("error adding pack %s"), packname.buf);
731731
if (open_pack_index(p))
732732
die(_("error opening index for %s"), packname.buf);
733-
for_each_object_in_pack(p, add_packed_commits, &oids);
733+
for_each_object_in_pack(p, add_packed_commits, &oids, 0);
734734
close_pack(p);
735735
}
736736
strbuf_release(&packname);

0 commit comments

Comments
 (0)