Skip to content

Commit 1e43301

Browse files
matvoregitster
authored and committed
list-objects-filter: implement composite filters
Allow combining filters such that only objects accepted by all filters are shown. The motivation for this is to allow getting directory listings without also fetching blobs. This can be done by combining blob:none with tree:<depth>. There are massive repositories that have larger-than-expected trees - even if you include only a single commit. The current usage requires passing the filter to rev-list in the following form: --filter=<FILTER1> --filter=<FILTER2> ... Such usage is currently an error, so giving it a meaning is backwards-compatible. The URL-encoding scheme is being introduced before the repeated flag logic, and the user-facing documentation for URL-encoding is being withheld until the repeated flag feature is implemented. The URL-encoding is in general not meant to be used directly by the user, and it is better to describe the URL-encoding feature in terms of the repeated flag. Helped-by: Emily Shaffer <[email protected]> Helped-by: Jeff Hostetler <[email protected]> Helped-by: Junio C Hamano <[email protected]> Signed-off-by: Matthew DeVore <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent e1ac4a4 commit 1e43301

File tree

6 files changed

+441
-6
lines changed

6 files changed

+441
-6
lines changed

list-objects-filter-options.c

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66
#include "list-objects.h"
77
#include "list-objects-filter.h"
88
#include "list-objects-filter-options.h"
9+
#include "url.h"
10+
11+
/*
 * Forward declaration: the "combine:" branch of
 * gently_parse_list_objects_filter() calls this function, which is only
 * defined further down in the file.
 */
static int parse_combine_filter(struct list_objects_filter_options *filter_options,
				const char *arg,
				struct strbuf *errbuf);
915

1016
/*
1117
* Parse value of the argument to the "filter" keyword.
@@ -35,8 +41,6 @@ static int gently_parse_list_objects_filter(
3541
return 1;
3642
}
3743

38-
filter_options->filter_spec = strdup(arg);
39-
4044
if (!strcmp(arg, "blob:none")) {
4145
filter_options->choice = LOFC_BLOB_NONE;
4246
return 0;
@@ -77,6 +81,10 @@ static int gently_parse_list_objects_filter(
7781
_("sparse:path filters support has been dropped"));
7882
}
7983
return 1;
84+
85+
} else if (skip_prefix(arg, "combine:", &v0)) {
86+
return parse_combine_filter(filter_options, v0, errbuf);
87+
8088
}
8189
/*
8290
* Please update _git_fetch() in git-completion.bash when you
@@ -89,10 +97,95 @@ static int gently_parse_list_objects_filter(
8997
return 1;
9098
}
9199

100+
static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
101+
102+
static int has_reserved_character(
103+
struct strbuf *sub_spec, struct strbuf *errbuf)
104+
{
105+
const char *c = sub_spec->buf;
106+
while (*c) {
107+
if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
108+
strbuf_addf(errbuf,
109+
"must escape char in sub-filter-spec: '%c'",
110+
*c);
111+
return 1;
112+
}
113+
c++;
114+
}
115+
116+
return 0;
117+
}
118+
119+
static int parse_combine_subfilter(
120+
struct list_objects_filter_options *filter_options,
121+
struct strbuf *subspec,
122+
struct strbuf *errbuf)
123+
{
124+
size_t new_index = filter_options->sub_nr++;
125+
char *decoded;
126+
int result;
127+
128+
ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
129+
filter_options->sub_alloc);
130+
memset(&filter_options->sub[new_index], 0,
131+
sizeof(*filter_options->sub));
132+
133+
decoded = url_percent_decode(subspec->buf);
134+
135+
result = has_reserved_character(subspec, errbuf) ||
136+
gently_parse_list_objects_filter(
137+
&filter_options->sub[new_index], decoded, errbuf);
138+
139+
free(decoded);
140+
return result;
141+
}
142+
143+
static int parse_combine_filter(
144+
struct list_objects_filter_options *filter_options,
145+
const char *arg,
146+
struct strbuf *errbuf)
147+
{
148+
struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
149+
size_t sub;
150+
int result = 0;
151+
152+
if (!subspecs[0]) {
153+
strbuf_addf(errbuf,
154+
_("expected something after combine:"));
155+
result = 1;
156+
goto cleanup;
157+
}
158+
159+
for (sub = 0; subspecs[sub] && !result; sub++) {
160+
if (subspecs[sub + 1]) {
161+
/*
162+
* This is not the last subspec. Remove trailing "+" so
163+
* we can parse it.
164+
*/
165+
size_t last = subspecs[sub]->len - 1;
166+
assert(subspecs[sub]->buf[last] == '+');
167+
strbuf_remove(subspecs[sub], last, 1);
168+
}
169+
result = parse_combine_subfilter(
170+
filter_options, subspecs[sub], errbuf);
171+
}
172+
173+
filter_options->choice = LOFC_COMBINE;
174+
175+
cleanup:
176+
strbuf_list_free(subspecs);
177+
if (result) {
178+
list_objects_filter_release(filter_options);
179+
memset(filter_options, 0, sizeof(*filter_options));
180+
}
181+
return result;
182+
}
183+
92184
int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
93185
const char *arg)
94186
{
95187
struct strbuf buf = STRBUF_INIT;
188+
filter_options->filter_spec = strdup(arg);
96189
if (gently_parse_list_objects_filter(filter_options, arg, &buf))
97190
die("%s", buf.buf);
98191
return 0;
@@ -129,8 +222,15 @@ void expand_list_objects_filter_spec(
129222
void list_objects_filter_release(
130223
struct list_objects_filter_options *filter_options)
131224
{
225+
size_t sub;
226+
227+
if (!filter_options)
228+
return;
132229
free(filter_options->filter_spec);
133230
free(filter_options->sparse_oid_value);
231+
for (sub = 0; sub < filter_options->sub_nr; sub++)
232+
list_objects_filter_release(&filter_options->sub[sub]);
233+
free(filter_options->sub);
134234
memset(filter_options, 0, sizeof(*filter_options));
135235
}
136236

@@ -174,6 +274,8 @@ void partial_clone_get_default_filter_spec(
174274
*/
175275
if (!core_partial_clone_filter_default)
176276
return;
277+
278+
filter_options->filter_spec = strdup(core_partial_clone_filter_default);
177279
gently_parse_list_objects_filter(filter_options,
178280
core_partial_clone_filter_default,
179281
&errbuf);

list-objects-filter-options.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ enum list_objects_filter_choice {
1313
LOFC_BLOB_LIMIT,
1414
LOFC_TREE_DEPTH,
1515
LOFC_SPARSE_OID,
16+
LOFC_COMBINE,
1617
LOFC__COUNT /* must be last */
1718
};
1819

@@ -38,13 +39,23 @@ struct list_objects_filter_options {
3839
unsigned int no_filter : 1;
3940

4041
/*
41-
* Parsed values (fields) from within the filter-spec. These are
42-
* choice-specific; not all values will be defined for any given
43-
* choice.
42+
* BEGIN choice-specific parsed values from within the filter-spec. Only
43+
* some values will be defined for any given choice.
4444
*/
45+
4546
struct object_id *sparse_oid_value;
4647
unsigned long blob_limit_value;
4748
unsigned long tree_exclude_depth;
49+
50+
/* LOFC_COMBINE values */
51+
52+
/* This array contains all the subfilters which this filter combines. */
53+
size_t sub_nr, sub_alloc;
54+
struct list_objects_filter_options *sub;
55+
56+
/*
57+
* END choice-specific parsed values.
58+
*/
4859
};
4960

5061
/* Normalized command line arguments */

list-objects-filter.c

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@
2626
*/
2727
#define FILTER_SHOWN_BUT_REVISIT (1<<21)
2828

29+
struct subfilter {
30+
struct filter *filter;
31+
struct oidset seen;
32+
struct oidset omits;
33+
struct object_id skip_tree;
34+
unsigned is_skipping_tree : 1;
35+
};
36+
2937
struct filter {
3038
enum list_objects_filter_result (*filter_object_fn)(
3139
struct repository *r,
@@ -36,6 +44,13 @@ struct filter {
3644
struct oidset *omits,
3745
void *filter_data);
3846

47+
/*
48+
* Optional. If this function is supplied and the filter needs to
49+
* collect omits, then this function is called once before free_fn is
50+
* called.
51+
*/
52+
void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);
53+
3954
void (*free_fn)(void *filter_data);
4055

4156
void *filter_data;
@@ -471,6 +486,147 @@ static void filter_sparse_oid__init(
471486
filter->free_fn = filter_sparse_free;
472487
}
473488

489+
/* A filter which only shows objects shown by all sub-filters. */
490+
struct combine_filter_data {
	/* Array of nr sub-filter states, one per combined filter. */
	struct subfilter *sub;

	/* Number of entries in sub. */
	size_t nr;
};
494+
495+
static int should_delegate(enum list_objects_filter_situation filter_situation,
496+
struct object *obj,
497+
struct subfilter *sub)
498+
{
499+
if (!sub->is_skipping_tree)
500+
return 1;
501+
if (filter_situation == LOFS_END_TREE &&
502+
oideq(&obj->oid, &sub->skip_tree)) {
503+
sub->is_skipping_tree = 0;
504+
return 1;
505+
}
506+
return 0;
507+
}
508+
509+
static enum list_objects_filter_result process_subfilter(
510+
struct repository *r,
511+
enum list_objects_filter_situation filter_situation,
512+
struct object *obj,
513+
const char *pathname,
514+
const char *filename,
515+
struct subfilter *sub)
516+
{
517+
enum list_objects_filter_result result;
518+
519+
/*
520+
* Check should_delegate before oidset_contains so that
521+
* is_skipping_tree gets unset even when the object is marked as seen.
522+
* As of this writing, no filter uses LOFR_MARK_SEEN on trees that also
523+
* uses LOFR_SKIP_TREE, so the ordering is only theoretically
524+
* important. Be cautious if you change the order of the below checks
525+
* and more filters have been added!
526+
*/
527+
if (!should_delegate(filter_situation, obj, sub))
528+
return LOFR_ZERO;
529+
if (oidset_contains(&sub->seen, &obj->oid))
530+
return LOFR_ZERO;
531+
532+
result = list_objects_filter__filter_object(
533+
r, filter_situation, obj, pathname, filename, sub->filter);
534+
535+
if (result & LOFR_MARK_SEEN)
536+
oidset_insert(&sub->seen, &obj->oid);
537+
538+
if (result & LOFR_SKIP_TREE) {
539+
sub->is_skipping_tree = 1;
540+
sub->skip_tree = obj->oid;
541+
}
542+
543+
return result;
544+
}
545+
546+
static enum list_objects_filter_result filter_combine(
547+
struct repository *r,
548+
enum list_objects_filter_situation filter_situation,
549+
struct object *obj,
550+
const char *pathname,
551+
const char *filename,
552+
struct oidset *omits,
553+
void *filter_data)
554+
{
555+
struct combine_filter_data *d = filter_data;
556+
enum list_objects_filter_result combined_result =
557+
LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
558+
size_t sub;
559+
560+
for (sub = 0; sub < d->nr; sub++) {
561+
enum list_objects_filter_result sub_result = process_subfilter(
562+
r, filter_situation, obj, pathname, filename,
563+
&d->sub[sub]);
564+
if (!(sub_result & LOFR_DO_SHOW))
565+
combined_result &= ~LOFR_DO_SHOW;
566+
if (!(sub_result & LOFR_MARK_SEEN))
567+
combined_result &= ~LOFR_MARK_SEEN;
568+
if (!d->sub[sub].is_skipping_tree)
569+
combined_result &= ~LOFR_SKIP_TREE;
570+
}
571+
572+
return combined_result;
573+
}
574+
575+
static void filter_combine__free(void *filter_data)
576+
{
577+
struct combine_filter_data *d = filter_data;
578+
size_t sub;
579+
for (sub = 0; sub < d->nr; sub++) {
580+
list_objects_filter__free(d->sub[sub].filter);
581+
oidset_clear(&d->sub[sub].seen);
582+
if (d->sub[sub].omits.set.size)
583+
BUG("expected oidset to be cleared already");
584+
}
585+
free(d->sub);
586+
}
587+
588+
static void add_all(struct oidset *dest, struct oidset *src) {
589+
struct oidset_iter iter;
590+
struct object_id *src_oid;
591+
592+
oidset_iter_init(src, &iter);
593+
while ((src_oid = oidset_iter_next(&iter)) != NULL)
594+
oidset_insert(dest, src_oid);
595+
}
596+
597+
static void filter_combine__finalize_omits(
598+
struct oidset *omits,
599+
void *filter_data)
600+
{
601+
struct combine_filter_data *d = filter_data;
602+
size_t sub;
603+
604+
for (sub = 0; sub < d->nr; sub++) {
605+
add_all(omits, &d->sub[sub].omits);
606+
oidset_clear(&d->sub[sub].omits);
607+
}
608+
}
609+
610+
static void filter_combine__init(
611+
struct list_objects_filter_options *filter_options,
612+
struct filter* filter)
613+
{
614+
struct combine_filter_data *d = xcalloc(1, sizeof(*d));
615+
size_t sub;
616+
617+
d->nr = filter_options->sub_nr;
618+
d->sub = xcalloc(d->nr, sizeof(*d->sub));
619+
for (sub = 0; sub < d->nr; sub++)
620+
d->sub[sub].filter = list_objects_filter__init(
621+
filter->omits ? &d->sub[sub].omits : NULL,
622+
&filter_options->sub[sub]);
623+
624+
filter->filter_data = d;
625+
filter->filter_object_fn = filter_combine;
626+
filter->free_fn = filter_combine__free;
627+
filter->finalize_omits_fn = filter_combine__finalize_omits;
628+
}
629+
474630
typedef void (*filter_init_fn)(
475631
struct list_objects_filter_options *filter_options,
476632
struct filter *filter);
@@ -484,6 +640,7 @@ static filter_init_fn s_filters[] = {
484640
filter_blobs_limit__init,
485641
filter_trees_depth__init,
486642
filter_sparse_oid__init,
643+
filter_combine__init,
487644
};
488645

489646
struct filter *list_objects_filter__init(
@@ -535,6 +692,8 @@ void list_objects_filter__free(struct filter *filter)
535692
{
536693
if (!filter)
537694
return;
695+
if (filter->finalize_omits_fn && filter->omits)
696+
filter->finalize_omits_fn(filter->omits, filter->filter_data);
538697
filter->free_fn(filter->filter_data);
539698
free(filter);
540699
}

0 commit comments

Comments
 (0)