Skip to content

Commit 9ca6189

Browse files
larsxschneider authored and
gitster committed
convert: add "status=delayed" to filter process protocol
Some `clean` / `smudge` filters might require a significant amount of time to process a single blob. During this process the Git checkout operation is blocked and Git needs to wait until the filter is done to continue with the checkout. Teach the filter process protocol (introduced in edcc858) to accept the status "delayed" as response to a filter request. Upon this response Git continues with the checkout operation and asks the filter to process the blob again after all other blobs have been processed. Git has multiple code paths that check out a blob. Support delayed checkouts only in `clone` (in unpack-trees.c) and `checkout` operations. Signed-off-by: Lars Schneider <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent e05806d commit 9ca6189

File tree

9 files changed

+176
-18
lines changed

9 files changed

+176
-18
lines changed

Documentation/gitattributes.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,15 @@ packet: git< 0000 # empty content!
473473
packet: git< 0000 # empty list, keep "status=success" unchanged!
474474
------------------------
475475

476+
If the request cannot be fulfilled within a reasonable amount of time
477+
then the filter can respond with a "delayed" status and a flush packet.
478+
Git will repeat the same request at a later point in time. The
479+
filter can delay a response multiple times for a single request.
480+
------------------------
481+
packet: git< status=delayed
482+
packet: git< 0000
483+
------------------------
484+
476485
In case the filter cannot or does not want to process the content,
477486
it is expected to respond with an "error" status.
478487
------------------------

builtin/checkout.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ static int checkout_paths(const struct checkout_opts *opts,
369369
pos = skip_same_name(ce, pos) - 1;
370370
}
371371
}
372+
errs |= checkout_delayed_entries(&state);
372373

373374
if (write_locked_index(&the_index, lock_file, COMMIT_LOCK))
374375
die(_("unable to write new index file"));

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,6 +1375,7 @@ struct checkout {
13751375

13761376
#define TEMPORARY_FILENAME_LENGTH 25
13771377
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
1378+
extern int checkout_delayed_entries(const struct checkout *state);
13781379

13791380
struct cache_def {
13801381
struct strbuf path;

convert.c

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "quote.h"
55
#include "sigchain.h"
66
#include "pkt-line.h"
7+
#include "list.h"
78

89
/*
910
* convert.c - convert a file when checking it out and checking it in.
@@ -38,6 +39,13 @@ struct text_stat {
3839
unsigned printable, nonprintable;
3940
};
4041

42+
static LIST_HEAD(delayed_item_queue_head);
43+
44+
struct delayed_item {
45+
void* item;
46+
struct list_head node;
47+
};
48+
4149
static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
4250
{
4351
unsigned long i;
@@ -672,7 +680,7 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons
672680
}
673681

674682
static int apply_multi_file_filter(const char *path, const char *src, size_t len,
675-
int fd, struct strbuf *dst, const char *cmd,
683+
int fd, struct strbuf *dst, int *delayed, const char *cmd,
676684
const unsigned int wanted_capability)
677685
{
678686
int err;
@@ -738,9 +746,14 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len
738746
goto done;
739747

740748
read_multi_file_filter_status(process->out, &filter_status);
741-
err = strcmp(filter_status.buf, "success");
742-
if (err)
749+
if (delayed && !strcmp(filter_status.buf, "delayed")) {
750+
*delayed = 1;
743751
goto done;
752+
} else {
753+
err = strcmp(filter_status.buf, "success");
754+
if (err)
755+
goto done;
756+
}
744757

745758
err = read_packetized_to_strbuf(process->out, &nbuf) < 0;
746759
if (err)
@@ -787,8 +800,8 @@ static struct convert_driver {
787800
} *user_convert, **user_convert_tail;
788801

789802
static int apply_filter(const char *path, const char *src, size_t len,
790-
int fd, struct strbuf *dst, struct convert_driver *drv,
791-
const unsigned int wanted_capability)
803+
int fd, struct strbuf *dst, int *delayed,
804+
struct convert_driver *drv, const unsigned int wanted_capability)
792805
{
793806
const char *cmd = NULL;
794807

@@ -806,7 +819,7 @@ static int apply_filter(const char *path, const char *src, size_t len,
806819
if (cmd && *cmd)
807820
return apply_single_file_filter(path, src, len, fd, dst, cmd);
808821
else if (drv->process && *drv->process)
809-
return apply_multi_file_filter(path, src, len, fd, dst, drv->process, wanted_capability);
822+
return apply_multi_file_filter(path, src, len, fd, dst, delayed, drv->process, wanted_capability);
810823

811824
return 0;
812825
}
@@ -1152,7 +1165,7 @@ int would_convert_to_git_filter_fd(const char *path)
11521165
if (!ca.drv->required)
11531166
return 0;
11541167

1155-
return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN);
1168+
return apply_filter(path, NULL, 0, -1, NULL, NULL, ca.drv, CAP_CLEAN);
11561169
}
11571170

11581171
const char *get_convert_attr_ascii(const char *path)
@@ -1189,7 +1202,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
11891202

11901203
convert_attrs(&ca, path);
11911204

1192-
ret |= apply_filter(path, src, len, -1, dst, ca.drv, CAP_CLEAN);
1205+
ret |= apply_filter(path, src, len, -1, dst, NULL, ca.drv, CAP_CLEAN);
11931206
if (!ret && ca.drv && ca.drv->required)
11941207
die("%s: clean filter '%s' failed", path, ca.drv->name);
11951208

@@ -1214,15 +1227,15 @@ void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
12141227
assert(ca.drv);
12151228
assert(ca.drv->clean || ca.drv->process);
12161229

1217-
if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN))
1230+
if (!apply_filter(path, NULL, 0, fd, dst, NULL, ca.drv, CAP_CLEAN))
12181231
die("%s: clean filter '%s' failed", path, ca.drv->name);
12191232

12201233
crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
12211234
ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
12221235
}
12231236

12241237
static int convert_to_working_tree_internal(const char *path, const char *src,
1225-
size_t len, struct strbuf *dst,
1238+
size_t len, struct strbuf *dst, int *delayed,
12261239
int normalizing)
12271240
{
12281241
int ret = 0, ret_filter = 0;
@@ -1248,21 +1261,50 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
12481261
}
12491262
}
12501263

1251-
ret_filter = apply_filter(path, src, len, -1, dst, ca.drv, CAP_SMUDGE);
1264+
ret_filter = apply_filter(path, src, len, -1, dst, delayed, ca.drv, CAP_SMUDGE);
12521265
if (!ret_filter && ca.drv && ca.drv->required)
12531266
die("%s: smudge filter %s failed", path, ca.drv->name);
12541267

12551268
return ret | ret_filter;
12561269
}
12571270

1271+
int async_convert_to_working_tree(const char *path, const char *src,
1272+
size_t len, struct strbuf *dst, void *item)
1273+
{
1274+
int delayed = 0;
1275+
struct delayed_item *delayed_item;
1276+
if (convert_to_working_tree_internal(path, src, len, dst, &delayed, 0)) {
1277+
if (delayed) {
1278+
delayed_item = xmalloc(sizeof(*delayed_item));
1279+
delayed_item->item = item;
1280+
list_add_tail(&delayed_item->node, &delayed_item_queue_head);
1281+
return ASYNC_FILTER_DELAYED;
1282+
}
1283+
return ASYNC_FILTER_SUCCESS;
1284+
}
1285+
return ASYNC_FILTER_FAIL;
1286+
}
1287+
1288+
void* async_filter_finish(void)
1289+
{
1290+
struct delayed_item *head;
1291+
if (!list_empty(&delayed_item_queue_head)) {
1292+
head = list_first_entry(&delayed_item_queue_head,
1293+
struct delayed_item, node);
1294+
list_del(&head->node);
1295+
return head->item;
1296+
}
1297+
return NULL;
1298+
}
1299+
12581300
int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
12591301
{
1260-
return convert_to_working_tree_internal(path, src, len, dst, 0);
1302+
return convert_to_working_tree_internal(path, src, len, dst, NULL, 0);
12611303
}
12621304

12631305
int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst)
12641306
{
1265-
int ret = convert_to_working_tree_internal(path, src, len, dst, 1);
1307+
int ret = convert_to_working_tree_internal(path, src, len, dst, NULL, 1);
12661308
if (ret) {
12671309
src = dst->buf;
12681310
len = dst->len;

convert.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@
44
#ifndef CONVERT_H
55
#define CONVERT_H
66

7+
enum async_filter {
8+
ASYNC_FILTER_SUCCESS = 0,
9+
ASYNC_FILTER_FAIL = 1,
10+
ASYNC_FILTER_DELAYED = 2
11+
};
12+
13+
extern enum async_filter async_filter;
14+
15+
716
enum safe_crlf {
817
SAFE_CRLF_FALSE = 0,
918
SAFE_CRLF_FAIL = 1,
@@ -42,6 +51,10 @@ extern int convert_to_git(const char *path, const char *src, size_t len,
4251
struct strbuf *dst, enum safe_crlf checksafe);
4352
extern int convert_to_working_tree(const char *path, const char *src,
4453
size_t len, struct strbuf *dst);
54+
extern int async_convert_to_working_tree(const char *path, const char *src,
55+
size_t len, struct strbuf *dst,
56+
void *item);
57+
extern void* async_filter_finish(void);
4558
extern int renormalize_buffer(const char *path, const char *src, size_t len,
4659
struct strbuf *dst);
4760
static inline int would_convert_to_git(const char *path)

entry.c

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,17 @@ static int write_entry(struct cache_entry *ce,
177177
/*
178178
* Convert from git internal format to working tree format
179179
*/
180-
if (ce_mode_s_ifmt == S_IFREG &&
181-
convert_to_working_tree(ce->name, new, size, &buf)) {
182-
free(new);
183-
new = strbuf_detach(&buf, &newsize);
184-
size = newsize;
180+
if (ce_mode_s_ifmt == S_IFREG) {
181+
ret = async_convert_to_working_tree(ce->name, new, size, &buf, ce);
182+
if (ret == ASYNC_FILTER_SUCCESS) {
183+
free(new);
184+
new = strbuf_detach(&buf, &newsize);
185+
size = newsize;
186+
}
187+
else if (ret == ASYNC_FILTER_DELAYED) {
188+
free(new);
189+
goto finish;
190+
}
185191
}
186192

187193
fd = open_output_fd(path, ce, to_tempfile);
@@ -291,3 +297,16 @@ int checkout_entry(struct cache_entry *ce,
291297
create_directories(path.buf, path.len, state);
292298
return write_entry(ce, path.buf, state, 0);
293299
}
300+
301+
int checkout_delayed_entries(const struct checkout *state)
302+
{
303+
struct cache_entry *ce;
304+
int errs = 0;
305+
306+
while ((ce = async_filter_finish())) {
307+
ce->ce_flags &= ~CE_UPDATE;
308+
errs |= checkout_entry(ce, state, NULL);
309+
}
310+
311+
return errs;
312+
}

t/t0021-conversion.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,4 +701,57 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
701701
)
702702
'
703703

704+
test_expect_success PERL 'delayed checkout in process filter' '
705+
test_config_global filter.protocol.process "rot13-filter.pl clean smudge" &&
706+
test_config_global filter.protocol.required true &&
707+
rm -rf repo &&
708+
mkdir repo &&
709+
(
710+
cd repo &&
711+
git init &&
712+
echo "*.r filter=protocol" >.gitattributes &&
713+
cp "$TEST_ROOT/test.o" test.r &&
714+
cp "$TEST_ROOT/test.o" test-delay1.r &&
715+
cp "$TEST_ROOT/test.o" test-delay3.r &&
716+
git add . &&
717+
git commit -m "test commit 1"
718+
) &&
719+
720+
S=$(file_size repo/test.r) &&
721+
rm -rf repo-cloned &&
722+
filter_git clone repo repo-cloned &&
723+
cat >expected.log <<-EOF &&
724+
START
725+
init handshake complete
726+
IN: smudge test.r $S [OK] -- OUT: $S . [OK]
727+
IN: smudge test-delay1.r $S [OK] -- OUT: $S [DELAYED]
728+
IN: smudge test-delay1.r $S [OK] -- OUT: $S . [OK]
729+
IN: smudge test-delay3.r $S [OK] -- OUT: $S [DELAYED]
730+
IN: smudge test-delay3.r $S [OK] -- OUT: $S [DELAYED]
731+
IN: smudge test-delay3.r $S [OK] -- OUT: $S [DELAYED]
732+
IN: smudge test-delay3.r $S [OK] -- OUT: $S . [OK]
733+
STOP
734+
EOF
735+
test_cmp_count expected.log repo-cloned/rot13-filter.log &&
736+
737+
(
738+
cd repo-cloned &&
739+
rm *.r rot13-filter.log &&
740+
filter_git checkout . &&
741+
cat >expected.log <<-EOF &&
742+
START
743+
init handshake complete
744+
IN: smudge test.r $S [OK] -- OUT: $S . [OK]
745+
IN: smudge test-delay1.r $S [OK] -- OUT: $S [DELAYED]
746+
IN: smudge test-delay1.r $S [OK] -- OUT: $S . [OK]
747+
IN: smudge test-delay3.r $S [OK] -- OUT: $S [DELAYED]
748+
IN: smudge test-delay3.r $S [OK] -- OUT: $S [DELAYED]
749+
IN: smudge test-delay3.r $S [OK] -- OUT: $S [DELAYED]
750+
IN: smudge test-delay3.r $S [OK] -- OUT: $S . [OK]
751+
STOP
752+
EOF
753+
test_cmp_count expected.log rot13-filter.log
754+
)
755+
'
756+
704757
test_done

t/t0021/rot13-filter.pl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
# operation then the filter signals that it cannot or does not want
1818
# to process the file and any file after that is processed with the
1919
# same command.
20+
# (5) If data with a pathname that is a key in the DELAY hash is
21+
# processed (e.g. 'test-delay1.r') then the filter signals n times
22+
# to Git that the processing is delayed (n being the value of the
23+
# DELAY hash entry for that pathname).
2024
#
2125

2226
use strict;
@@ -25,6 +29,12 @@
2529

2630
my $MAX_PACKET_CONTENT_SIZE = 65516;
2731
my @capabilities = @ARGV;
32+
my $DELAY3 = 3;
33+
my $DELAY1 = 1;
34+
35+
my %DELAY;
36+
$DELAY{'test-delay1.r'} = 1;
37+
$DELAY{'test-delay3.r'} = 3;
2838

2939
open my $debug, ">>", "rot13-filter.log" or die "cannot open log file: $!";
3040

@@ -166,6 +176,15 @@ sub packet_flush {
166176
packet_txt_write("status=abort");
167177
packet_flush();
168178
}
179+
elsif ( $command eq "smudge" and
180+
exists $DELAY{$pathname} and
181+
$DELAY{$pathname} > 0 ) {
182+
$DELAY{$pathname} = $DELAY{$pathname} - 1;
183+
print $debug "[DELAYED]\n";
184+
$debug->flush();
185+
packet_txt_write("status=delayed");
186+
packet_flush();
187+
}
169188
else {
170189
packet_txt_write("status=success");
171190
packet_flush();

unpack-trees.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ static int check_updates(struct unpack_trees_options *o,
268268
}
269269
}
270270
}
271+
errs |= checkout_delayed_entries(&state);
271272
stop_progress(&progress);
272273
if (o->update)
273274
git_attr_set_direction(GIT_ATTR_CHECKIN, NULL);

0 commit comments

Comments
 (0)