Skip to content

Commit e17a4f5

Browse files
committed
Merge branch 'mt/parallel-checkout-part-1' into seen
Parallel checkout.

* mt/parallel-checkout-part-1:
  ci: run test round with parallel-checkout enabled
  parallel-checkout: add tests related to .gitattributes
  parallel-checkout: add tests related to clone collisions
  parallel-checkout: add tests for basic operations
  checkout-index: add parallel checkout support
  builtin/checkout.c: complete parallel checkout support
  make_transient_cache_entry(): optionally alloc from mem_pool
  parallel-checkout: support progress displaying
  parallel-checkout: make it truly parallel
  unpack-trees: add basic support for parallel checkout
  entry: add checkout_entry_ca() which takes preloaded conv_attrs
  entry: move conv_attrs lookup up to checkout_entry()
  entry: extract cache_entry update from write_entry()
  entry: make fstat_output() and read_blob_entry() public
  entry: extract a header file for entry.c functions
  convert: add conv_attrs classification
  convert: add get_stream_filter_ca() variant
  convert: add [async_]convert_to_working_tree_ca() variants
  convert: make convert_attrs() and convert structs public
2 parents cd925a4 + 5ce40d9 commit e17a4f5

27 files changed

+1793
-152
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
/git-check-mailmap
3434
/git-check-ref-format
3535
/git-checkout
36+
/git-checkout--helper
3637
/git-checkout-index
3738
/git-cherry
3839
/git-cherry-pick

Documentation/config/checkout.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,24 @@ will checkout the '<something>' branch on another remote,
1616
and by linkgit:git-worktree[1] when 'git worktree add' refers to a
1717
remote branch. This setting might be used for other checkout-like
1818
commands or functionality in the future.
19+
20+
checkout.workers::
21+
The number of parallel workers to use when updating the working tree.
22+
The default is one, i.e. sequential execution. If set to a value less
23+
than one, Git will use as many workers as the number of logical cores
24+
available. This setting and checkout.thresholdForParallelism affect all
25+
commands that perform checkout, e.g. checkout, switch, clone, reset,
26+
sparse-checkout, read-tree, etc.
27+
+
28+
Note: parallel checkout usually delivers better performance for repositories
29+
located on SSDs or over NFS. For repositories on spinning disks and/or machines
30+
with a small number of cores, the default sequential checkout often performs
31+
better. The size and compression level of a repository might also influence how
32+
well the parallel version performs.
33+
34+
checkout.thresholdForParallelism::
35+
When running parallel checkout with a small number of files, the cost
36+
of subprocess spawning and inter-process communication might outweigh
37+
the parallelization gains. This setting allows you to define the minimum
38+
number of files for which parallel checkout should be attempted. The
39+
default is 100.

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,7 @@ LIB_OBJS += pack-revindex.o
950950
LIB_OBJS += pack-write.o
951951
LIB_OBJS += packfile.o
952952
LIB_OBJS += pager.o
953+
LIB_OBJS += parallel-checkout.o
953954
LIB_OBJS += parse-options-cb.o
954955
LIB_OBJS += parse-options.o
955956
LIB_OBJS += patch-delta.o
@@ -1068,6 +1069,7 @@ BUILTIN_OBJS += builtin/check-attr.o
10681069
BUILTIN_OBJS += builtin/check-ignore.o
10691070
BUILTIN_OBJS += builtin/check-mailmap.o
10701071
BUILTIN_OBJS += builtin/check-ref-format.o
1072+
BUILTIN_OBJS += builtin/checkout--helper.o
10711073
BUILTIN_OBJS += builtin/checkout-index.o
10721074
BUILTIN_OBJS += builtin/checkout.o
10731075
BUILTIN_OBJS += builtin/clean.o

apply.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "quote.h"
2222
#include "rerere.h"
2323
#include "apply.h"
24+
#include "entry.h"
2425

2526
struct gitdiff_data {
2627
struct strbuf *root;

builtin.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ int cmd_bugreport(int argc, const char **argv, const char *prefix);
123123
int cmd_bundle(int argc, const char **argv, const char *prefix);
124124
int cmd_cat_file(int argc, const char **argv, const char *prefix);
125125
int cmd_checkout(int argc, const char **argv, const char *prefix);
126+
int cmd_checkout__helper(int argc, const char **argv, const char *prefix);
126127
int cmd_checkout_index(int argc, const char **argv, const char *prefix);
127128
int cmd_check_attr(int argc, const char **argv, const char *prefix);
128129
int cmd_check_ignore(int argc, const char **argv, const char *prefix);

builtin/checkout--helper.c

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#include "builtin.h"
2+
#include "config.h"
3+
#include "entry.h"
4+
#include "parallel-checkout.h"
5+
#include "parse-options.h"
6+
#include "pkt-line.h"
7+
8+
static void packet_to_pc_item(char *line, int len,
9+
struct parallel_checkout_item *pc_item)
10+
{
11+
struct pc_item_fixed_portion *fixed_portion;
12+
char *encoding, *variant;
13+
14+
if (len < sizeof(struct pc_item_fixed_portion))
15+
BUG("checkout worker received too short item (got %dB, exp %dB)",
16+
len, (int)sizeof(struct pc_item_fixed_portion));
17+
18+
fixed_portion = (struct pc_item_fixed_portion *)line;
19+
20+
if (len - sizeof(struct pc_item_fixed_portion) !=
21+
fixed_portion->name_len + fixed_portion->working_tree_encoding_len)
22+
BUG("checkout worker received corrupted item");
23+
24+
variant = line + sizeof(struct pc_item_fixed_portion);
25+
26+
/*
27+
* Note: the main process uses zero length to communicate that the
28+
* encoding is NULL. There is no use case in actually sending an empty
29+
* string since it's considered as NULL when ca.working_tree_encoding
30+
* is set at git_path_check_encoding().
31+
*/
32+
if (fixed_portion->working_tree_encoding_len) {
33+
encoding = xmemdupz(variant,
34+
fixed_portion->working_tree_encoding_len);
35+
variant += fixed_portion->working_tree_encoding_len;
36+
} else {
37+
encoding = NULL;
38+
}
39+
40+
memset(pc_item, 0, sizeof(*pc_item));
41+
pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len, NULL);
42+
pc_item->ce->ce_namelen = fixed_portion->name_len;
43+
pc_item->ce->ce_mode = fixed_portion->ce_mode;
44+
memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
45+
oidcpy(&pc_item->ce->oid, &fixed_portion->oid);
46+
47+
pc_item->id = fixed_portion->id;
48+
pc_item->ca.crlf_action = fixed_portion->crlf_action;
49+
pc_item->ca.ident = fixed_portion->ident;
50+
pc_item->ca.working_tree_encoding = encoding;
51+
}
52+
53+
static void report_result(struct parallel_checkout_item *pc_item)
54+
{
55+
struct pc_item_result res = { 0 };
56+
size_t size;
57+
58+
res.id = pc_item->id;
59+
res.status = pc_item->status;
60+
61+
if (pc_item->status == PC_ITEM_WRITTEN) {
62+
res.st = pc_item->st;
63+
size = sizeof(res);
64+
} else {
65+
size = PC_ITEM_RESULT_BASE_SIZE;
66+
}
67+
68+
packet_write(1, (const char *)&res, size);
69+
}
70+
71+
/* Free the worker-side malloced data, but not pc_item itself. */
72+
static void release_pc_item_data(struct parallel_checkout_item *pc_item)
73+
{
74+
free((char *)pc_item->ca.working_tree_encoding);
75+
discard_cache_entry(pc_item->ce);
76+
}
77+
78+
static void worker_loop(struct checkout *state)
79+
{
80+
struct parallel_checkout_item *items = NULL;
81+
size_t i, nr = 0, alloc = 0;
82+
83+
while (1) {
84+
int len;
85+
char *line = packet_read_line(0, &len);
86+
87+
if (!line)
88+
break;
89+
90+
ALLOC_GROW(items, nr + 1, alloc);
91+
packet_to_pc_item(line, len, &items[nr++]);
92+
}
93+
94+
for (i = 0; i < nr; ++i) {
95+
struct parallel_checkout_item *pc_item = &items[i];
96+
write_pc_item(pc_item, state);
97+
report_result(pc_item);
98+
release_pc_item_data(pc_item);
99+
}
100+
101+
packet_flush(1);
102+
103+
free(items);
104+
}
105+
106+
static const char * const checkout_helper_usage[] = {
107+
N_("git checkout--helper [<options>]"),
108+
NULL
109+
};
110+
111+
int cmd_checkout__helper(int argc, const char **argv, const char *prefix)
112+
{
113+
struct checkout state = CHECKOUT_INIT;
114+
struct option checkout_helper_options[] = {
115+
OPT_STRING(0, "prefix", &state.base_dir, N_("string"),
116+
N_("when creating files, prepend <string>")),
117+
OPT_END()
118+
};
119+
120+
if (argc == 2 && !strcmp(argv[1], "-h"))
121+
usage_with_options(checkout_helper_usage,
122+
checkout_helper_options);
123+
124+
git_config(git_default_config, NULL);
125+
argc = parse_options(argc, argv, prefix, checkout_helper_options,
126+
checkout_helper_usage, 0);
127+
if (argc > 0)
128+
usage_with_options(checkout_helper_usage, checkout_helper_options);
129+
130+
if (state.base_dir)
131+
state.base_dir_len = strlen(state.base_dir);
132+
133+
/*
134+
* Setting this on worker won't actually update the index. We just need
135+
* to pretend so to induce the checkout machinery to stat() the written
136+
* entries.
137+
*/
138+
state.refresh_cache = 1;
139+
140+
worker_loop(&state);
141+
return 0;
142+
}

builtin/checkout-index.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include "quote.h"
1212
#include "cache-tree.h"
1313
#include "parse-options.h"
14+
#include "entry.h"
15+
#include "parallel-checkout.h"
1416

1517
#define CHECKOUT_ALL 4
1618
static int nul_term_line;
@@ -159,6 +161,7 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
159161
int prefix_length;
160162
int force = 0, quiet = 0, not_new = 0;
161163
int index_opt = 0;
164+
int pc_workers, pc_threshold;
162165
struct option builtin_checkout_index_options[] = {
163166
OPT_BOOL('a', "all", &all,
164167
N_("check out all files in the index")),
@@ -213,6 +216,14 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
213216
hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR);
214217
}
215218

219+
if (!to_tempfile)
220+
get_parallel_checkout_configs(&pc_workers, &pc_threshold);
221+
else
222+
pc_workers = 1;
223+
224+
if (pc_workers > 1)
225+
init_parallel_checkout();
226+
216227
/* Check out named files first */
217228
for (i = 0; i < argc; i++) {
218229
const char *arg = argv[i];
@@ -255,6 +266,12 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
255266
if (all)
256267
checkout_all(prefix, prefix_length);
257268

269+
if (pc_workers > 1) {
270+
/* Errors were already reported */
271+
run_parallel_checkout(&state, pc_workers, pc_threshold,
272+
NULL, NULL);
273+
}
274+
258275
if (is_lock_file_locked(&lock_file) &&
259276
write_locked_index(&the_index, &lock_file, COMMIT_LOCK))
260277
die("Unable to write new index file");

builtin/checkout.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include "unpack-trees.h"
2727
#include "wt-status.h"
2828
#include "xdiff-interface.h"
29+
#include "entry.h"
30+
#include "parallel-checkout.h"
2931

3032
static const char * const checkout_usage[] = {
3133
N_("git checkout [<options>] <branch>"),
@@ -229,7 +231,8 @@ static int checkout_stage(int stage, const struct cache_entry *ce, int pos,
229231
return error(_("path '%s' does not have their version"), ce->name);
230232
}
231233

232-
static int checkout_merged(int pos, const struct checkout *state, int *nr_checkouts)
234+
static int checkout_merged(int pos, const struct checkout *state,
235+
int *nr_checkouts, struct mem_pool *ce_mem_pool)
233236
{
234237
struct cache_entry *ce = active_cache[pos];
235238
const char *path = ce->name;
@@ -290,11 +293,10 @@ static int checkout_merged(int pos, const struct checkout *state, int *nr_checko
290293
if (write_object_file(result_buf.ptr, result_buf.size, blob_type, &oid))
291294
die(_("Unable to add merge result for '%s'"), path);
292295
free(result_buf.ptr);
293-
ce = make_transient_cache_entry(mode, &oid, path, 2);
296+
ce = make_transient_cache_entry(mode, &oid, path, 2, ce_mem_pool);
294297
if (!ce)
295298
die(_("make_cache_entry failed for path '%s'"), path);
296299
status = checkout_entry(ce, state, NULL, nr_checkouts);
297-
discard_cache_entry(ce);
298300
return status;
299301
}
300302

@@ -358,16 +360,22 @@ static int checkout_worktree(const struct checkout_opts *opts,
358360
int nr_checkouts = 0, nr_unmerged = 0;
359361
int errs = 0;
360362
int pos;
363+
int pc_workers, pc_threshold;
364+
struct mem_pool ce_mem_pool;
361365

362366
state.force = 1;
363367
state.refresh_cache = 1;
364368
state.istate = &the_index;
365369

370+
mem_pool_init(&ce_mem_pool, 0);
371+
get_parallel_checkout_configs(&pc_workers, &pc_threshold);
366372
init_checkout_metadata(&state.meta, info->refname,
367373
info->commit ? &info->commit->object.oid : &info->oid,
368374
NULL);
369375

370376
enable_delayed_checkout(&state);
377+
if (pc_workers > 1)
378+
init_parallel_checkout();
371379
for (pos = 0; pos < active_nr; pos++) {
372380
struct cache_entry *ce = active_cache[pos];
373381
if (ce->ce_flags & CE_MATCHED) {
@@ -383,10 +391,15 @@ static int checkout_worktree(const struct checkout_opts *opts,
383391
&nr_checkouts, opts->overlay_mode);
384392
else if (opts->merge)
385393
errs |= checkout_merged(pos, &state,
386-
&nr_unmerged);
394+
&nr_unmerged,
395+
&ce_mem_pool);
387396
pos = skip_same_name(ce, pos) - 1;
388397
}
389398
}
399+
if (pc_workers > 1)
400+
errs |= run_parallel_checkout(&state, pc_workers, pc_threshold,
401+
NULL, NULL);
402+
mem_pool_discard(&ce_mem_pool, should_validate_cache_entries());
390403
remove_marked_cache_entries(&the_index, 1);
391404
remove_scheduled_dirs();
392405
errs |= finish_delayed_checkout(&state, &nr_checkouts);

builtin/difftool.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "lockfile.h"
2424
#include "object-store.h"
2525
#include "dir.h"
26+
#include "entry.h"
2627

2728
static int trust_exit_code;
2829

@@ -322,7 +323,7 @@ static int checkout_path(unsigned mode, struct object_id *oid,
322323
struct cache_entry *ce;
323324
int ret;
324325

325-
ce = make_transient_cache_entry(mode, oid, path, 0);
326+
ce = make_transient_cache_entry(mode, oid, path, 0, NULL);
326327
ret = checkout_entry(ce, state, NULL, NULL);
327328

328329
discard_cache_entry(ce);

cache.h

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -355,16 +355,16 @@ struct cache_entry *make_empty_cache_entry(struct index_state *istate,
355355
size_t name_len);
356356

357357
/*
358-
* Create a cache_entry that is not intended to be added to an index.
359-
* Caller is responsible for discarding the cache_entry
360-
* with `discard_cache_entry`.
358+
* Create a cache_entry that is not intended to be added to an index. If mp is
359+
* not NULL, the entry is allocated within the given memory pool. Caller is
360+
 * responsible for discarding the cache_entry with `discard_cache_entry`, or, when mp is given, for discarding the whole pool with `mem_pool_discard`.
361361
*/
362362
struct cache_entry *make_transient_cache_entry(unsigned int mode,
363363
const struct object_id *oid,
364364
const char *path,
365-
int stage);
365+
int stage, struct mem_pool *mp);
366366

367-
struct cache_entry *make_empty_transient_cache_entry(size_t name_len);
367+
struct cache_entry *make_empty_transient_cache_entry(size_t len, struct mem_pool *mp);
368368

369369
/*
370370
* Discard cache entry.
@@ -1705,30 +1705,6 @@ const char *show_ident_date(const struct ident_split *id,
17051705
*/
17061706
int ident_cmp(const struct ident_split *, const struct ident_split *);
17071707

1708-
struct checkout {
1709-
struct index_state *istate;
1710-
const char *base_dir;
1711-
int base_dir_len;
1712-
struct delayed_checkout *delayed_checkout;
1713-
struct checkout_metadata meta;
1714-
unsigned force:1,
1715-
quiet:1,
1716-
not_new:1,
1717-
clone:1,
1718-
refresh_cache:1;
1719-
};
1720-
#define CHECKOUT_INIT { NULL, "" }
1721-
1722-
#define TEMPORARY_FILENAME_LENGTH 25
1723-
int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath, int *nr_checkouts);
1724-
void enable_delayed_checkout(struct checkout *state);
1725-
int finish_delayed_checkout(struct checkout *state, int *nr_checkouts);
1726-
/*
1727-
* Unlink the last component and schedule the leading directories for
1728-
* removal, such that empty directories get removed.
1729-
*/
1730-
void unlink_entry(const struct cache_entry *ce);
1731-
17321708
struct cache_def {
17331709
struct strbuf path;
17341710
int flags;

0 commit comments

Comments
 (0)