Skip to content

Commit a43b001

Browse files
committed
Merge branch 'ds/sparse-lstat-caching'
The code to deal with modified paths that are out-of-cone in a sparsely checked out working tree has been optimized.

* ds/sparse-lstat-caching:
  - sparse-index: improve lstat caching of sparse paths
  - sparse-index: count lstat() calls
  - sparse-index: use strbuf in path_found()
  - sparse-index: refactor path_found()
  - sparse-checkout: refactor skip worktree retry logic
2 parents 125e389 + 114bff7 commit a43b001

File tree

1 file changed

+164
-52
lines changed

1 file changed

+164
-52
lines changed

sparse-index.c

Lines changed: 164 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -439,96 +439,208 @@ void ensure_correct_sparsity(struct index_state *istate)
439439
ensure_full_index(istate);
440440
}
441441

442-
static int path_found(const char *path, const char **dirname, size_t *dir_len,
443-
int *dir_found)
442+
struct path_found_data {
443+
/**
444+
* The path stored in 'dir', if non-empty, corresponds to the most-
445+
* recent path that we checked where:
446+
*
447+
* 1. The path should be a directory, according to the index.
448+
* 2. The path does not exist.
449+
* 3. The parent path _does_ exist. (This may be the root of the
450+
* working directory.)
451+
*/
452+
struct strbuf dir;
453+
size_t lstat_count;
454+
};
455+
456+
/*
 * Static initializer for 'struct path_found_data': an empty 'dir' buffer
 * and a zeroed lstat() counter.
 */
#define PATH_FOUND_DATA_INIT { \
	.dir = STRBUF_INIT, \
	.lstat_count = 0 \
}
459+
460+
static void clear_path_found_data(struct path_found_data *data)
461+
{
462+
strbuf_release(&data->dir);
463+
}
464+
465+
/**
 * Compute the length of the longest common prefix of 'path1' and 'path2'
 * that ends in a slash ('/'), i.e. their deepest shared parent directory
 * including its trailing separator. Returns zero when the two paths share
 * no leading directory.
 */
static size_t max_common_dir_prefix(const char *path1, const char *path2)
{
	size_t dir_end = 0;
	size_t pos = 0;

	/*
	 * Walk both strings while they agree. Since path1[pos] is non-NUL
	 * and equal to path2[pos], neither string has ended yet.
	 */
	while (path1[pos] && path1[pos] == path2[pos]) {
		/*
		 * A shared separator closes a common directory; record the
		 * length that includes the separator itself.
		 */
		if (path1[pos] == '/')
			dir_end = pos + 1;
		pos++;
	}

	return dir_end;
}
487+
488+
static int path_found(const char *path, struct path_found_data *data)
444489
{
445490
struct stat st;
446-
char *newdir;
447-
char *tmp;
491+
size_t common_prefix;
448492

449493
/*
450-
* If dirname corresponds to a directory that doesn't exist, and this
451-
* path starts with dirname, then path can't exist.
494+
* If data->dir is non-empty, then it contains a path that doesn't
495+
* exist, including an ending slash ('/'). If it is a prefix of 'path',
496+
* then we can return 0.
452497
*/
453-
if (!*dir_found && !memcmp(path, *dirname, *dir_len))
498+
if (data->dir.len && !memcmp(path, data->dir.buf, data->dir.len))
454499
return 0;
455500

456501
/*
457-
* If path itself exists, return 1.
502+
* Otherwise, we must check if the current path exists. If it does, then
503+
* return 1. The cached directory will be skipped until we come across
504+
* a missing path again.
458505
*/
506+
data->lstat_count++;
459507
if (!lstat(path, &st))
460508
return 1;
461509

462510
/*
463-
* Otherwise, path does not exist so we'll return 0...but we'll first
464-
* determine some info about its parent directory so we can avoid
465-
* lstat calls for future cache entries.
511+
* At this point, we know that 'path' doesn't exist, and we know that
512+
* the parent directory of 'data->dir' does exist. Let's set 'data->dir'
513+
* to be the top-most non-existing directory of 'path'. If the first
514+
* parent of 'path' exists, then we will act as though 'path'
515+
* corresponds to a directory (by adding a slash).
466516
*/
467-
newdir = strrchr(path, '/');
468-
if (!newdir)
469-
return 0; /* Didn't find a parent dir; just return 0 now. */
517+
common_prefix = max_common_dir_prefix(path, data->dir.buf);
470518

471519
/*
472-
* If path starts with directory (which we already lstat'ed and found),
473-
* then no need to lstat parent directory again.
520+
* At this point, 'path' and 'data->dir' have a common existing parent
521+
* directory given by path[0..common_prefix] (which could have length 0).
522+
* We "grow" the data->dir buffer by checking for existing directories
523+
* along 'path'.
474524
*/
475-
if (*dir_found && *dirname && memcmp(path, *dirname, *dir_len))
476-
return 0;
477525

478-
/* Free previous dirname, and cache path's dirname */
479-
*dirname = path;
480-
*dir_len = newdir - path + 1;
526+
strbuf_setlen(&data->dir, common_prefix);
527+
while (1) {
528+
/* Find the next directory in 'path'. */
529+
const char *rest = path + data->dir.len;
530+
const char *next_slash = strchr(rest, '/');
531+
532+
/*
533+
* If there are no more slashes, then 'path' doesn't contain a
534+
* non-existent _parent_ directory. Set 'data->dir' to be equal
535+
* to 'path' plus an additional slash, so it can be used for
536+
* caching in the future. The filename of 'path' is considered
537+
* a non-existent directory.
538+
*
539+
* Note: if "{path}/" exists as a directory, then it will never
540+
* appear as a prefix of other callers to this method, assuming
541+
* the context from the clear_skip_worktree... methods. If this
542+
* method is reused, then this must be reconsidered.
543+
*/
544+
if (!next_slash) {
545+
strbuf_addstr(&data->dir, rest);
546+
strbuf_addch(&data->dir, '/');
547+
break;
548+
}
549+
550+
/*
551+
* Now that we have a slash, let's grow 'data->dir' to include
552+
* this slash, then test if we should stop.
553+
*/
554+
strbuf_add(&data->dir, rest, next_slash - rest + 1);
481555

482-
tmp = xstrndup(path, *dir_len);
483-
*dir_found = !lstat(tmp, &st);
484-
free(tmp);
556+
/* If the parent dir doesn't exist, then stop here. */
557+
data->lstat_count++;
558+
if (lstat(data->dir.buf, &st))
559+
return 0;
560+
}
485561

562+
/*
563+
* At this point, 'data->dir' is equal to 'path' plus a slash character,
564+
* and the parent directory of 'path' definitely exists. Moreover, we
565+
* know that 'path' doesn't exist, or we would have returned 1 earlier.
566+
*/
486567
return 0;
487568
}
488569

489-
void clear_skip_worktree_from_present_files(struct index_state *istate)
570+
static int clear_skip_worktree_from_present_files_sparse(struct index_state *istate)
490571
{
491-
const char *last_dirname = NULL;
492-
size_t dir_len = 0;
493-
int dir_found = 1;
494-
495-
int i;
496-
int path_count[2] = {0, 0};
497-
int restarted = 0;
572+
struct path_found_data data = PATH_FOUND_DATA_INIT;
498573

499-
if (!core_apply_sparse_checkout ||
500-
sparse_expect_files_outside_of_patterns)
501-
return;
574+
int path_count = 0;
575+
int to_restart = 0;
502576

503-
trace2_region_enter("index", "clear_skip_worktree_from_present_files",
577+
trace2_region_enter("index", "clear_skip_worktree_from_present_files_sparse",
504578
istate->repo);
505-
restart:
506-
for (i = 0; i < istate->cache_nr; i++) {
579+
for (int i = 0; i < istate->cache_nr; i++) {
507580
struct cache_entry *ce = istate->cache[i];
508581

509582
if (ce_skip_worktree(ce)) {
510-
path_count[restarted]++;
511-
if (path_found(ce->name, &last_dirname, &dir_len, &dir_found)) {
583+
path_count++;
584+
if (path_found(ce->name, &data)) {
512585
if (S_ISSPARSEDIR(ce->ce_mode)) {
513-
if (restarted)
514-
BUG("ensure-full-index did not fully flatten?");
515-
ensure_full_index(istate);
516-
restarted = 1;
517-
goto restart;
586+
to_restart = 1;
587+
break;
518588
}
519589
ce->ce_flags &= ~CE_SKIP_WORKTREE;
520590
}
521591
}
522592
}
523593

524-
if (path_count[0])
525-
trace2_data_intmax("index", istate->repo,
526-
"sparse_path_count", path_count[0]);
527-
if (restarted)
528-
trace2_data_intmax("index", istate->repo,
529-
"sparse_path_count_full", path_count[1]);
530-
trace2_region_leave("index", "clear_skip_worktree_from_present_files",
594+
trace2_data_intmax("index", istate->repo,
595+
"sparse_path_count", path_count);
596+
trace2_data_intmax("index", istate->repo,
597+
"sparse_lstat_count", data.lstat_count);
598+
trace2_region_leave("index", "clear_skip_worktree_from_present_files_sparse",
599+
istate->repo);
600+
clear_path_found_data(&data);
601+
return to_restart;
602+
}
603+
604+
static void clear_skip_worktree_from_present_files_full(struct index_state *istate)
605+
{
606+
struct path_found_data data = PATH_FOUND_DATA_INIT;
607+
608+
int path_count = 0;
609+
610+
trace2_region_enter("index", "clear_skip_worktree_from_present_files_full",
611+
istate->repo);
612+
for (int i = 0; i < istate->cache_nr; i++) {
613+
struct cache_entry *ce = istate->cache[i];
614+
615+
if (S_ISSPARSEDIR(ce->ce_mode))
616+
BUG("ensure-full-index did not fully flatten?");
617+
618+
if (ce_skip_worktree(ce)) {
619+
path_count++;
620+
if (path_found(ce->name, &data))
621+
ce->ce_flags &= ~CE_SKIP_WORKTREE;
622+
}
623+
}
624+
625+
trace2_data_intmax("index", istate->repo,
626+
"full_path_count", path_count);
627+
trace2_data_intmax("index", istate->repo,
628+
"full_lstat_count", data.lstat_count);
629+
trace2_region_leave("index", "clear_skip_worktree_from_present_files_full",
531630
istate->repo);
631+
clear_path_found_data(&data);
632+
}
633+
634+
void clear_skip_worktree_from_present_files(struct index_state *istate)
635+
{
636+
if (!core_apply_sparse_checkout ||
637+
sparse_expect_files_outside_of_patterns)
638+
return;
639+
640+
if (clear_skip_worktree_from_present_files_sparse(istate)) {
641+
ensure_full_index(istate);
642+
clear_skip_worktree_from_present_files_full(istate);
643+
}
532644
}
533645

534646
/*

0 commit comments

Comments
 (0)