Skip to content

Commit e03a501

Browse files
committed
Merge branch 'jc/ls-files-killed-optim' into maint
"git ls-files -k" needs to crawl only the part of the working tree that may overlap the paths in the index to find killed files, but it shared code with the logic to find all the untracked files, which made it unnecessarily inefficient.

* jc/ls-files-killed-optim:
  dir.c::treat_one_path(): work around directory_exists_in_index_icase() breakage
  t3010: update to demonstrate "ls-files -k" optimization pitfalls
  ls-files -k: a directory only can be killed if the index has a non-directory
  dir.c: use the cache_* macro to access the current index
2 parents 74051fa + 680be04 commit e03a501

File tree

4 files changed

+67
-17
lines changed

4 files changed

+67
-17
lines changed

builtin/ls-files.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ static void show_files(struct dir_struct *dir)
219219

220220
/* For cached/deleted files we don't need to even do the readdir */
221221
if (show_others || show_killed) {
222+
if (!show_others)
223+
dir->flags |= DIR_COLLECT_KILLED_ONLY;
222224
fill_directory(dir, pathspec);
223225
if (show_others)
224226
show_other_files(dir);

dir.c

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -472,15 +472,14 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
472472
unsigned long sz;
473473
enum object_type type;
474474
void *data;
475-
struct index_state *istate = &the_index;
476475

477476
len = strlen(path);
478-
pos = index_name_pos(istate, path, len);
477+
pos = cache_name_pos(path, len);
479478
if (pos < 0)
480479
return NULL;
481-
if (!ce_skip_worktree(istate->cache[pos]))
480+
if (!ce_skip_worktree(active_cache[pos]))
482481
return NULL;
483-
data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
482+
data = read_sha1_file(active_cache[pos]->sha1, &type, &sz);
484483
if (!data || type != OBJ_BLOB) {
485484
free(data);
486485
return NULL;
@@ -927,13 +926,13 @@ enum exist_status {
927926
};
928927

929928
/*
930-
* Do not use the alphabetically stored index to look up
929+
* Do not use the alphabetically sorted index to look up
931930
* the directory name; instead, use the case insensitive
932931
* name hash.
933932
*/
934933
static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
935934
{
936-
const struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
935+
const struct cache_entry *ce = cache_name_exists(dirname, len + 1, ignore_case);
937936
unsigned char endchar;
938937

939938
if (!ce)
@@ -1175,14 +1174,51 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
11751174
int dtype, struct dirent *de)
11761175
{
11771176
int exclude;
1177+
int has_path_in_index = !!cache_name_exists(path->buf, path->len, ignore_case);
1178+
11781179
if (dtype == DT_UNKNOWN)
11791180
dtype = get_dtype(de, path->buf, path->len);
11801181

11811182
/* Always exclude indexed files */
1182-
if (dtype != DT_DIR &&
1183-
cache_name_exists(path->buf, path->len, ignore_case))
1183+
if (dtype != DT_DIR && has_path_in_index)
11841184
return path_none;
11851185

1186+
/*
1187+
* When we are looking at a directory P in the working tree,
1188+
* there are three cases:
1189+
*
1190+
* (1) P exists in the index. Everything inside the directory P in
1191+
* the working tree needs to go when P is checked out from the
1192+
* index.
1193+
*
1194+
* (2) P does not exist in the index, but there is P/Q in the index.
1195+
* We know P will stay a directory when we check out the contents
1196+
* of the index, but we do not know yet if there is a directory
1197+
* P/Q in the working tree to be killed, so we need to recurse.
1198+
*
1199+
* (3) P does not exist in the index, and there is no P/Q in the index
1200+
* to require P to be a directory, either. Only in this case do we
1201+
* know, without recursing, that nothing inside P will be
1202+
* killed.
1203+
*/
1204+
if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
1205+
(dtype == DT_DIR) &&
1206+
!has_path_in_index) {
1207+
/*
1208+
* NEEDSWORK: directory_exists_in_index_icase()
1209+
* assumes that one byte past the given path is
1210+
* readable and has '/', which needs to be fixed, but
1211+
* until then, work around it in the caller.
1212+
*/
1213+
strbuf_addch(path, '/');
1214+
if (directory_exists_in_index(path->buf, path->len - 1) ==
1215+
index_nonexistent) {
1216+
strbuf_setlen(path, path->len - 1);
1217+
return path_none;
1218+
}
1219+
strbuf_setlen(path, path->len - 1);
1220+
}
1221+
11861222
exclude = is_excluded(dir, path->buf, &dtype);
11871223

11881224
/*

dir.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ struct dir_struct {
8080
DIR_HIDE_EMPTY_DIRECTORIES = 1<<2,
8181
DIR_NO_GITLINKS = 1<<3,
8282
DIR_COLLECT_IGNORED = 1<<4,
83-
DIR_SHOW_IGNORED_TOO = 1<<5
83+
DIR_SHOW_IGNORED_TOO = 1<<5,
84+
DIR_COLLECT_KILLED_ONLY = 1<<6
8485
} flags;
8586
struct dir_entry **entries;
8687
struct dir_entry **ignored;

t/t3010-ls-files-killed-modified.sh

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ This test prepares the following in the cache:
1111
path1 - a symlink
1212
path2/file2 - a file in a directory
1313
path3/file3 - a file in a directory
14+
pathx/ju - a file in a directory
1415
submod1/ - a submodule
1516
submod2/ - another submodule
1617
@@ -23,6 +24,7 @@ and the following on the filesystem:
2324
path4 - a file
2425
path5 - a symlink
2526
path6/file6 - a file in a directory
27+
pathx/ju/nk - a file in a directory to be killed
2628
submod1/ - a submodule (modified from the cache)
2729
submod2/ - a submodule (matches the cache)
2830
@@ -44,14 +46,15 @@ modified without reporting path9 and path10. submod1 is also modified.
4446
test_expect_success 'git update-index --add to add various paths.' '
4547
date >path0 &&
4648
test_ln_s_add xyzzy path1 &&
47-
mkdir path2 path3 &&
49+
mkdir path2 path3 pathx &&
4850
date >path2/file2 &&
4951
date >path3/file3 &&
52+
>pathx/ju &&
5053
: >path7 &&
5154
date >path8 &&
5255
: >path9 &&
5356
date >path10 &&
54-
git update-index --add -- path0 path?/file? path7 path8 path9 path10 &&
57+
git update-index --add -- path0 path?/file? pathx/ju path7 path8 path9 path10 &&
5558
for i in 1 2
5659
do
5760
git init submod$i &&
@@ -77,24 +80,31 @@ test_expect_success 'git ls-files -k to show killed files.' '
7780
date >path3 &&
7881
date >path5
7982
fi &&
80-
mkdir path0 path1 path6 &&
83+
mkdir -p path0 path1 path6 pathx/ju &&
8184
date >path0/file0 &&
8285
date >path1/file1 &&
8386
date >path6/file6 &&
8487
date >path7 &&
8588
: >path8 &&
8689
: >path9 &&
8790
touch path10 &&
88-
git ls-files -k >.output
89-
'
90-
91-
test_expect_success 'validate git ls-files -k output.' '
92-
cat >.expected <<-\EOF &&
91+
>pathx/ju/nk &&
92+
cat >.expected <<-\EOF
9393
path0/file0
9494
path1/file1
9595
path2
9696
path3
97+
pathx/ju/nk
9798
EOF
99+
'
100+
101+
test_expect_success 'git ls-files -k output (w/o icase)' '
102+
git ls-files -k >.output
103+
test_cmp .expected .output
104+
'
105+
106+
test_expect_success 'git ls-files -k output (w/ icase)' '
107+
git -c core.ignorecase=true ls-files -k >.output
98108
test_cmp .expected .output
99109
'
100110

@@ -110,6 +120,7 @@ test_expect_success 'validate git ls-files -m output.' '
110120
path3/file3
111121
path7
112122
path8
123+
pathx/ju
113124
submod1
114125
EOF
115126
test_cmp .expected .output

0 commit comments

Comments
 (0)