Skip to content

Commit e26e8bf

Browse files
matheustavares authored and gitster committed
grep: honor sparse checkout patterns
One of the main uses for a sparse checkout is to allow users to focus on the subset of files in a repository in which they are interested. But git-grep currently ignores the sparsity patterns and reports all matches found outside this subset, which kind of goes in the opposite direction.

There are some use cases for ignoring the sparsity patterns and the next commit will add an option to obtain this behavior, but here we start by making grep honor the sparsity boundaries in every case where this is relevant:

- git grep in worktree
- git grep --cached
- git grep $REVISION

For the worktree and cached cases, we iterate over paths without the SKIP_WORKTREE bit set, and limit our searches to these paths. For the $REVISION case, we limit the paths we search to those that match the sparsity patterns.

(We do not check the SKIP_WORKTREE bit for the $REVISION case, because $REVISION may contain paths that do not exist in HEAD and thus for which we have no SKIP_WORKTREE bit to consult. The sparsity patterns tell us how the SKIP_WORKTREE bit would be set if we were to check out $REVISION, so we consult those. Also, we don't use the sparsity patterns with the worktree or cached cases, both because we have a bit we can check directly and more efficiently, and because unmerged entries from a merge or a rebase could cause more files to temporarily be present than the sparsity patterns would normally select.)

Note that there is a special case here: `git grep $TREE`. In this case, we cannot know whether $TREE corresponds to the root of the repository or some sub-tree, and thus there is no way for us to know which sparsity patterns, if any, apply. So the $TREE case will not use sparsity patterns or any SKIP_WORKTREE bits and will instead always search all files within the $TREE.

Signed-off-by: Matheus Tavares <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 92bca22 commit e26e8bf

File tree

3 files changed

+312
-17
lines changed

3 files changed

+312
-17
lines changed

builtin/grep.c

Lines changed: 117 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ static int grep_cache(struct grep_opt *opt,
410410
const struct pathspec *pathspec, int cached);
411411
static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
412412
struct tree_desc *tree, struct strbuf *base, int tn_len,
413-
int check_attr);
413+
int is_root_tree);
414414

415415
static int grep_submodule(struct grep_opt *opt,
416416
const struct pathspec *pathspec,
@@ -508,6 +508,10 @@ static int grep_cache(struct grep_opt *opt,
508508

509509
for (nr = 0; nr < repo->index->cache_nr; nr++) {
510510
const struct cache_entry *ce = repo->index->cache[nr];
511+
512+
if (ce_skip_worktree(ce))
513+
continue;
514+
511515
strbuf_setlen(&name, name_base_len);
512516
strbuf_addstr(&name, ce->name);
513517

@@ -520,8 +524,7 @@ static int grep_cache(struct grep_opt *opt,
520524
* cache entry are identical, even if worktree file has
521525
* been modified, so use cache version instead
522526
*/
523-
if (cached || (ce->ce_flags & CE_VALID) ||
524-
ce_skip_worktree(ce)) {
527+
if (cached || (ce->ce_flags & CE_VALID)) {
525528
if (ce_stage(ce) || ce_intent_to_add(ce))
526529
continue;
527530
hit |= grep_oid(opt, &ce->oid, name.buf,
@@ -552,9 +555,76 @@ static int grep_cache(struct grep_opt *opt,
552555
return hit;
553556
}
554557

555-
static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
556-
struct tree_desc *tree, struct strbuf *base, int tn_len,
557-
int check_attr)
558+
static struct pattern_list *get_sparsity_patterns(struct repository *repo)
559+
{
560+
struct pattern_list *patterns;
561+
char *sparse_file;
562+
int sparse_config, cone_config;
563+
564+
if (repo_config_get_bool(repo, "core.sparsecheckout", &sparse_config) ||
565+
!sparse_config) {
566+
return NULL;
567+
}
568+
569+
sparse_file = repo_git_path(repo, "info/sparse-checkout");
570+
patterns = xcalloc(1, sizeof(*patterns));
571+
572+
if (repo_config_get_bool(repo, "core.sparsecheckoutcone", &cone_config))
573+
cone_config = 0;
574+
patterns->use_cone_patterns = cone_config;
575+
576+
if (add_patterns_from_file_to_list(sparse_file, "", 0, patterns, NULL)) {
577+
if (file_exists(sparse_file)) {
578+
warning(_("failed to load sparse-checkout file: '%s'"),
579+
sparse_file);
580+
}
581+
free(sparse_file);
582+
free(patterns);
583+
return NULL;
584+
}
585+
586+
free(sparse_file);
587+
return patterns;
588+
}
589+
590+
static int path_in_sparse_checkout(struct strbuf *path, int prefix_len,
591+
unsigned int entry_mode,
592+
struct index_state *istate,
593+
struct pattern_list *sparsity,
594+
enum pattern_match_result parent_match,
595+
enum pattern_match_result *match)
596+
{
597+
int dtype = DT_UNKNOWN;
598+
int is_dir = S_ISDIR(entry_mode);
599+
600+
if (parent_match == MATCHED_RECURSIVE) {
601+
*match = parent_match;
602+
return 1;
603+
}
604+
605+
if (is_dir && !is_dir_sep(path->buf[path->len - 1]))
606+
strbuf_addch(path, '/');
607+
608+
*match = path_matches_pattern_list(path->buf, path->len,
609+
path->buf + prefix_len, &dtype,
610+
sparsity, istate);
611+
if (*match == UNDECIDED)
612+
*match = parent_match;
613+
614+
if (is_dir)
615+
strbuf_trim_trailing_dir_sep(path);
616+
617+
if (*match == NOT_MATCHED &&
618+
(!is_dir || (is_dir && sparsity->use_cone_patterns)))
619+
return 0;
620+
621+
return 1;
622+
}
623+
624+
static int do_grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
625+
struct tree_desc *tree, struct strbuf *base, int tn_len,
626+
int check_attr, struct pattern_list *sparsity,
627+
enum pattern_match_result default_sparsity_match)
558628
{
559629
struct repository *repo = opt->repo;
560630
int hit = 0;
@@ -570,6 +640,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
570640

571641
while (tree_entry(tree, &entry)) {
572642
int te_len = tree_entry_len(&entry);
643+
enum pattern_match_result sparsity_match = 0;
573644

574645
if (match != all_entries_interesting) {
575646
strbuf_addstr(&name, base->buf + tn_len);
@@ -586,6 +657,19 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
586657

587658
strbuf_add(base, entry.path, te_len);
588659

660+
if (sparsity) {
	/*
	 * path_in_sparse_checkout() may temporarily modify the path it
	 * is given, so hand it a scratch copy rather than `base`.
	 */
	struct strbuf path = STRBUF_INIT;
	int in_checkout;

	strbuf_addstr(&path, base->buf + tn_len);
	in_checkout = path_in_sparse_checkout(&path, old_baselen - tn_len,
					      entry.mode, repo->index,
					      sparsity, default_sparsity_match,
					      &sparsity_match);
	/* Release the scratch copy on every path; it used to leak. */
	strbuf_release(&path);

	if (!in_checkout) {
		strbuf_setlen(base, old_baselen);
		continue;
	}
}
672+
589673
if (S_ISREG(entry.mode)) {
590674
hit |= grep_oid(opt, &entry.oid, base->buf, tn_len,
591675
check_attr ? base->buf + tn_len : NULL);
@@ -602,8 +686,8 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
602686

603687
strbuf_addch(base, '/');
604688
init_tree_desc(&sub, data, size);
605-
hit |= grep_tree(opt, pathspec, &sub, base, tn_len,
606-
check_attr);
689+
hit |= do_grep_tree(opt, pathspec, &sub, base, tn_len,
690+
check_attr, sparsity, sparsity_match);
607691
free(data);
608692
} else if (recurse_submodules && S_ISGITLINK(entry.mode)) {
609693
hit |= grep_submodule(opt, pathspec, &entry.oid,
@@ -621,6 +705,31 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
621705
return hit;
622706
}
623707

708+
/*
709+
* Note: sparsity patterns and paths' attributes will only be considered if
710+
* is_root_tree has true value. (Otherwise, we cannot properly perform pattern
711+
* matching on paths.)
712+
*/
713+
static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
714+
struct tree_desc *tree, struct strbuf *base, int tn_len,
715+
int is_root_tree)
716+
{
717+
struct pattern_list *patterns = NULL;
718+
int ret;
719+
720+
if (is_root_tree)
721+
patterns = get_sparsity_patterns(opt->repo);
722+
723+
ret = do_grep_tree(opt, pathspec, tree, base, tn_len, is_root_tree,
724+
patterns, 0);
725+
726+
if (patterns) {
727+
clear_pattern_list(patterns);
728+
free(patterns);
729+
}
730+
return ret;
731+
}
732+
624733
static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
625734
struct object *obj, const char *name, const char *path)
626735
{

t/t7011-skip-worktree-reading.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,6 @@ test_expect_success 'ls-files --modified' '
109109
test -z "$(git ls-files -m)"
110110
'
111111

112-
test_expect_success 'grep with skip-worktree file' '
113-
git update-index --no-skip-worktree 1 &&
114-
echo test > 1 &&
115-
git update-index 1 &&
116-
git update-index --skip-worktree 1 &&
117-
rm 1 &&
118-
test "$(git grep --no-ext-grep test)" = "1:test"
119-
'
120-
121112
echo ":000000 100644 $ZERO_OID $EMPTY_BLOB A 1" > expected
122113
test_expect_success 'diff-index does not examine skip-worktree absent entries' '
123114
setup_absent &&

t/t7817-grep-sparse-checkout.sh

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#!/bin/sh
2+
3+
test_description='grep in sparse checkout
4+
5+
This test creates a repo with the following structure:
6+
7+
.
8+
|-- a
9+
|-- b
10+
|-- dir
11+
| `-- c
12+
|-- sub
13+
| |-- A
14+
| | `-- a
15+
| `-- B
16+
| `-- b
17+
`-- sub2
18+
`-- a
19+
20+
Where the outer repository has non-cone mode sparsity patterns, sub is a
21+
submodule with cone mode sparsity patterns and sub2 is a submodule that is
22+
excluded by the superproject sparsity patterns. The resulting sparse checkout
23+
should leave the following structure in the working tree:
24+
25+
.
26+
|-- a
27+
|-- sub
28+
| `-- B
29+
| `-- b
30+
`-- sub2
31+
`-- a
32+
33+
But note that sub2 should have the SKIP_WORKTREE bit set.
34+
'
35+
36+
. ./test-lib.sh
37+
38+
test_expect_success 'setup' '
	# Files tracked directly by the superproject.
	echo "text" >a &&
	echo "text" >b &&
	mkdir dir &&
	echo "text" >dir/c &&

	# sub: submodule using cone mode patterns that keep only B/.
	git init sub &&
	mkdir sub/A sub/B &&
	echo "text" >sub/A/a &&
	echo "text" >sub/B/b &&
	git -C sub add A B &&
	git -C sub commit -m sub &&
	git -C sub sparse-checkout init --cone &&
	git -C sub sparse-checkout set B &&

	# sub2: submodule with a single tracked file.
	git init sub2 &&
	echo "text" >sub2/a &&
	git -C sub2 add a &&
	git -C sub2 commit -m sub2 &&

	git submodule add ./sub &&
	git submodule add ./sub2 &&
	git add a b dir &&
	git commit -m super &&
	git sparse-checkout init --no-cone &&
	git sparse-checkout set "/*" "!b" "!/*/" "sub" &&

	# Annotated tags pointing at a commit and at a tree.
	git tag -am tag-to-commit tag-to-commit HEAD &&
	tree=$(git rev-parse HEAD^{tree}) &&
	git tag -am tag-to-tree tag-to-tree $tree &&

	test_path_is_missing b &&
	test_path_is_missing dir &&
	test_path_is_missing sub/A &&
	test_path_is_file a &&
	test_path_is_file sub/B/b &&
	test_path_is_file sub2/a
'
82+
83+
# Special case: sparse checkout is enabled and the sparsity patterns exclude
# '/b', yet the path is present in the working tree (e.g. it was created by
# hand after `git sparse-checkout init`). grep is expected to skip it.
test_expect_success 'grep in working tree should honor sparse checkout' '
	echo "a:text" >expect &&
	echo "new-text" >b &&
	test_when_finished "rm b" &&
	git grep "text" >actual &&
	test_cmp expect actual
'
96+
97+
test_expect_success 'grep unmerged file despite not matching sparsity patterns' '
	cat >expect <<-\EOF &&
	b:modified-b-in-branchX
	b:modified-b-in-branchY
	EOF
	test_when_finished "test_might_fail git merge --abort && \
			    git checkout master" &&

	# Create two branches with conflicting changes to b while sparse
	# checkout is turned off.
	git sparse-checkout disable &&
	git checkout -b branchY master &&
	test_commit modified-b-in-branchY b &&
	git checkout -b branchX master &&
	test_commit modified-b-in-branchX b &&

	# Turn sparse checkout back on, then trigger the conflicting merge.
	git sparse-checkout init &&
	test_path_is_missing b &&
	test_must_fail git merge branchY &&
	git grep "modified-b" >actual &&
	test_cmp expect actual
'
117+
118+
test_expect_success 'grep --cached should honor sparse checkout' '
	echo "a:text" >expect &&
	git grep --cached "text" >actual &&
	test_cmp expect actual
'
125+
126+
test_expect_success 'grep <commit-ish> should honor sparse checkout' '
	commit=$(git rev-parse HEAD) &&
	echo "$commit:a:text" >expect_commit &&
	echo "tag-to-commit:a:text" >expect_tag-to-commit &&

	# Same search, once by object name and once through the annotated tag.
	git grep "text" $commit >actual_commit &&
	test_cmp expect_commit actual_commit &&
	git grep "text" tag-to-commit >actual_tag-to-commit &&
	test_cmp expect_tag-to-commit actual_tag-to-commit
'
139+
140+
test_expect_success 'grep <tree-ish> should ignore sparsity patterns' '
	tree=$(git rev-parse HEAD^{tree}) &&

	# Every file recorded directly in the tree is expected to be
	# searched, including b and dir/c, which the sparsity patterns
	# exclude.
	cat >expect_tree <<-EOF &&
	$tree:a:text
	$tree:b:text
	$tree:dir/c:text
	EOF
	cat >expect_tag-to-tree <<-\EOF &&
	tag-to-tree:a:text
	tag-to-tree:b:text
	tag-to-tree:dir/c:text
	EOF
	git grep "text" $tree >actual_tree &&
	test_cmp expect_tree actual_tree &&
	git grep "text" tag-to-tree >actual_tag-to-tree &&
	test_cmp expect_tag-to-tree actual_tag-to-tree
'
158+
159+
# sub2/ exists in the worktree, but the sparsity patterns exclude it, so grep
# should not recurse into it.
test_expect_success 'grep --recurse-submodules should honor sparse checkout in submodule' '
	cat >expect <<-\EOF &&
	a:text
	sub/B/b:text
	EOF
	git grep --recurse-submodules "text" >actual &&
	test_cmp expect actual
'
169+
170+
test_expect_success 'grep --recurse-submodules --cached should honor sparse checkout in submodule' '
	cat >expect <<-\EOF &&
	a:text
	sub/B/b:text
	EOF
	git grep --recurse-submodules --cached "text" >actual &&
	test_cmp expect actual
'
178+
179+
test_expect_success 'grep --recurse-submodules <commit-ish> should honor sparse checkout in submodule' '
	commit=$(git rev-parse HEAD) &&
	printf "%s\n" "$commit:a:text" "$commit:sub/B/b:text" >expect_commit &&
	cat >expect_tag-to-commit <<-\EOF &&
	tag-to-commit:a:text
	tag-to-commit:sub/B/b:text
	EOF

	# Same search, once by object name and once through the annotated tag.
	git grep --recurse-submodules "text" $commit >actual_commit &&
	test_cmp expect_commit actual_commit &&
	git grep --recurse-submodules "text" tag-to-commit >actual_tag-to-commit &&
	test_cmp expect_tag-to-commit actual_tag-to-commit
'
194+
195+
test_done

0 commit comments

Comments
 (0)