Skip to content

Commit d316af0

Browse files
avar authored and gitster committed
grep tests: move binary pattern tests into their own file
Move the tests for "-f <file>" where "<file>" contains a NUL byte pattern into their own file. I added most of these tests in 966be95 ("grep: add tests to fix blind spots with \0 patterns", 2017-05-20). Whether a regex engine supports matching binary content is very different from whether it matches binary patterns. Since 2f89522 ("regex: add regexec_buf() that can work on a non NUL-terminated string", 2016-09-21) we've required REG_STARTEND of our regex engines so we can match binary content, but only the PCRE v2 engine can sensibly match binary patterns. Since 9ecedde ("Use kwset in grep", 2011-08-21) we've been punting patterns containing NUL-byte and considering them fixed, except in cases where "--ignore-case" is provided and they're non-ASCII, see 5c1ebcc ("grep/icase: avoid kwsset on literal non-ascii strings", 2016-06-25). Subsequent commits will change this behavior. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 471dac5 commit d316af0

File tree

2 files changed

+114
-101
lines changed

2 files changed

+114
-101
lines changed

t/t7815-grep-binary.sh

Lines changed: 0 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -4,41 +4,6 @@ test_description='git grep in binary files'
44

55
. ./test-lib.sh
66

7-
# nul_match <matches> <flags> <pattern>
#
# Register one test that runs "git grep -f f <flags> a", where the pattern
# file "f" is produced by piping <pattern> through q_to_nul.
#
#   $1 matches - expected-outcome selector:
#        1   test_expect_success; git grep must exit successfully
#        0   test_expect_success; git grep is wrapped in test_must_fail
#        T1  same body as 1, but registered with test_expect_failure
#        T0  same body as 0, but registered with test_expect_failure
#        any other value registers a test whose body is 'false'
#   $2 flags   - extra git grep options, inserted unquoted (may be empty)
#   $3 pattern - pattern text; every "Q" is shown as <NUL> in the test title
#
# The test bodies are double-quoted, so $flags and $pattern are expanded
# when nul_match is called, not when the test body is evaluated. The
# pattern also ends up as the printf format string, so "%" or backslash
# sequences in it would be interpreted by printf, and a single quote in it
# would end the quoting inside the body.
#
# matches, flags, pattern and pattern_human are assigned without "local",
# so they remain set in the calling shell after the function returns.
#
# NOTE(review): q_to_nul, test_expect_success, test_expect_failure and
# test_must_fail are not defined in this view; presumably they come from
# ./test-lib.sh and q_to_nul turns each "Q" into a NUL byte -- confirm.
nul_match () {
8-
matches=$1
9-
flags=$2
10-
pattern=$3
11-
# Human-readable form of the pattern, used only in the test titles.
pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g')
12-
13-
# 1: the grep is expected to succeed.
if test "$matches" = 1
14-
then
15-
test_expect_success "git grep -f f $flags '$pattern_human' a" "
16-
printf '$pattern' | q_to_nul >f &&
17-
git grep -f f $flags a
18-
"
19-
# 0: the grep is expected to fail (wrapped in test_must_fail).
elif test "$matches" = 0
20-
then
21-
test_expect_success "git grep -f f $flags '$pattern_human' a" "
22-
printf '$pattern' | q_to_nul >f &&
23-
test_must_fail git grep -f f $flags a
24-
"
25-
# T1: same body as 1, registered with test_expect_failure.
elif test "$matches" = T1
26-
then
27-
test_expect_failure "git grep -f f $flags '$pattern_human' a" "
28-
printf '$pattern' | q_to_nul >f &&
29-
git grep -f f $flags a
30-
"
31-
# T0: same body as 0, registered with test_expect_failure.
elif test "$matches" = T0
32-
then
33-
test_expect_failure "git grep -f f $flags '$pattern_human' a" "
34-
printf '$pattern' | q_to_nul >f &&
35-
test_must_fail git grep -f f $flags a
36-
"
37-
else
38-
# Unknown selector: register a test whose body is 'false' so the mistake
# shows up in the test results.
test_expect_success "PANIC: Test framework error. Unknown matches value $matches" 'false'
39-
fi
40-
}
41-
427
test_expect_success 'setup' "
438
echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a &&
449
git add a &&
@@ -102,72 +67,6 @@ test_expect_failure 'git grep .fi a' '
10267
git grep .fi a
10368
'
10469

105-
nul_match 1 '-F' 'yQf'
106-
nul_match 0 '-F' 'yQx'
107-
nul_match 1 '-Fi' 'YQf'
108-
nul_match 0 '-Fi' 'YQx'
109-
nul_match 1 '' 'yQf'
110-
nul_match 0 '' 'yQx'
111-
nul_match 1 '' 'æQð'
112-
nul_match 1 '-F' 'eQm[*]c'
113-
nul_match 1 '-Fi' 'EQM[*]C'
114-
115-
# Regex patterns that would match but shouldn't with -F
116-
nul_match 0 '-F' 'yQ[f]'
117-
nul_match 0 '-F' '[y]Qf'
118-
nul_match 0 '-Fi' 'YQ[F]'
119-
nul_match 0 '-Fi' '[Y]QF'
120-
nul_match 0 '-F' 'æQ[ð]'
121-
nul_match 0 '-F' '[æ]Qð'
122-
nul_match 0 '-Fi' 'ÆQ[Ð]'
123-
nul_match 0 '-Fi' '[Æ]QÐ'
124-
125-
# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0
126-
# patterns case-insensitively.
127-
nul_match T1 '-i' 'ÆQÐ'
128-
129-
# \0 implicitly disables regexes. This is an undocumented internal
130-
# limitation.
131-
nul_match T1 '' 'yQ[f]'
132-
nul_match T1 '' '[y]Qf'
133-
nul_match T1 '-i' 'YQ[F]'
134-
nul_match T1 '-i' '[Y]Qf'
135-
nul_match T1 '' 'æQ[ð]'
136-
nul_match T1 '' '[æ]Qð'
137-
nul_match T1 '-i' 'ÆQ[Ð]'
138-
139-
# ... because of \0 implicitly disabling regexes, regexes that
140-
# should/shouldn't match don't do the right thing.
141-
nul_match T1 '' 'eQm.*cQ'
142-
nul_match T1 '-i' 'EQM.*cQ'
143-
nul_match T0 '' 'eQm[*]c'
144-
nul_match T0 '-i' 'EQM[*]C'
145-
146-
# Due to the REG_STARTEND extension when kwset() is disabled on -i &
147-
# non-ASCII the string will be matched in its entirety, but the
148-
# pattern will be cut off at the first \0.
149-
nul_match 0 '-i' 'NOMATCHQð'
150-
nul_match T0 '-i' '[Æ]QNOMATCH'
151-
nul_match T0 '-i' '[æ]QNOMATCH'
152-
# Matches, but for the wrong reasons, just stops at [æ]
153-
nul_match 1 '-i' '[Æ]Qð'
154-
nul_match 1 '-i' '[æ]Qð'
155-
156-
# Ensure that the matcher doesn't regress to something that stops at
157-
# \0
158-
nul_match 0 '-F' 'yQ[f]'
159-
nul_match 0 '-Fi' 'YQ[F]'
160-
nul_match 0 '' 'yQNOMATCH'
161-
nul_match 0 '' 'QNOMATCH'
162-
nul_match 0 '-i' 'YQNOMATCH'
163-
nul_match 0 '-i' 'QNOMATCH'
164-
nul_match 0 '-F' 'æQ[ð]'
165-
nul_match 0 '-Fi' 'ÆQ[Ð]'
166-
nul_match 0 '' 'yQNÓMATCH'
167-
nul_match 0 '' 'QNÓMATCH'
168-
nul_match 0 '-i' 'YQNÓMATCH'
169-
nul_match 0 '-i' 'QNÓMATCH'
170-
17170
test_expect_success 'grep respects binary diff attribute' '
17271
echo text >t &&
17372
git add t &&

t/t7816-grep-binary-pattern.sh

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#!/bin/sh
2+
3+
test_description='git grep with a binary pattern files'
4+
5+
. ./test-lib.sh
6+
7+
# nul_match <matches> <flags> <pattern>
#
# Register one test that runs "git grep -f f <flags> a", where the pattern
# file "f" is produced by piping <pattern> through q_to_nul.
#
#   $1 matches - expected-outcome selector:
#        1   test_expect_success; git grep must exit successfully
#        0   test_expect_success; git grep is wrapped in test_must_fail
#        T1  same body as 1, but registered with test_expect_failure
#        T0  same body as 0, but registered with test_expect_failure
#        any other value registers a test whose body is 'false'
#   $2 flags   - extra git grep options, inserted unquoted (may be empty)
#   $3 pattern - pattern text; every "Q" is shown as <NUL> in the test title
#
# The test bodies are double-quoted, so $flags and $pattern are expanded
# when nul_match is called, not when the test body is evaluated. The
# pattern also ends up as the printf format string, so "%" or backslash
# sequences in it would be interpreted by printf, and a single quote in it
# would end the quoting inside the body.
#
# matches, flags, pattern and pattern_human are assigned without "local",
# so they remain set in the calling shell after the function returns.
#
# NOTE(review): q_to_nul, test_expect_success, test_expect_failure and
# test_must_fail are not defined in this view; presumably they come from
# ./test-lib.sh and q_to_nul turns each "Q" into a NUL byte -- confirm.
nul_match () {
8+
matches=$1
9+
flags=$2
10+
pattern=$3
11+
# Human-readable form of the pattern, used only in the test titles.
pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g')
12+
13+
# 1: the grep is expected to succeed.
if test "$matches" = 1
14+
then
15+
test_expect_success "git grep -f f $flags '$pattern_human' a" "
16+
printf '$pattern' | q_to_nul >f &&
17+
git grep -f f $flags a
18+
"
19+
# 0: the grep is expected to fail (wrapped in test_must_fail).
elif test "$matches" = 0
20+
then
21+
test_expect_success "git grep -f f $flags '$pattern_human' a" "
22+
printf '$pattern' | q_to_nul >f &&
23+
test_must_fail git grep -f f $flags a
24+
"
25+
# T1: same body as 1, registered with test_expect_failure.
elif test "$matches" = T1
26+
then
27+
test_expect_failure "git grep -f f $flags '$pattern_human' a" "
28+
printf '$pattern' | q_to_nul >f &&
29+
git grep -f f $flags a
30+
"
31+
# T0: same body as 0, registered with test_expect_failure.
elif test "$matches" = T0
32+
then
33+
test_expect_failure "git grep -f f $flags '$pattern_human' a" "
34+
printf '$pattern' | q_to_nul >f &&
35+
test_must_fail git grep -f f $flags a
36+
"
37+
else
38+
# Unknown selector: register a test whose body is 'false' so the mistake
# shows up in the test results.
test_expect_success "PANIC: Test framework error. Unknown matches value $matches" 'false'
39+
fi
40+
}
41+
42+
# Fixture: write one line to file "a" after passing it through q_to_nul,
# then stage and commit it. The nul_match tests in this file all grep "a".
# NOTE(review): q_to_nul is not defined in this view; presumably it
# replaces each "Q" with a NUL byte, making "a" a binary file -- confirm.
test_expect_success 'setup' "
43+
echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a &&
44+
git add a &&
45+
git commit -m.
46+
"
47+
48+
# Baseline cases: patterns containing "Q" (the NUL placeholder) run with
# -F, -Fi and no flags. The first argument is the expected outcome:
# 1 = git grep must succeed, 0 = git grep must fail.
nul_match 1 '-F' 'yQf'
49+
nul_match 0 '-F' 'yQx'
50+
nul_match 1 '-Fi' 'YQf'
51+
nul_match 0 '-Fi' 'YQx'
52+
nul_match 1 '' 'yQf'
53+
nul_match 0 '' 'yQx'
54+
nul_match 1 '' 'æQð'
55+
nul_match 1 '-F' 'eQm[*]c'
56+
nul_match 1 '-Fi' 'EQM[*]C'
57+
58+
# Regex patterns that would match but shouldn't with -F
59+
nul_match 0 '-F' 'yQ[f]'
60+
nul_match 0 '-F' '[y]Qf'
61+
nul_match 0 '-Fi' 'YQ[F]'
62+
nul_match 0 '-Fi' '[Y]QF'
63+
nul_match 0 '-F' 'æQ[ð]'
64+
nul_match 0 '-F' '[æ]Qð'
65+
nul_match 0 '-Fi' 'ÆQ[Ð]'
66+
nul_match 0 '-Fi' '[Æ]QÐ'
67+
68+
# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0
69+
# patterns case-insensitively.
70+
nul_match T1 '-i' 'ÆQÐ'
71+
72+
# \0 implicitly disables regexes. This is an undocumented internal
73+
# limitation.
74+
nul_match T1 '' 'yQ[f]'
75+
nul_match T1 '' '[y]Qf'
76+
nul_match T1 '-i' 'YQ[F]'
77+
nul_match T1 '-i' '[Y]Qf'
78+
nul_match T1 '' 'æQ[ð]'
79+
nul_match T1 '' '[æ]Qð'
80+
nul_match T1 '-i' 'ÆQ[Ð]'
81+
82+
# ... because of \0 implicitly disabling regexes, regexes that
83+
# should/shouldn't match don't do the right thing.
84+
nul_match T1 '' 'eQm.*cQ'
85+
nul_match T1 '-i' 'EQM.*cQ'
86+
nul_match T0 '' 'eQm[*]c'
87+
nul_match T0 '-i' 'EQM[*]C'
88+
89+
# Due to the REG_STARTEND extension when kwset() is disabled on -i &
90+
# non-ASCII the string will be matched in its entirety, but the
91+
# pattern will be cut off at the first \0.
92+
nul_match 0 '-i' 'NOMATCHQð'
93+
nul_match T0 '-i' '[Æ]QNOMATCH'
94+
nul_match T0 '-i' '[æ]QNOMATCH'
95+
# Matches, but for the wrong reasons, just stops at [æ]
96+
nul_match 1 '-i' '[Æ]Qð'
97+
nul_match 1 '-i' '[æ]Qð'
98+
99+
# Ensure that the matcher doesn't regress to something that stops at
100+
# \0
101+
nul_match 0 '-F' 'yQ[f]'
102+
nul_match 0 '-Fi' 'YQ[F]'
103+
nul_match 0 '' 'yQNOMATCH'
104+
nul_match 0 '' 'QNOMATCH'
105+
nul_match 0 '-i' 'YQNOMATCH'
106+
nul_match 0 '-i' 'QNOMATCH'
107+
nul_match 0 '-F' 'æQ[ð]'
108+
nul_match 0 '-Fi' 'ÆQ[Ð]'
109+
nul_match 0 '' 'yQNÓMATCH'
110+
nul_match 0 '' 'QNÓMATCH'
111+
nul_match 0 '-i' 'YQNÓMATCH'
112+
nul_match 0 '-i' 'QNÓMATCH'
113+
114+
test_done

0 commit comments

Comments
 (0)