|
2 | 2 |
|
3 | 3 | test_description='git grep with a binary pattern files'
|
4 | 4 |
|
5 |
| -. ./test-lib.sh |
| 5 | +. ./lib-gettext.sh |
6 | 6 |
|
7 |
| -nul_match () { |
| 7 | +nul_match_internal () { |
8 | 8 | matches=$1
|
9 |
| - flags=$2 |
10 |
| - pattern=$3 |
| 9 | + prereqs=$2 |
| 10 | + lc_all=$3 |
| 11 | + extra_flags=$4 |
| 12 | + flags=$5 |
| 13 | + pattern=$6 |
11 | 14 | pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g')
|
12 | 15 |
|
13 | 16 | if test "$matches" = 1
|
14 | 17 | then
|
15 |
| - test_expect_success "git grep -f f $flags '$pattern_human' a" " |
| 18 | + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " |
16 | 19 | printf '$pattern' | q_to_nul >f &&
|
17 |
| - git grep -f f $flags a |
| 20 | + LC_ALL='$lc_all' git grep $extra_flags -f f $flags a |
18 | 21 | "
|
19 | 22 | elif test "$matches" = 0
|
20 | 23 | then
|
21 |
| - test_expect_success "git grep -f f $flags '$pattern_human' a" " |
| 24 | + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " |
| 25 | + >stderr && |
22 | 26 | printf '$pattern' | q_to_nul >f &&
|
23 |
| - test_must_fail git grep -f f $flags a |
| 27 | + test_must_fail env LC_ALL=\"$lc_all\" git grep $extra_flags -f f $flags a 2>stderr && |
| 28 | + test_i18ngrep ! 'This is only supported with -P under PCRE v2' stderr |
24 | 29 | "
|
25 |
| - elif test "$matches" = T1 |
| 30 | + elif test "$matches" = P |
26 | 31 | then
|
27 |
| - test_expect_failure "git grep -f f $flags '$pattern_human' a" " |
| 32 | + test_expect_success $prereqs "error, PCRE v2 only: LC_ALL='$lc_all' git grep -f f $flags '$pattern_human' a" " |
| 33 | + >stderr && |
28 | 34 | printf '$pattern' | q_to_nul >f &&
|
29 |
| - git grep -f f $flags a |
30 |
| - " |
31 |
| - elif test "$matches" = T0 |
32 |
| - then |
33 |
| - test_expect_failure "git grep -f f $flags '$pattern_human' a" " |
34 |
| - printf '$pattern' | q_to_nul >f && |
35 |
| - test_must_fail git grep -f f $flags a |
| 35 | + test_must_fail env LC_ALL=\"$lc_all\" git grep -f f $flags a 2>stderr && |
| 36 | + test_i18ngrep 'This is only supported with -P under PCRE v2' stderr |
36 | 37 | "
|
37 | 38 | else
|
38 | 39 | test_expect_success "PANIC: Test framework error. Unknown matches value $matches" 'false'
|
39 | 40 | fi
|
40 | 41 | }
|
41 | 42 |
|
| 43 | +nul_match () { |
| 44 | + matches=$1 |
| 45 | + matches_pcre2=$2 |
| 46 | + matches_pcre2_locale=$3 |
| 47 | + flags=$4 |
| 48 | + pattern=$5 |
| 49 | + pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g') |
| 50 | + |
| 51 | + nul_match_internal "$matches" "" "C" "" "$flags" "$pattern" |
| 52 | + nul_match_internal "$matches_pcre2" "LIBPCRE2" "C" "-P" "$flags" "$pattern" |
| 53 | + nul_match_internal "$matches_pcre2_locale" "LIBPCRE2,GETTEXT_LOCALE" "$is_IS_locale" "-P" "$flags" "$pattern" |
| 54 | +} |
| 55 | + |
42 | 56 | test_expect_success 'setup' "
|
43 | 57 | echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a &&
|
44 | 58 | git add a &&
|
45 | 59 | git commit -m.
|
46 | 60 | "
|
47 | 61 |
|
48 |
| -nul_match 1 '-F' 'yQf' |
49 |
| -nul_match 0 '-F' 'yQx' |
50 |
| -nul_match 1 '-Fi' 'YQf' |
51 |
| -nul_match 0 '-Fi' 'YQx' |
52 |
| -nul_match 1 '' 'yQf' |
53 |
| -nul_match 0 '' 'yQx' |
54 |
| -nul_match 1 '' 'æQð' |
55 |
| -nul_match 1 '-F' 'eQm[*]c' |
56 |
| -nul_match 1 '-Fi' 'EQM[*]C' |
| 62 | +# Simple fixed-string matching that can use kwset (no -i && non-ASCII) |
| 63 | +nul_match 1 1 1 '-F' 'yQf' |
| 64 | +nul_match 0 0 0 '-F' 'yQx' |
| 65 | +nul_match 1 1 1 '-Fi' 'YQf' |
| 66 | +nul_match 0 0 0 '-Fi' 'YQx' |
| 67 | +nul_match 1 1 1 '' 'yQf' |
| 68 | +nul_match 0 0 0 '' 'yQx' |
| 69 | +nul_match 1 1 1 '' 'æQð' |
| 70 | +nul_match 1 1 1 '-F' 'eQm[*]c' |
| 71 | +nul_match 1 1 1 '-Fi' 'EQM[*]C' |
57 | 72 |
|
58 | 73 | # Regex patterns that would match but shouldn't with -F
|
59 |
| -nul_match 0 '-F' 'yQ[f]' |
60 |
| -nul_match 0 '-F' '[y]Qf' |
61 |
| -nul_match 0 '-Fi' 'YQ[F]' |
62 |
| -nul_match 0 '-Fi' '[Y]QF' |
63 |
| -nul_match 0 '-F' 'æQ[ð]' |
64 |
| -nul_match 0 '-F' '[æ]Qð' |
65 |
| -nul_match 0 '-Fi' 'ÆQ[Ð]' |
66 |
| -nul_match 0 '-Fi' '[Æ]QÐ' |
| 74 | +nul_match 0 0 0 '-F' 'yQ[f]' |
| 75 | +nul_match 0 0 0 '-F' '[y]Qf' |
| 76 | +nul_match 0 0 0 '-Fi' 'YQ[F]' |
| 77 | +nul_match 0 0 0 '-Fi' '[Y]QF' |
| 78 | +nul_match 0 0 0 '-F' 'æQ[ð]' |
| 79 | +nul_match 0 0 0 '-F' '[æ]Qð' |
67 | 80 |
|
68 |
| -# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0 |
69 |
| -# patterns case-insensitively. |
70 |
| -nul_match T1 '-i' 'ÆQÐ' |
| 81 | +# The -F kwset codepath can't handle -i && non-ASCII... |
| 82 | +nul_match P 1 1 '-i' '[æ]Qð' |
71 | 83 |
|
72 |
| -# \0 implicitly disables regexes. This is an undocumented internal |
73 |
| -# limitation. |
74 |
| -nul_match T1 '' 'yQ[f]' |
75 |
| -nul_match T1 '' '[y]Qf' |
76 |
| -nul_match T1 '-i' 'YQ[F]' |
77 |
| -nul_match T1 '-i' '[Y]Qf' |
78 |
| -nul_match T1 '' 'æQ[ð]' |
79 |
| -nul_match T1 '' '[æ]Qð' |
80 |
| -nul_match T1 '-i' 'ÆQ[Ð]' |
| 84 | +# ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8 |
| 85 | +# semantics |
| 86 | +nul_match P P P '-Fi' 'ÆQ[Ð]' |
| 87 | +nul_match P 0 1 '-i' 'ÆQ[Ð]' |
| 88 | +nul_match P 0 1 '-i' '[Æ]QÐ' |
| 89 | +nul_match P 0 1 '-i' '[Æ]Qð' |
| 90 | +nul_match P 0 1 '-i' 'ÆQÐ' |
81 | 91 |
|
82 |
| -# ... because of \0 implicitly disabling regexes regexes that |
83 |
| -# should/shouldn't match don't do the right thing. |
84 |
| -nul_match T1 '' 'eQm.*cQ' |
85 |
| -nul_match T1 '-i' 'EQM.*cQ' |
86 |
| -nul_match T0 '' 'eQm[*]c' |
87 |
| -nul_match T0 '-i' 'EQM[*]C' |
| 92 | +# \0 in regexes can only work with -P & PCRE v2 |
| 93 | +nul_match P 1 1 '' 'yQ[f]' |
| 94 | +nul_match P 1 1 '' '[y]Qf' |
| 95 | +nul_match P 1 1 '-i' 'YQ[F]' |
| 96 | +nul_match P 1 1 '-i' '[Y]Qf' |
| 97 | +nul_match P 1 1 '' 'æQ[ð]' |
| 98 | +nul_match P 1 1 '' '[æ]Qð' |
| 99 | +nul_match P 0 1 '-i' 'ÆQ[Ð]' |
| 100 | +nul_match P 1 1 '' 'eQm.*cQ' |
| 101 | +nul_match P 1 1 '-i' 'EQM.*cQ' |
| 102 | +nul_match P 0 0 '' 'eQm[*]c' |
| 103 | +nul_match P 0 0 '-i' 'EQM[*]C' |
88 | 104 |
|
89 |
| -# Due to the REG_STARTEND extension when kwset() is disabled on -i & |
90 |
| -# non-ASCII the string will be matched in its entirety, but the |
91 |
| -# pattern will be cut off at the first \0. |
92 |
| -nul_match 0 '-i' 'NOMATCHQð' |
93 |
| -nul_match T0 '-i' '[Æ]QNOMATCH' |
94 |
| -nul_match T0 '-i' '[æ]QNOMATCH' |
95 |
| -# Matches, but for the wrong reasons, just stops at [æ] |
96 |
| -nul_match 1 '-i' '[Æ]Qð' |
97 |
| -nul_match 1 '-i' '[æ]Qð' |
| 105 | +# Assert that we're using REG_STARTEND and the pattern doesn't match |
| 106 | +# just because it's cut off at the first \0. |
| 107 | +nul_match 0 0 0 '-i' 'NOMATCHQð' |
| 108 | +nul_match P 0 0 '-i' '[Æ]QNOMATCH' |
| 109 | +nul_match P 0 0 '-i' '[æ]QNOMATCH' |
98 | 110 |
|
99 | 111 | # Ensure that the matcher doesn't regress to something that stops at
|
100 | 112 | # \0
|
101 |
| -nul_match 0 '-F' 'yQ[f]' |
102 |
| -nul_match 0 '-Fi' 'YQ[F]' |
103 |
| -nul_match 0 '' 'yQNOMATCH' |
104 |
| -nul_match 0 '' 'QNOMATCH' |
105 |
| -nul_match 0 '-i' 'YQNOMATCH' |
106 |
| -nul_match 0 '-i' 'QNOMATCH' |
107 |
| -nul_match 0 '-F' 'æQ[ð]' |
108 |
| -nul_match 0 '-Fi' 'ÆQ[Ð]' |
109 |
| -nul_match 0 '' 'yQNÓMATCH' |
110 |
| -nul_match 0 '' 'QNÓMATCH' |
111 |
| -nul_match 0 '-i' 'YQNÓMATCH' |
112 |
| -nul_match 0 '-i' 'QNÓMATCH' |
| 113 | +nul_match 0 0 0 '-F' 'yQ[f]' |
| 114 | +nul_match 0 0 0 '-Fi' 'YQ[F]' |
| 115 | +nul_match 0 0 0 '' 'yQNOMATCH' |
| 116 | +nul_match 0 0 0 '' 'QNOMATCH' |
| 117 | +nul_match 0 0 0 '-i' 'YQNOMATCH' |
| 118 | +nul_match 0 0 0 '-i' 'QNOMATCH' |
| 119 | +nul_match 0 0 0 '-F' 'æQ[ð]' |
| 120 | +nul_match P P P '-Fi' 'ÆQ[Ð]' |
| 121 | +nul_match P 0 1 '-i' 'ÆQ[Ð]' |
| 122 | +nul_match 0 0 0 '' 'yQNÓMATCH' |
| 123 | +nul_match 0 0 0 '' 'QNÓMATCH' |
| 124 | +nul_match 0 0 0 '-i' 'YQNÓMATCH' |
| 125 | +nul_match 0 0 0 '-i' 'QNÓMATCH' |
113 | 126 |
|
114 | 127 | test_done
|
0 commit comments