Skip to content

Commit b7d36ff

Browse files
dscho authored and gitster committed
regex: use regexec_buf()
The new regexec_buf() function operates on buffers with an explicitly specified length, rather than NUL-terminated strings. We need to use this function whenever the buffer we want to pass to regexec(3) may have been mmap(2)ed (and is hence not NUL-terminated). Note: the original motivation for this patch was to fix a bug where `git diff -G <regex>` would crash. This patch converts more callers, though, some of which allocated memory to construct NUL-terminated strings, or worse, modified buffers to temporarily insert NULs while calling regexec(3). By converting them to use regexec_buf(), the code has become much cleaner. Signed-off-by: Johannes Schindelin <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 2f89522 commit b7d36ff

File tree

5 files changed

+17
-33
lines changed

5 files changed

+17
-33
lines changed

diff.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
941941
{
942942
if (word_regex && *begin < buffer->size) {
943943
regmatch_t match[1];
944-
if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
944+
if (!regexec_buf(word_regex, buffer->ptr + *begin,
945+
buffer->size - *begin, 1, match, 0)) {
945946
char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
946947
'\n', match[0].rm_eo - match[0].rm_so);
947948
*end = p ? p - buffer->ptr : match[0].rm_eo + *begin;

diffcore-pickaxe.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
2121
{
2222
struct diffgrep_cb *data = priv;
2323
regmatch_t regmatch;
24-
int hold;
2524

2625
if (line[0] != '+' && line[0] != '-')
2726
return;
@@ -31,11 +30,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
3130
* caller early.
3231
*/
3332
return;
34-
/* Yuck -- line ought to be "const char *"! */
35-
hold = line[len];
36-
line[len] = '\0';
37-
data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0);
38-
line[len] = hold;
33+
data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
34+
&regmatch, 0);
3935
}
4036

4137
static int diff_grep(mmfile_t *one, mmfile_t *two,
@@ -48,9 +44,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
4844
xdemitconf_t xecfg;
4945

5046
if (!one)
51-
return !regexec(regexp, two->ptr, 1, &regmatch, 0);
47+
return !regexec_buf(regexp, two->ptr, two->size,
48+
1, &regmatch, 0);
5249
if (!two)
53-
return !regexec(regexp, one->ptr, 1, &regmatch, 0);
50+
return !regexec_buf(regexp, one->ptr, one->size,
51+
1, &regmatch, 0);
5452

5553
/*
5654
* We have both sides; need to run textual diff and see if
@@ -81,8 +79,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
8179
regmatch_t regmatch;
8280
int flags = 0;
8381

84-
assert(data[sz] == '\0');
85-
while (*data && !regexec(regexp, data, 1, &regmatch, flags)) {
82+
while (*data &&
83+
!regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
8684
flags |= REG_NOTBOL;
8785
data += regmatch.rm_eo;
8886
if (*data && regmatch.rm_so == regmatch.rm_eo)

grep.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -848,17 +848,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol,
848848
}
849849
}
850850

851-
static int regmatch(const regex_t *preg, char *line, char *eol,
852-
regmatch_t *match, int eflags)
853-
{
854-
#ifdef REG_STARTEND
855-
match->rm_so = 0;
856-
match->rm_eo = eol - line;
857-
eflags |= REG_STARTEND;
858-
#endif
859-
return regexec(preg, line, 1, match, eflags);
860-
}
861-
862851
static int patmatch(struct grep_pat *p, char *line, char *eol,
863852
regmatch_t *match, int eflags)
864853
{
@@ -869,7 +858,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
869858
else if (p->pcre_regexp)
870859
hit = !pcrematch(p, line, eol, match, eflags);
871860
else
872-
hit = !regmatch(&p->regexp, line, eol, match, eflags);
861+
hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
862+
eflags);
873863

874864
return hit;
875865
}

t/t4062-diff-pickaxe.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ test_expect_success setup '
1414
test_tick &&
1515
git commit -m "A 4k file"
1616
'
17-
test_expect_failure '-G matches' '
17+
test_expect_success '-G matches' '
1818
git diff --name-only -G "^0{4096}$" HEAD^ >out &&
1919
test 4096-zeroes.txt = "$(cat out)"
2020
'

xdiff-interface.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,10 @@ struct ff_regs {
216216
static long ff_regexp(const char *line, long len,
217217
char *buffer, long buffer_size, void *priv)
218218
{
219-
char *line_buffer;
220219
struct ff_regs *regs = priv;
221220
regmatch_t pmatch[2];
222221
int i;
223-
int result = -1;
222+
int result;
224223

225224
/* Exclude terminating newline (and cr) from matching */
226225
if (len > 0 && line[len-1] == '\n') {
@@ -230,18 +229,16 @@ static long ff_regexp(const char *line, long len,
230229
len--;
231230
}
232231

233-
line_buffer = xstrndup(line, len); /* make NUL terminated */
234-
235232
for (i = 0; i < regs->nr; i++) {
236233
struct ff_reg *reg = regs->array + i;
237-
if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) {
234+
if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) {
238235
if (reg->negate)
239-
goto fail;
236+
return -1;
240237
break;
241238
}
242239
}
243240
if (regs->nr <= i)
244-
goto fail;
241+
return -1;
245242
i = pmatch[1].rm_so >= 0 ? 1 : 0;
246243
line += pmatch[i].rm_so;
247244
result = pmatch[i].rm_eo - pmatch[i].rm_so;
@@ -250,8 +247,6 @@ static long ff_regexp(const char *line, long len,
250247
while (result > 0 && (isspace(line[result - 1])))
251248
result--;
252249
memcpy(buffer, line, result);
253-
fail:
254-
free(line_buffer);
255250
return result;
256251
}
257252

0 commit comments

Comments
 (0)