Skip to content

Fix #77726: Allow null character in regex patterns #8114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions ext/pcre/php_pcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
char delimiter;
char start_delimiter;
char end_delimiter;
char *p, *pp;
char *p, *pp, *end_p;
char *pattern;
size_t pattern_len;
uint32_t poptions = 0;
Expand All @@ -624,7 +624,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
pcre_cache_entry new_entry;
int rc;
zend_string *key;
pcre_cache_entry *ret;
pcre_cache_entry *ret;

if (locale_aware && BG(ctype_string)) {
key = zend_string_concat2(
Expand All @@ -645,28 +645,30 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
}

p = ZSTR_VAL(regex);
end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);

/* Parse through the leading whitespace, and display a warning if we
get to the end without encountering a delimiter. */
while (isspace((int)*(unsigned char *)p)) p++;
if (*p == 0) {
if (p >= end_p) {
if (key != regex) {
zend_string_release_ex(key, 0);
}
php_error_docref(NULL, E_WARNING,
p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
php_error_docref(NULL, E_WARNING, "Empty regular expression");
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}

/* Get the delimiter and display a warning if it is alphanumeric
or a backslash. */
delimiter = *p++;
if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
if (key != regex) {
zend_string_release_ex(key, 0);
}
php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
php_error_docref(NULL, E_WARNING, delimiter == '\0' ?
"Delimiter must not be a null character" :
"Delimiter must not be alphanumeric or backslash");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me that there is no real need for different error messages:

Suggested change
php_error_docref(NULL, E_WARNING, delimiter == '\0' ?
"Delimiter must not be a null character" :
"Delimiter must not be alphanumeric or backslash");
php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash or null character");

(oh, and do we have precedence for "null character" – I prefer "NUL character", or just "NUL")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have been thinking whether it might be better to simply treat NUL as whitespace here. That would avoid the need for changing or adding error messages, and instead the only change would be the removal of the messages related to the issue. Other functions, such as trim, treat it as whitespace: https://www.php.net/manual/en/function.trim.php.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, it might be better security wise to not treat NUL bytes as whitespace here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright. I have updated the warning messages.

pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
Expand All @@ -682,8 +684,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
/* We need to iterate through the pattern, searching for the ending delimiter,
but skipping the backslashed delimiters. If the ending delimiter is not
found, display a warning. */
while (*pp != 0) {
if (*pp == '\\' && pp[1] != 0) pp++;
while (pp < end_p) {
if (*pp == '\\' && pp + 1 < end_p) pp++;
else if (*pp == delimiter)
break;
pp++;
Expand All @@ -695,8 +697,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
* reach the end of the pattern without matching, display a warning.
*/
int brackets = 1; /* brackets nesting level */
while (*pp != 0) {
if (*pp == '\\' && pp[1] != 0) pp++;
while (pp < end_p) {
if (*pp == '\\' && pp + 1 < end_p) pp++;
else if (*pp == end_delimiter && --brackets <= 0)
break;
else if (*pp == start_delimiter)
Expand All @@ -705,13 +707,11 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
}
}

if (*pp == 0) {
if (pp == end_p) {
if (key != regex) {
zend_string_release_ex(key, 0);
}
if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
php_error_docref(NULL,E_WARNING, "Null byte in regex");
} else if (start_delimiter == end_delimiter) {
if (start_delimiter == end_delimiter) {
php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
} else {
php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
Expand All @@ -729,7 +729,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in

/* Parse through the options, setting appropriate flags. Display
a warning if we encounter an unknown modifier. */
while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
while (pp < end_p) {
switch (*pp++) {
/* Perl compatible options */
case 'i': coptions |= PCRE2_CASELESS; break;
Expand Down Expand Up @@ -763,9 +763,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in

default:
if (pp[-1]) {
php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
} else {
php_error_docref(NULL,E_WARNING, "Null byte in regex");
php_error_docref(NULL, E_WARNING, "Null character is not a valid modifier");
}
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
Expand Down
4 changes: 4 additions & 0 deletions ext/pcre/tests/delimiters.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ var_dump(preg_match('~a', ''));
var_dump(preg_match('@\@\@@', '@@'));
var_dump(preg_match('//z', '@@'));
var_dump(preg_match('{', ''));
var_dump(preg_match("\0\0", ''));

?>
--EXPECTF--
Expand All @@ -35,3 +36,6 @@ bool(false)

Warning: preg_match(): No ending matching delimiter '}' found in %sdelimiters.php on line 11
bool(false)

Warning: preg_match(): Delimiter must not be a null character in %sdelimiters.php on line 12
bool(false)
76 changes: 50 additions & 26 deletions ext/pcre/tests/null_bytes.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,64 @@ Zero byte test
--FILE--
<?php

preg_match("\0//i", "");
preg_match("/\0/i", "");
preg_match("//\0i", "");
preg_match("//i\0", "");
preg_match("/\\\0/i", "");

preg_match("\0[]i", "");
preg_match("[\0]i", "");
preg_match("[]\0i", "");
preg_match("[]i\0", "");
preg_match("[\\\0]i", "");
var_dump(preg_match("\0//i", ""));
var_dump(preg_match("/\0/i", ""));
var_dump(preg_match("/\0/i", "\0"));
var_dump(preg_match("//\0i", ""));
var_dump(preg_match("//i\0", ""));
var_dump(preg_match("/\\\0/i", ""));
var_dump(preg_match("/\\\0/i", "\\\0"));

preg_replace("/foo/e\0/i", "echo('Eek');", "");

?>
--EXPECTF--
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 3
var_dump(preg_match("\0[]i", ""));
var_dump(preg_match("[\0]i", ""));
var_dump(preg_match("[\0]i", "\0"));
var_dump(preg_match("[]\0i", ""));
var_dump(preg_match("[]i\0", ""));
var_dump(preg_match("[\\\0]i", ""));
var_dump(preg_match("[\\\0]i", "\\\0"));

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 4
var_dump(preg_match("/abc\0def/", "abc"));
var_dump(preg_match("/abc\0def/", "abc\0def"));
var_dump(preg_match("/abc\0def/", "abc\0fed"));

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 5
var_dump(preg_match("[abc\0def]", "abc"));
var_dump(preg_match("[abc\0def]", "abc\0def"));
var_dump(preg_match("[abc\0def]", "abc\0fed"));

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 6
preg_replace("/foo/e\0/i", "echo('Eek');", "");

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 7
?>
--EXPECTF--
Warning: preg_match(): Delimiter must not be a null character in %snull_bytes.php on line 3
bool(false)
int(0)
int(1)

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 9
Warning: preg_match(): Null character is not a valid modifier in %snull_bytes.php on line 6
bool(false)

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 10
Warning: preg_match(): Null character is not a valid modifier in %snull_bytes.php on line 7
bool(false)
int(0)
int(1)

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 11
Warning: preg_match(): Delimiter must not be a null character in %snull_bytes.php on line 11
bool(false)
int(0)
int(1)

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 12
Warning: preg_match(): Null character is not a valid modifier in %snull_bytes.php on line 14
bool(false)

Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 13
Warning: preg_match(): Null character is not a valid modifier in %snull_bytes.php on line 15
bool(false)
int(0)
int(1)
int(0)
int(1)
int(0)
int(0)
int(1)
int(0)

Warning: preg_replace(): Null byte in regex in %snull_bytes.php on line 15
Warning: preg_replace(): Null character is not a valid modifier in %snull_bytes.php on line 27