Skip to content

Commit 67051eb

Browse files
committed
Fix segfault caused by use of 'pass' encoding when mbstring converts multipart form POST data
When mbstring.encoding_translation=1, and PHP receives an (RFC1867) form-based file upload, and the Content-Disposition HTTP header contains a filename for the uploaded file, PHP will internally invoke mbstring code to 1) try to auto-detect the text encoding of the filename, and if that succeeds, 2) convert the filename to internal text encoding. In such cases, the candidate text encodings which are considered during "auto-detection" are those listed in the INI parameter mbstring.http_input. Further, mbstring.http_input is one of the few contexts where mbstring allows the magic string "pass" to appear in place of an actual text encoding name. Before mbstring's encoding auto-detection function was reimplemented, the old implementation would never return "pass", even if "pass" was the only candidate it was given to choose from. It is not clear if this was intended by the original developers or not. This behavior was the result of some rather subtle details of the implementation. After mbstring's auto-detection function was reimplemented, if the new implementation was given only one candidate to choose, and it was not running in 'strict' mode, it would always return that candidate, even if the candidate was the non-encoding "pass". The upshot of all of this: Previously, if mbstring.encoding_translation=1 and mbstring.http_input=pass, encoding conversion of RFC1867 filenames would never be attempted. But after the reimplementation, encoding 'conversion' would occur (uselessly). Further, in December 2022, I reimplemented the relevant bit of encoding conversion code. When doing this, I never bothered to implement encoding/decoding routines for the non-encoding "pass", because I thought that they would never be used. Well, in the one case described above, those routines *would* have been used, had they actually existed. Because they didn't exist, we get a nice NULL pointer dereference and ensuing segfault instead. Instead of 'fixing' this by adding encoding/decoding routines for the non-encoding "pass", I have modified the function which the RFC1867 form-handling code invokes to auto-detect input encoding. This function will never return "pass" now, just like the previous implementation. Thanks to the GitHub user 'tstangner' for reporting this bug.
1 parent ea8d143 commit 67051eb

File tree

2 files changed

+43
-13
lines changed

2 files changed

+43
-13
lines changed

ext/mbstring/mbstring.c

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -291,8 +291,7 @@ static size_t count_commas(const char *p, const char *end) {
291291
* Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
292292
*/
293293
static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
294-
const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
295-
bool allow_pass_encoding)
294+
const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num)
296295
{
297296
if (value == NULL || value_length == 0) {
298297
*return_list = NULL;
@@ -345,8 +344,7 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
345344
}
346345
}
347346
} else {
348-
const mbfl_encoding *encoding =
349-
allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
347+
const mbfl_encoding *encoding = mbfl_name2encoding(p1);
350348
if (!encoding) {
351349
/* Called from an INI setting modification */
352350
if (arg_num == 0) {
@@ -452,8 +450,12 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a
452450
list = (const zend_encoding**)MBSTRG(current_detect_order_list);
453451
list_size = MBSTRG(current_detect_order_list_size);
454452
}
455-
456-
return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding **)list, list_size, false, false);
453+
if (list_size == 1 && ((mbfl_encoding*)*list) == &mbfl_encoding_pass) {
454+
/* Emulate behavior of previous implementation; it would never return "pass"
455+
* from an encoding auto-detection operation */
456+
return NULL;
457+
}
458+
return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding**)list, list_size, false, false);
457459
}
458460

459461
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
@@ -474,7 +476,7 @@ static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, s
474476
return php_mb_parse_encoding_list(
475477
encoding_list, encoding_list_len,
476478
(const mbfl_encoding ***)return_list, return_size,
477-
persistent, /* arg_num */ 0, /* allow_pass_encoding */ 0);
479+
persistent, /* arg_num */ 0);
478480
}
479481

480482
static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
@@ -712,7 +714,7 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
712714
return SUCCESS;
713715
}
714716

715-
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
717+
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0) || size == 0) {
716718
return FAILURE;
717719
}
718720

@@ -728,7 +730,11 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
728730
static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
729731
const mbfl_encoding **list;
730732
size_t size;
731-
if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
733+
if (new_value_length == 4 && strncmp(new_value, "pass", 4) == 0) {
734+
list = (const mbfl_encoding**)pecalloc(1, sizeof(mbfl_encoding*), 1);
735+
*list = &mbfl_encoding_pass;
736+
size = 1;
737+
} else if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0) || size == 0) {
732738
return FAILURE;
733739
}
734740
if (MBSTRG(http_input_list)) {
@@ -1383,7 +1389,7 @@ PHP_FUNCTION(mb_detect_order)
13831389
RETURN_THROWS();
13841390
}
13851391
} else {
1386-
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
1392+
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1)) {
13871393
RETURN_THROWS();
13881394
}
13891395
}
@@ -2799,7 +2805,7 @@ PHP_FUNCTION(mb_convert_encoding)
27992805
} else if (from_encodings_str) {
28002806
if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
28012807
&from_encodings, &num_from_encodings,
2802-
/* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
2808+
/* persistent */ 0, /* arg_num */ 3) == FAILURE) {
28032809
RETURN_THROWS();
28042810
}
28052811
free_from_encodings = true;
@@ -3163,7 +3169,7 @@ PHP_FUNCTION(mb_detect_encoding)
31633169
RETURN_THROWS();
31643170
}
31653171
} else if (encoding_str) {
3166-
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
3172+
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2)) {
31673173
RETURN_THROWS();
31683174
}
31693175
} else {
@@ -3564,7 +3570,7 @@ PHP_FUNCTION(mb_convert_variables)
35643570
RETURN_THROWS();
35653571
}
35663572
} else {
3567-
if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
3573+
if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2) == FAILURE) {
35683574
RETURN_THROWS();
35693575
}
35703576
}

ext/mbstring/tests/gh13123.phpt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
--TEST--
2+
Segfault in mb_fast_convert() when mbstring.encoding_translation is enabled (GH-13123)
3+
--EXTENSIONS--
4+
mbstring
5+
--POST_RAW--
6+
Content-Type: multipart/form-data, boundary=Blah
7+
8+
--Blah
9+
Content-Disposition: form-data; name="file"; filename="file.txt"
10+
Content-Type: text/plain
11+
12+
foo
13+
--Blah
14+
15+
--INI--
16+
error_reporting=E_ALL&~E_DEPRECATED
17+
mbstring.encoding_translation=On
18+
mbstring.http_input=pass
19+
--FILE--
20+
<?php
21+
print "Done!\n";
22+
?>
23+
--EXPECT--
24+
Done!

0 commit comments

Comments
 (0)