Skip to content

Commit e1561c4

Browse files
committed
Merge branch 'PHP-5.5' into PHP-5.6
* PHP-5.5: updated NEWS Fixed Bug #53823 (preg_replace: * qualifier on unicode replace garbles the string)
2 parents 868b932 + 1cbcbcb commit e1561c4

File tree

3 files changed

+87
-4
lines changed

3 files changed

+87
-4
lines changed

ext/pcre/php_pcre.c

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,25 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D
225225
}
226226
/* }}} */
227227

228+
/* {{{ static calculate_unit_length */
229+
/* Returns the length in bytes of the character that begins at start. When the pattern was compiled with PCRE_UTF8 this is the first byte plus every immediately following continuation byte (bit pattern 10xxxxxx); otherwise it is always 1. Assumes valid UTF-8 for PCRE_UTF8. */
230+
static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
231+
{
232+
int unit_len;
233+
234+
if (pce->compile_options & PCRE_UTF8) {
235+
char *end = start;
236+
237+
/* The pre-increment steps over the first byte unconditionally, then the loop skips continuation bytes (top two bits are 10). NOTE(review): the scan has no explicit end bound; presumably it relies on a non-continuation byte such as a terminating NUL following the character - confirm against callers. */
238+
while ((*++end & 0xC0) == 0x80);
239+
unit_len = end - start; /* bytes consumed; at least 1 because end was advanced once before the first test */
240+
} else {
241+
unit_len = 1; /* PCRE_UTF8 not set: advance by a single byte */
242+
}
243+
return unit_len;
244+
}
245+
/* }}} */
246+
228247
/* {{{ pcre_get_compiled_regex_cache
229248
*/
230249
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
@@ -780,8 +799,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
780799
the start offset, and continue. Fudge the offset values
781800
to achieve this, unless we're already at the end of the string. */
782801
if (g_notempty != 0 && start_offset < subject_len) {
802+
int unit_len = calculate_unit_length(pce, subject + start_offset);
803+
783804
offsets[0] = start_offset;
784-
offsets[1] = start_offset + 1;
805+
offsets[1] = start_offset + unit_len;
785806
} else
786807
break;
787808
} else {
@@ -1240,10 +1261,12 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
12401261
the start offset, and continue. Fudge the offset values
12411262
to achieve this, unless we're already at the end of the string. */
12421263
if (g_notempty != 0 && start_offset < subject_len) {
1264+
int unit_len = calculate_unit_length(pce, piece);
1265+
12431266
offsets[0] = start_offset;
1244-
offsets[1] = start_offset + 1;
1245-
memcpy(&result[*result_len], piece, 1);
1246-
(*result_len)++;
1267+
offsets[1] = start_offset + unit_len;
1268+
memcpy(&result[*result_len], piece, unit_len);
1269+
*result_len += unit_len;
12471270
} else {
12481271
new_len = *result_len + subject_len - start_offset;
12491272
if (new_len + 1 > alloc_len) {

ext/pcre/tests/bug53823.phpt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
--TEST--
2+
Bug #53823 - preg_replace: * qualifier on unicode replace garbles the string
3+
--FILE--
4+
<?php
5+
var_dump(preg_replace('/[^\pL\pM]*/iu', '', 'áéíóú'));
6+
// invalid UTF-8
7+
var_dump(preg_replace('/[^\pL\pM]*/iu', '', "\xFCáéíóú"));
8+
var_dump(preg_replace('/[^\pL\pM]*/iu', '', "áéíóú\xFC"));
9+
?>
10+
--EXPECT--
11+
string(10) "áéíóú"
12+
NULL
13+
NULL

ext/pcre/tests/bug66121.phpt

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
--TEST--
2+
Bug #66121 - UTF-8 lookbehinds match bytes instead of characters
3+
--FILE--
4+
<?php
5+
// Sinhala characters
6+
var_dump(preg_replace('/(?<!ක)/u', '*', 'ක'));
7+
var_dump(preg_replace('/(?<!ක)/u', '*', 'ම'));
8+
// English characters
9+
var_dump(preg_replace('/(?<!k)/u', '*', 'k'));
10+
var_dump(preg_replace('/(?<!k)/u', '*', 'm'));
11+
// Sinhala characters
12+
preg_match_all('/(?<!ක)/u', 'ම', $matches, PREG_OFFSET_CAPTURE);
13+
var_dump($matches);
14+
// invalid UTF-8
15+
var_dump(preg_replace('/(?<!ක)/u', '*', "\xFC"));
16+
var_dump(preg_replace('/(?<!ක)/u', '*', "\xFC"));
17+
var_dump(preg_match_all('/(?<!ක)/u', "\xFC", $matches, PREG_OFFSET_CAPTURE));
18+
var_dump(preg_match_all('/(?<!ක)/u', "\xFC", $matches, PREG_OFFSET_CAPTURE));
19+
?>
20+
--EXPECT--
21+
string(4) "*ක"
22+
string(5) "*ම*"
23+
string(2) "*k"
24+
string(3) "*m*"
25+
array(1) {
26+
[0]=>
27+
array(2) {
28+
[0]=>
29+
array(2) {
30+
[0]=>
31+
string(0) ""
32+
[1]=>
33+
int(0)
34+
}
35+
[1]=>
36+
array(2) {
37+
[0]=>
38+
string(0) ""
39+
[1]=>
40+
int(3)
41+
}
42+
}
43+
}
44+
NULL
45+
NULL
46+
bool(false)
47+
bool(false)

0 commit comments

Comments
 (0)