Skip to content

Commit c4b383d

Browse files
committed
Merge branch 'PHP-7.4'
2 parents 4916271 + 1cf84f1 commit c4b383d

File tree

4 files changed

+192
-73
lines changed

4 files changed

+192
-73
lines changed

ext/pcre/php_pcre.c

Lines changed: 96 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -968,13 +968,40 @@ static void init_unmatched_empty_pair() {
968968
zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
969969
}
970970

971+
static zend_always_inline void populate_match_value_str(
972+
zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
973+
if (start_offset == end_offset) {
974+
ZVAL_EMPTY_STRING(val);
975+
} else if (start_offset + 1 == end_offset) {
976+
ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
977+
} else {
978+
ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
979+
}
980+
}
981+
982+
static inline void populate_match_value(
983+
zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
984+
uint32_t unmatched_as_null) {
985+
if (PCRE2_UNSET == start_offset) {
986+
if (unmatched_as_null) {
987+
ZVAL_NULL(val);
988+
} else {
989+
ZVAL_EMPTY_STRING(val);
990+
}
991+
} else {
992+
populate_match_value_str(val, subject, start_offset, end_offset);
993+
}
994+
}
995+
971996
/* {{{ add_offset_pair */
972-
static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, zend_string *name, uint32_t unmatched_as_null)
997+
static inline void add_offset_pair(
998+
zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
999+
zend_string *name, uint32_t unmatched_as_null)
9731000
{
9741001
zval match_pair, tmp;
9751002

9761003
/* Add (match, offset) to the return value */
977-
if (PCRE2_UNSET == offset) {
1004+
if (PCRE2_UNSET == start_offset) {
9781005
if (unmatched_as_null) {
9791006
if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
9801007
init_unmatched_null_pair();
@@ -988,15 +1015,9 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
9881015
}
9891016
} else {
9901017
array_init_size(&match_pair, 2);
991-
if (len == 0) {
992-
ZVAL_EMPTY_STRING(&tmp);
993-
} else if (len == 1) {
994-
ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((unsigned char) *str));
995-
} else {
996-
ZVAL_STRINGL(&tmp, str, len);
997-
}
1018+
populate_match_value_str(&tmp, subject, start_offset, end_offset);
9981019
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
999-
ZVAL_LONG(&tmp, offset);
1020+
ZVAL_LONG(&tmp, start_offset);
10001021
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
10011022
}
10021023

@@ -1008,37 +1029,24 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
10081029
}
10091030
/* }}} */
10101031

1011-
static inline void populate_match_value(
1012-
zval *val, char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1013-
uint32_t unmatched_as_null) {
1014-
if (PCRE2_UNSET == start_offset) {
1015-
if (unmatched_as_null) {
1016-
ZVAL_NULL(val);
1017-
} else {
1018-
ZVAL_EMPTY_STRING(val);
1019-
}
1020-
} else if (start_offset == end_offset) {
1021-
ZVAL_EMPTY_STRING(val);
1022-
} else if (start_offset + 1 == end_offset) {
1023-
ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
1024-
} else {
1025-
ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
1026-
}
1027-
}
1028-
10291032
static void populate_subpat_array(
10301033
zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
1031-
int count, const PCRE2_SPTR mark, zend_long flags) {
1034+
uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
10321035
zend_bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0;
10331036
zend_bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0;
10341037
zval val;
10351038
int i;
10361039
if (subpat_names) {
10371040
if (offset_capture) {
10381041
for (i = 0; i < count; i++) {
1039-
add_offset_pair(subpats, subject + offsets[i<<1],
1040-
offsets[(i<<1)+1] - offsets[i<<1],
1041-
offsets[i<<1], subpat_names[i], unmatched_as_null);
1042+
add_offset_pair(
1043+
subpats, subject, offsets[2*i], offsets[2*i+1],
1044+
subpat_names[i], unmatched_as_null);
1045+
}
1046+
if (unmatched_as_null) {
1047+
for (i = count; i < num_subpats; i++) {
1048+
add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
1049+
}
10421050
}
10431051
} else {
10441052
for (i = 0; i < count; i++) {
@@ -1050,20 +1058,38 @@ static void populate_subpat_array(
10501058
}
10511059
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
10521060
}
1061+
if (unmatched_as_null) {
1062+
for (i = count; i < num_subpats; i++) {
1063+
ZVAL_NULL(&val);
1064+
if (subpat_names[i]) {
1065+
zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &val);
1066+
}
1067+
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
1068+
}
1069+
}
10531070
}
10541071
} else {
10551072
if (offset_capture) {
10561073
for (i = 0; i < count; i++) {
1057-
add_offset_pair(subpats, subject + offsets[i<<1],
1058-
offsets[(i<<1)+1] - offsets[i<<1],
1059-
offsets[i<<1], NULL, unmatched_as_null);
1074+
add_offset_pair(
1075+
subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
1076+
}
1077+
if (unmatched_as_null) {
1078+
for (i = count; i < num_subpats; i++) {
1079+
add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
1080+
}
10601081
}
10611082
} else {
10621083
for (i = 0; i < count; i++) {
10631084
populate_match_value(
10641085
&val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
10651086
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
10661087
}
1088+
if (unmatched_as_null) {
1089+
for (i = count; i < num_subpats; i++) {
1090+
add_next_index_null(subpats);
1091+
}
1092+
}
10671093
}
10681094
}
10691095
/* Add MARK, if available */
@@ -1264,8 +1290,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12641290
/* For each subpattern, insert it into the appropriate array. */
12651291
if (offset_capture) {
12661292
for (i = 0; i < count; i++) {
1267-
add_offset_pair(&match_sets[i], subject + offsets[i<<1],
1268-
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
1293+
add_offset_pair(
1294+
&match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1295+
NULL, unmatched_as_null);
12691296
}
12701297
} else {
12711298
for (i = 0; i < count; i++) {
@@ -1292,7 +1319,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12921319
for (; i < num_subpats; i++) {
12931320
if (offset_capture) {
12941321
add_offset_pair(
1295-
&match_sets[i], NULL, 0, PCRE2_UNSET,
1322+
&match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
12961323
NULL, unmatched_as_null);
12971324
} else if (unmatched_as_null) {
12981325
add_next_index_null(&match_sets[i]);
@@ -1306,15 +1333,16 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
13061333
array_init_size(&result_set, count + (mark ? 1 : 0));
13071334
mark = pcre2_get_mark(match_data);
13081335
populate_subpat_array(
1309-
&result_set, subject, offsets, subpat_names, count, mark, flags);
1336+
&result_set, subject, offsets, subpat_names,
1337+
num_subpats, count, mark, flags);
13101338
/* And add it to the output array */
13111339
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
13121340
}
13131341
} else { /* single pattern matching */
13141342
/* For each subpattern, insert it into the subpatterns array. */
13151343
mark = pcre2_get_mark(match_data);
13161344
populate_subpat_array(
1317-
subpats, subject, offsets, subpat_names, count, mark, flags);
1345+
subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
13181346
break;
13191347
}
13201348
}
@@ -1473,14 +1501,14 @@ static int preg_get_backref(char **str, int *backref)
14731501

14741502
/* {{{ preg_do_repl_func
14751503
*/
1476-
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, int count, const PCRE2_SPTR mark, zend_long flags)
1504+
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
14771505
{
14781506
zend_string *result_str;
14791507
zval retval; /* Function return value */
14801508
zval arg; /* Argument to pass to function */
14811509

14821510
array_init_size(&arg, count + (mark ? 1 : 0));
1483-
populate_subpat_array(&arg, subject, offsets, subpat_names, count, mark, flags);
1511+
populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
14841512

14851513
fci->retval = &retval;
14861514
fci->param_count = 1;
@@ -1878,7 +1906,8 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18781906
new_len = result_len + offsets[0] - start_offset; /* part before the match */
18791907

18801908
/* Use custom function to get replacement string and its length. */
1881-
eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count,
1909+
eval_result = preg_do_repl_func(
1910+
fci, fcc, subject, offsets, subpat_names, num_subpats, count,
18821911
pcre2_get_mark(match_data), flags);
18831912

18841913
ZEND_ASSERT(eval_result);
@@ -2464,14 +2493,14 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
24642493
uint32_t options; /* Execution options */
24652494
int count; /* Count of matched subpatterns */
24662495
PCRE2_SIZE start_offset; /* Where the new search starts */
2467-
PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
2468-
char *last_match; /* Location of last match */
2496+
PCRE2_SIZE last_match_offset; /* Location of last match */
24692497
uint32_t no_empty; /* If NO_EMPTY flag is set */
24702498
uint32_t delim_capture; /* If delimiters should be captured */
24712499
uint32_t offset_capture; /* If offsets should be captured */
24722500
uint32_t num_subpats; /* Number of captured subpatterns */
24732501
zval tmp;
24742502
pcre2_match_data *match_data;
2503+
char *subject = ZSTR_VAL(subject_str);
24752504

24762505
no_empty = flags & PREG_SPLIT_NO_EMPTY;
24772506
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
@@ -2485,11 +2514,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
24852514

24862515
/* Start at the beginning of the string */
24872516
start_offset = 0;
2488-
next_offset = 0;
2489-
last_match = ZSTR_VAL(subject_str);
2517+
last_match_offset = 0;
24902518
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
24912519

2492-
24932520
if (limit_val == -1) {
24942521
/* pass */
24952522
} else if (limit_val == 0) {
@@ -2513,11 +2540,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25132540

25142541
#ifdef HAVE_PCRE_JIT_SUPPORT
25152542
if ((pce->preg_options & PREG_JIT) && options) {
2516-
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2543+
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
25172544
PCRE2_NO_UTF_CHECK, match_data, mctx);
25182545
} else
25192546
#endif
2520-
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2547+
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
25212548
options, match_data, mctx);
25222549

25232550
while (1) {
@@ -2537,14 +2564,15 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25372564
break;
25382565
}
25392566

2540-
if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
2541-
2567+
if (!no_empty || offsets[0] != last_match_offset) {
25422568
if (offset_capture) {
25432569
/* Add (match, offset) pair to the return value */
2544-
add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
2570+
add_offset_pair(
2571+
return_value, subject, last_match_offset, offsets[0],
2572+
NULL, 0);
25452573
} else {
25462574
/* Add the piece to the return value */
2547-
ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
2575+
populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
25482576
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
25492577
}
25502578

@@ -2553,34 +2581,31 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25532581
limit_val--;
25542582
}
25552583

2556-
last_match = &ZSTR_VAL(subject_str)[offsets[1]];
2557-
next_offset = offsets[1];
2558-
25592584
if (delim_capture) {
2560-
size_t i, match_len;
2585+
size_t i;
25612586
for (i = 1; i < count; i++) {
2562-
match_len = offsets[(i<<1)+1] - offsets[i<<1];
25632587
/* If we have matched a delimiter */
2564-
if (!no_empty || match_len > 0) {
2588+
if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
25652589
if (offset_capture) {
2566-
add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
2590+
add_offset_pair(
2591+
return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
25672592
} else {
2568-
ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
2593+
populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
25692594
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
25702595
}
25712596
}
25722597
}
25732598
}
25742599

25752600
/* Advance to the position right after the last full match */
2576-
start_offset = offsets[1];
2601+
start_offset = last_match_offset = offsets[1];
25772602

25782603
/* If we have matched an empty string, mimic what Perl's /g option does.
25792604
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
25802605
the match again at the same point. If this fails (picked up above) we
25812606
advance to the next character. */
25822607
if (start_offset == offsets[0]) {
2583-
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2608+
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
25842609
PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
25852610
if (count >= 0) {
25862611
goto matched;
@@ -2590,7 +2615,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25902615
the start offset, and continue. Fudge the offset values
25912616
to achieve this, unless we're already at the end of the string. */
25922617
if (start_offset < ZSTR_LEN(subject_str)) {
2593-
start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
2618+
start_offset += calculate_unit_length(pce, subject + start_offset);
25942619
} else {
25952620
break;
25962621
}
@@ -2614,11 +2639,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
26142639

26152640
#ifdef HAVE_PCRE_JIT_SUPPORT
26162641
if (pce->preg_options & PREG_JIT) {
2617-
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2642+
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
26182643
PCRE2_NO_UTF_CHECK, match_data, mctx);
26192644
} else
26202645
#endif
2621-
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2646+
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
26222647
PCRE2_NO_UTF_CHECK, match_data, mctx);
26232648
}
26242649
if (match_data != mdata) {
@@ -2631,18 +2656,18 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
26312656
}
26322657

26332658
last:
2634-
start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
2659+
start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
26352660

26362661
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
26372662
if (offset_capture) {
26382663
/* Add the last (match, offset) pair to the return value */
2639-
add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
2664+
add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
26402665
} else {
26412666
/* Add the last piece to the return value */
2642-
if (last_match == ZSTR_VAL(subject_str)) {
2667+
if (start_offset == 0) {
26432668
ZVAL_STR_COPY(&tmp, subject_str);
26442669
} else {
2645-
ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
2670+
populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
26462671
}
26472672
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
26482673
}

0 commit comments

Comments
 (0)