Skip to content

Commit 1cf84f1

Browse files
committed
Try to create interned strings in preg_split as well
Also convert the last_match pointer into last_match_offset, which is more convenient now that the pieces are built from (start, end) offsets into the subject.
1 parent 621b1f0 commit 1cf84f1

File tree

1 file changed

+12
-16
lines changed

1 file changed

+12
-16
lines changed

ext/pcre/php_pcre.c

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2493,7 +2493,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
24932493
uint32_t options; /* Execution options */
24942494
int count; /* Count of matched subpatterns */
24952495
PCRE2_SIZE start_offset; /* Where the new search starts */
2496-
char *last_match; /* Location of last match */
2496+
PCRE2_SIZE last_match_offset; /* Location of last match */
24972497
uint32_t no_empty; /* If NO_EMPTY flag is set */
24982498
uint32_t delim_capture; /* If delimiters should be captured */
24992499
uint32_t offset_capture; /* If offsets should be captured */
@@ -2514,10 +2514,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25142514

25152515
/* Start at the beginning of the string */
25162516
start_offset = 0;
2517-
last_match = subject;
2517+
last_match_offset = 0;
25182518
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
25192519

2520-
25212520
if (limit_val == -1) {
25222521
/* pass */
25232522
} else if (limit_val == 0) {
@@ -2565,15 +2564,15 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25652564
break;
25662565
}
25672566

2568-
if (!no_empty || &subject[offsets[0]] != last_match) {
2567+
if (!no_empty || offsets[0] != last_match_offset) {
25692568
if (offset_capture) {
25702569
/* Add (match, offset) pair to the return value */
25712570
add_offset_pair(
2572-
return_value, subject, last_match - subject, offsets[0],
2571+
return_value, subject, last_match_offset, offsets[0],
25732572
NULL, 0);
25742573
} else {
25752574
/* Add the piece to the return value */
2576-
ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
2575+
populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
25772576
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
25782577
}
25792578

@@ -2582,27 +2581,24 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25822581
limit_val--;
25832582
}
25842583

2585-
last_match = &subject[offsets[1]];
2586-
25872584
if (delim_capture) {
2588-
size_t i, match_len;
2585+
size_t i;
25892586
for (i = 1; i < count; i++) {
2590-
match_len = offsets[2*i+1] - offsets[2*i];
25912587
/* If we have matched a delimiter */
2592-
if (!no_empty || match_len > 0) {
2588+
if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
25932589
if (offset_capture) {
25942590
add_offset_pair(
25952591
return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
25962592
} else {
2597-
ZVAL_STRINGL(&tmp, &subject[offsets[2*i]], match_len);
2593+
populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
25982594
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
25992595
}
26002596
}
26012597
}
26022598
}
26032599

26042600
/* Advance to the position right after the last full match */
2605-
start_offset = offsets[1];
2601+
start_offset = last_match_offset = offsets[1];
26062602

26072603
/* If we have matched an empty string, mimic what Perl's /g option does.
26082604
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
@@ -2660,18 +2656,18 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
26602656
}
26612657

26622658
last:
2663-
start_offset = (last_match - subject); /* the offset might have been incremented, but without further successful matches */
2659+
start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
26642660

26652661
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
26662662
if (offset_capture) {
26672663
/* Add the last (match, offset) pair to the return value */
26682664
add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
26692665
} else {
26702666
/* Add the last piece to the return value */
2671-
if (last_match == subject) {
2667+
if (start_offset == 0) {
26722668
ZVAL_STR_COPY(&tmp, subject_str);
26732669
} else {
2674-
ZVAL_STRINGL(&tmp, last_match, subject + ZSTR_LEN(subject_str) - last_match);
2670+
populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
26752671
}
26762672
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
26772673
}

0 commit comments

Comments
 (0)