Skip to content

Commit 621b1f0

Browse files
committed
Cleanup add_offset_pair API
Accept the two offsets directly, rather than doing length calculations at all callsites. Also extract the logic to create a possibly interned string. Switch the split implementation to work on a char* subject internally, because ZSTR_VAL(subject_str) is a mouthful...
1 parent 6311581 commit 621b1f0

File tree

1 file changed

+64
-61
lines changed

1 file changed

+64
-61
lines changed

ext/pcre/php_pcre.c

Lines changed: 64 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -968,13 +968,40 @@ static void init_unmatched_empty_pair() {
968968
zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
969969
}
970970

971+
/* Store the substring subject[start_offset, end_offset) in val.
 *
 * val          - zval that receives the resulting string
 * subject      - base pointer of the subject buffer the offsets index into
 * start_offset - offset of the first byte of the match
 * end_offset   - offset one past the last byte of the match
 *
 * An empty range yields the empty string, a one-byte range yields the
 * per-byte string obtained from ZSTR_CHAR(), and anything longer is copied
 * with ZVAL_STRINGL(). No check for PCRE2_UNSET is made here. */
static zend_always_inline void populate_match_value_str(
972+
zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
973+
if (start_offset == end_offset) { /* zero-length match */
974+
ZVAL_EMPTY_STRING(val);
975+
} else if (start_offset + 1 == end_offset) { /* exactly one byte */
976+
ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset])); /* NOTE(review): the unsigned char cast presumably keeps the ZSTR_CHAR index non-negative - confirm */
977+
} else { /* two or more bytes: copy end_offset - start_offset bytes */
978+
ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
979+
}
980+
}
981+
982+
/* Fill val with the value of one capture group.
 *
 * val               - zval that receives the result
 * subject           - base pointer of the subject buffer the offsets index into
 * start_offset      - start offset of the group, or PCRE2_UNSET if it did not match
 * end_offset        - end offset of the group (not read when start_offset is PCRE2_UNSET)
 * unmatched_as_null - non-zero: an unmatched group becomes NULL;
 *                     zero: an unmatched group becomes the empty string
 *
 * A matched group is delegated to populate_match_value_str(). */
static inline void populate_match_value(
983+
zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
984+
uint32_t unmatched_as_null) {
985+
if (PCRE2_UNSET == start_offset) { /* group did not participate in the match */
986+
if (unmatched_as_null) {
987+
ZVAL_NULL(val);
988+
} else {
989+
ZVAL_EMPTY_STRING(val);
990+
}
991+
} else { /* group matched: build the string from the offset pair */
992+
populate_match_value_str(val, subject, start_offset, end_offset);
993+
}
994+
}
995+
971996
/* {{{ add_offset_pair */
972-
static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, zend_string *name, uint32_t unmatched_as_null)
997+
static inline void add_offset_pair(
998+
zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
999+
zend_string *name, uint32_t unmatched_as_null)
9731000
{
9741001
zval match_pair, tmp;
9751002

9761003
/* Add (match, offset) to the return value */
977-
if (PCRE2_UNSET == offset) {
1004+
if (PCRE2_UNSET == start_offset) {
9781005
if (unmatched_as_null) {
9791006
if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
9801007
init_unmatched_null_pair();
@@ -988,15 +1015,9 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
9881015
}
9891016
} else {
9901017
array_init_size(&match_pair, 2);
991-
if (len == 0) {
992-
ZVAL_EMPTY_STRING(&tmp);
993-
} else if (len == 1) {
994-
ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((unsigned char) *str));
995-
} else {
996-
ZVAL_STRINGL(&tmp, str, len);
997-
}
1018+
populate_match_value_str(&tmp, subject, start_offset, end_offset);
9981019
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
999-
ZVAL_LONG(&tmp, offset);
1020+
ZVAL_LONG(&tmp, start_offset);
10001021
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
10011022
}
10021023

@@ -1008,24 +1029,6 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
10081029
}
10091030
/* }}} */
10101031

1011-
static inline void populate_match_value(
1012-
zval *val, char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1013-
uint32_t unmatched_as_null) {
1014-
if (PCRE2_UNSET == start_offset) {
1015-
if (unmatched_as_null) {
1016-
ZVAL_NULL(val);
1017-
} else {
1018-
ZVAL_EMPTY_STRING(val);
1019-
}
1020-
} else if (start_offset == end_offset) {
1021-
ZVAL_EMPTY_STRING(val);
1022-
} else if (start_offset + 1 == end_offset) {
1023-
ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
1024-
} else {
1025-
ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
1026-
}
1027-
}
1028-
10291032
static void populate_subpat_array(
10301033
zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
10311034
uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
@@ -1036,13 +1039,13 @@ static void populate_subpat_array(
10361039
if (subpat_names) {
10371040
if (offset_capture) {
10381041
for (i = 0; i < count; i++) {
1039-
add_offset_pair(subpats, subject + offsets[i<<1],
1040-
offsets[(i<<1)+1] - offsets[i<<1],
1041-
offsets[i<<1], subpat_names[i], unmatched_as_null);
1042+
add_offset_pair(
1043+
subpats, subject, offsets[2*i], offsets[2*i+1],
1044+
subpat_names[i], unmatched_as_null);
10421045
}
10431046
if (unmatched_as_null) {
10441047
for (i = count; i < num_subpats; i++) {
1045-
add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, subpat_names[i], 1);
1048+
add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
10461049
}
10471050
}
10481051
} else {
@@ -1068,13 +1071,12 @@ static void populate_subpat_array(
10681071
} else {
10691072
if (offset_capture) {
10701073
for (i = 0; i < count; i++) {
1071-
add_offset_pair(subpats, subject + offsets[i<<1],
1072-
offsets[(i<<1)+1] - offsets[i<<1],
1073-
offsets[i<<1], NULL, unmatched_as_null);
1074+
add_offset_pair(
1075+
subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
10741076
}
10751077
if (unmatched_as_null) {
10761078
for (i = count; i < num_subpats; i++) {
1077-
add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, NULL, 1);
1079+
add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
10781080
}
10791081
}
10801082
} else {
@@ -1288,8 +1290,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12881290
/* For each subpattern, insert it into the appropriate array. */
12891291
if (offset_capture) {
12901292
for (i = 0; i < count; i++) {
1291-
add_offset_pair(&match_sets[i], subject + offsets[i<<1],
1292-
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
1293+
add_offset_pair(
1294+
&match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1295+
NULL, unmatched_as_null);
12931296
}
12941297
} else {
12951298
for (i = 0; i < count; i++) {
@@ -1316,7 +1319,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
13161319
for (; i < num_subpats; i++) {
13171320
if (offset_capture) {
13181321
add_offset_pair(
1319-
&match_sets[i], NULL, 0, PCRE2_UNSET,
1322+
&match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
13201323
NULL, unmatched_as_null);
13211324
} else if (unmatched_as_null) {
13221325
add_next_index_null(&match_sets[i]);
@@ -2490,14 +2493,14 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
24902493
uint32_t options; /* Execution options */
24912494
int count; /* Count of matched subpatterns */
24922495
PCRE2_SIZE start_offset; /* Where the new search starts */
2493-
PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
24942496
char *last_match; /* Location of last match */
24952497
uint32_t no_empty; /* If NO_EMPTY flag is set */
24962498
uint32_t delim_capture; /* If delimiters should be captured */
24972499
uint32_t offset_capture; /* If offsets should be captured */
24982500
uint32_t num_subpats; /* Number of captured subpatterns */
24992501
zval tmp;
25002502
pcre2_match_data *match_data;
2503+
char *subject = ZSTR_VAL(subject_str);
25012504

25022505
no_empty = flags & PREG_SPLIT_NO_EMPTY;
25032506
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
@@ -2511,8 +2514,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25112514

25122515
/* Start at the beginning of the string */
25132516
start_offset = 0;
2514-
next_offset = 0;
2515-
last_match = ZSTR_VAL(subject_str);
2517+
last_match = subject;
25162518
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
25172519

25182520

@@ -2539,11 +2541,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25392541

25402542
#ifdef HAVE_PCRE_JIT_SUPPORT
25412543
if ((pce->preg_options & PREG_JIT) && options) {
2542-
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2544+
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
25432545
PCRE2_NO_UTF_CHECK, match_data, mctx);
25442546
} else
25452547
#endif
2546-
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2548+
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
25472549
options, match_data, mctx);
25482550

25492551
while (1) {
@@ -2563,14 +2565,15 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25632565
break;
25642566
}
25652567

2566-
if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
2567-
2568+
if (!no_empty || &subject[offsets[0]] != last_match) {
25682569
if (offset_capture) {
25692570
/* Add (match, offset) pair to the return value */
2570-
add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
2571+
add_offset_pair(
2572+
return_value, subject, last_match - subject, offsets[0],
2573+
NULL, 0);
25712574
} else {
25722575
/* Add the piece to the return value */
2573-
ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
2576+
ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
25742577
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
25752578
}
25762579

@@ -2579,19 +2582,19 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25792582
limit_val--;
25802583
}
25812584

2582-
last_match = &ZSTR_VAL(subject_str)[offsets[1]];
2583-
next_offset = offsets[1];
2585+
last_match = &subject[offsets[1]];
25842586

25852587
if (delim_capture) {
25862588
size_t i, match_len;
25872589
for (i = 1; i < count; i++) {
2588-
match_len = offsets[(i<<1)+1] - offsets[i<<1];
2590+
match_len = offsets[2*i+1] - offsets[2*i];
25892591
/* If we have matched a delimiter */
25902592
if (!no_empty || match_len > 0) {
25912593
if (offset_capture) {
2592-
add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
2594+
add_offset_pair(
2595+
return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
25932596
} else {
2594-
ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
2597+
ZVAL_STRINGL(&tmp, &subject[offsets[2*i]], match_len);
25952598
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
25962599
}
25972600
}
@@ -2606,7 +2609,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
26062609
the match again at the same point. If this fails (picked up above) we
26072610
advance to the next character. */
26082611
if (start_offset == offsets[0]) {
2609-
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2612+
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
26102613
PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
26112614
if (count >= 0) {
26122615
goto matched;
@@ -2616,7 +2619,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
26162619
the start offset, and continue. Fudge the offset values
26172620
to achieve this, unless we're already at the end of the string. */
26182621
if (start_offset < ZSTR_LEN(subject_str)) {
2619-
start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
2622+
start_offset += calculate_unit_length(pce, subject + start_offset);
26202623
} else {
26212624
break;
26222625
}
@@ -2640,11 +2643,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
26402643

26412644
#ifdef HAVE_PCRE_JIT_SUPPORT
26422645
if (pce->preg_options & PREG_JIT) {
2643-
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2646+
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
26442647
PCRE2_NO_UTF_CHECK, match_data, mctx);
26452648
} else
26462649
#endif
2647-
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2650+
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
26482651
PCRE2_NO_UTF_CHECK, match_data, mctx);
26492652
}
26502653
if (match_data != mdata) {
@@ -2657,18 +2660,18 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
26572660
}
26582661

26592662
last:
2660-
start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
2663+
start_offset = (last_match - subject); /* the offset might have been incremented, but without further successful matches */
26612664

26622665
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
26632666
if (offset_capture) {
26642667
/* Add the last (match, offset) pair to the return value */
2665-
add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
2668+
add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
26662669
} else {
26672670
/* Add the last piece to the return value */
2668-
if (last_match == ZSTR_VAL(subject_str)) {
2671+
if (last_match == subject) {
26692672
ZVAL_STR_COPY(&tmp, subject_str);
26702673
} else {
2671-
ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
2674+
ZVAL_STRINGL(&tmp, last_match, subject + ZSTR_LEN(subject_str) - last_match);
26722675
}
26732676
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
26742677
}

0 commit comments

Comments
 (0)