@@ -968,13 +968,40 @@ static void init_unmatched_empty_pair() {
968
968
zend_hash_next_index_insert_new (Z_ARRVAL_P (pair ), & tmp );
969
969
}
970
970
971
+ static zend_always_inline void populate_match_value_str (
972
+ zval * val , const char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ) {
973
+ if (start_offset == end_offset ) {
974
+ ZVAL_EMPTY_STRING (val );
975
+ } else if (start_offset + 1 == end_offset ) {
976
+ ZVAL_INTERNED_STR (val , ZSTR_CHAR ((unsigned char ) subject [start_offset ]));
977
+ } else {
978
+ ZVAL_STRINGL (val , subject + start_offset , end_offset - start_offset );
979
+ }
980
+ }
981
+
982
+ static inline void populate_match_value (
983
+ zval * val , const char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ,
984
+ uint32_t unmatched_as_null ) {
985
+ if (PCRE2_UNSET == start_offset ) {
986
+ if (unmatched_as_null ) {
987
+ ZVAL_NULL (val );
988
+ } else {
989
+ ZVAL_EMPTY_STRING (val );
990
+ }
991
+ } else {
992
+ populate_match_value_str (val , subject , start_offset , end_offset );
993
+ }
994
+ }
995
+
971
996
/* {{{ add_offset_pair */
972
- static inline void add_offset_pair (zval * result , char * str , size_t len , PCRE2_SIZE offset , zend_string * name , uint32_t unmatched_as_null )
997
+ static inline void add_offset_pair (
998
+ zval * result , const char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ,
999
+ zend_string * name , uint32_t unmatched_as_null )
973
1000
{
974
1001
zval match_pair , tmp ;
975
1002
976
1003
/* Add (match, offset) to the return value */
977
- if (PCRE2_UNSET == offset ) {
1004
+ if (PCRE2_UNSET == start_offset ) {
978
1005
if (unmatched_as_null ) {
979
1006
if (Z_ISUNDEF (PCRE_G (unmatched_null_pair ))) {
980
1007
init_unmatched_null_pair ();
@@ -988,15 +1015,9 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
988
1015
}
989
1016
} else {
990
1017
array_init_size (& match_pair , 2 );
991
- if (len == 0 ) {
992
- ZVAL_EMPTY_STRING (& tmp );
993
- } else if (len == 1 ) {
994
- ZVAL_INTERNED_STR (& tmp , ZSTR_CHAR ((unsigned char ) * str ));
995
- } else {
996
- ZVAL_STRINGL (& tmp , str , len );
997
- }
1018
+ populate_match_value_str (& tmp , subject , start_offset , end_offset );
998
1019
zend_hash_next_index_insert_new (Z_ARRVAL (match_pair ), & tmp );
999
- ZVAL_LONG (& tmp , offset );
1020
+ ZVAL_LONG (& tmp , start_offset );
1000
1021
zend_hash_next_index_insert_new (Z_ARRVAL (match_pair ), & tmp );
1001
1022
}
1002
1023
@@ -1008,24 +1029,6 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
1008
1029
}
1009
1030
/* }}} */
1010
1031
1011
- static inline void populate_match_value (
1012
- zval * val , char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ,
1013
- uint32_t unmatched_as_null ) {
1014
- if (PCRE2_UNSET == start_offset ) {
1015
- if (unmatched_as_null ) {
1016
- ZVAL_NULL (val );
1017
- } else {
1018
- ZVAL_EMPTY_STRING (val );
1019
- }
1020
- } else if (start_offset == end_offset ) {
1021
- ZVAL_EMPTY_STRING (val );
1022
- } else if (start_offset + 1 == end_offset ) {
1023
- ZVAL_INTERNED_STR (val , ZSTR_CHAR ((unsigned char ) subject [start_offset ]));
1024
- } else {
1025
- ZVAL_STRINGL (val , subject + start_offset , end_offset - start_offset );
1026
- }
1027
- }
1028
-
1029
1032
static void populate_subpat_array (
1030
1033
zval * subpats , char * subject , PCRE2_SIZE * offsets , zend_string * * subpat_names ,
1031
1034
uint32_t num_subpats , int count , const PCRE2_SPTR mark , zend_long flags ) {
@@ -1036,13 +1039,13 @@ static void populate_subpat_array(
1036
1039
if (subpat_names ) {
1037
1040
if (offset_capture ) {
1038
1041
for (i = 0 ; i < count ; i ++ ) {
1039
- add_offset_pair (subpats , subject + offsets [ i << 1 ],
1040
- offsets [( i << 1 ) + 1 ] - offsets [i << 1 ],
1041
- offsets [ i << 1 ], subpat_names [i ], unmatched_as_null );
1042
+ add_offset_pair (
1043
+ subpats , subject , offsets [2 * i ], offsets [2 * i + 1 ],
1044
+ subpat_names [i ], unmatched_as_null );
1042
1045
}
1043
1046
if (unmatched_as_null ) {
1044
1047
for (i = count ; i < num_subpats ; i ++ ) {
1045
- add_offset_pair (subpats , NULL , 0 , PCRE2_UNSET , subpat_names [i ], 1 );
1048
+ add_offset_pair (subpats , NULL , PCRE2_UNSET , PCRE2_UNSET , subpat_names [i ], 1 );
1046
1049
}
1047
1050
}
1048
1051
} else {
@@ -1068,13 +1071,12 @@ static void populate_subpat_array(
1068
1071
} else {
1069
1072
if (offset_capture ) {
1070
1073
for (i = 0 ; i < count ; i ++ ) {
1071
- add_offset_pair (subpats , subject + offsets [i <<1 ],
1072
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ],
1073
- offsets [i <<1 ], NULL , unmatched_as_null );
1074
+ add_offset_pair (
1075
+ subpats , subject , offsets [2 * i ], offsets [2 * i + 1 ], NULL , unmatched_as_null );
1074
1076
}
1075
1077
if (unmatched_as_null ) {
1076
1078
for (i = count ; i < num_subpats ; i ++ ) {
1077
- add_offset_pair (subpats , NULL , 0 , PCRE2_UNSET , NULL , 1 );
1079
+ add_offset_pair (subpats , NULL , PCRE2_UNSET , PCRE2_UNSET , NULL , 1 );
1078
1080
}
1079
1081
}
1080
1082
} else {
@@ -1288,8 +1290,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
1288
1290
/* For each subpattern, insert it into the appropriate array. */
1289
1291
if (offset_capture ) {
1290
1292
for (i = 0 ; i < count ; i ++ ) {
1291
- add_offset_pair (& match_sets [i ], subject + offsets [i <<1 ],
1292
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], NULL , unmatched_as_null );
1293
+ add_offset_pair (
1294
+ & match_sets [i ], subject , offsets [2 * i ], offsets [2 * i + 1 ],
1295
+ NULL , unmatched_as_null );
1293
1296
}
1294
1297
} else {
1295
1298
for (i = 0 ; i < count ; i ++ ) {
@@ -1316,7 +1319,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
1316
1319
for (; i < num_subpats ; i ++ ) {
1317
1320
if (offset_capture ) {
1318
1321
add_offset_pair (
1319
- & match_sets [i ], NULL , 0 , PCRE2_UNSET ,
1322
+ & match_sets [i ], NULL , PCRE2_UNSET , PCRE2_UNSET ,
1320
1323
NULL , unmatched_as_null );
1321
1324
} else if (unmatched_as_null ) {
1322
1325
add_next_index_null (& match_sets [i ]);
@@ -2490,14 +2493,14 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2490
2493
uint32_t options ; /* Execution options */
2491
2494
int count ; /* Count of matched subpatterns */
2492
2495
PCRE2_SIZE start_offset ; /* Where the new search starts */
2493
- PCRE2_SIZE next_offset ; /* End of the last delimiter match + 1 */
2494
2496
char * last_match ; /* Location of last match */
2495
2497
uint32_t no_empty ; /* If NO_EMPTY flag is set */
2496
2498
uint32_t delim_capture ; /* If delimiters should be captured */
2497
2499
uint32_t offset_capture ; /* If offsets should be captured */
2498
2500
uint32_t num_subpats ; /* Number of captured subpatterns */
2499
2501
zval tmp ;
2500
2502
pcre2_match_data * match_data ;
2503
+ char * subject = ZSTR_VAL (subject_str );
2501
2504
2502
2505
no_empty = flags & PREG_SPLIT_NO_EMPTY ;
2503
2506
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE ;
@@ -2511,8 +2514,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2511
2514
2512
2515
/* Start at the beginning of the string */
2513
2516
start_offset = 0 ;
2514
- next_offset = 0 ;
2515
- last_match = ZSTR_VAL (subject_str );
2517
+ last_match = subject ;
2516
2518
PCRE_G (error_code ) = PHP_PCRE_NO_ERROR ;
2517
2519
2518
2520
@@ -2539,11 +2541,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2539
2541
2540
2542
#ifdef HAVE_PCRE_JIT_SUPPORT
2541
2543
if ((pce -> preg_options & PREG_JIT ) && options ) {
2542
- count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2544
+ count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2543
2545
PCRE2_NO_UTF_CHECK , match_data , mctx );
2544
2546
} else
2545
2547
#endif
2546
- count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2548
+ count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2547
2549
options , match_data , mctx );
2548
2550
2549
2551
while (1 ) {
@@ -2563,14 +2565,15 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2563
2565
break ;
2564
2566
}
2565
2567
2566
- if (!no_empty || & ZSTR_VAL (subject_str )[offsets [0 ]] != last_match ) {
2567
-
2568
+ if (!no_empty || & subject [offsets [0 ]] != last_match ) {
2568
2569
if (offset_capture ) {
2569
2570
/* Add (match, offset) pair to the return value */
2570
- add_offset_pair (return_value , last_match , (& ZSTR_VAL (subject_str )[offsets [0 ]]- last_match ), next_offset , NULL , 0 );
2571
+ add_offset_pair (
2572
+ return_value , subject , last_match - subject , offsets [0 ],
2573
+ NULL , 0 );
2571
2574
} else {
2572
2575
/* Add the piece to the return value */
2573
- ZVAL_STRINGL (& tmp , last_match , & ZSTR_VAL ( subject_str ) [offsets [0 ]]- last_match );
2576
+ ZVAL_STRINGL (& tmp , last_match , & subject [offsets [0 ]]- last_match );
2574
2577
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & tmp );
2575
2578
}
2576
2579
@@ -2579,19 +2582,19 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2579
2582
limit_val -- ;
2580
2583
}
2581
2584
2582
- last_match = & ZSTR_VAL (subject_str )[offsets [1 ]];
2583
- next_offset = offsets [1 ];
2585
+ last_match = & subject [offsets [1 ]];
2584
2586
2585
2587
if (delim_capture ) {
2586
2588
size_t i , match_len ;
2587
2589
for (i = 1 ; i < count ; i ++ ) {
2588
- match_len = offsets [( i << 1 ) + 1 ] - offsets [i << 1 ];
2590
+ match_len = offsets [2 * i + 1 ] - offsets [2 * i ];
2589
2591
/* If we have matched a delimiter */
2590
2592
if (!no_empty || match_len > 0 ) {
2591
2593
if (offset_capture ) {
2592
- add_offset_pair (return_value , & ZSTR_VAL (subject_str )[offsets [i <<1 ]], match_len , offsets [i <<1 ], NULL , 0 );
2594
+ add_offset_pair (
2595
+ return_value , subject , offsets [2 * i ], offsets [2 * i + 1 ], NULL , 0 );
2593
2596
} else {
2594
- ZVAL_STRINGL (& tmp , & ZSTR_VAL ( subject_str ) [offsets [i << 1 ]], match_len );
2597
+ ZVAL_STRINGL (& tmp , & subject [offsets [2 * i ]], match_len );
2595
2598
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & tmp );
2596
2599
}
2597
2600
}
@@ -2606,7 +2609,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2606
2609
the match again at the same point. If this fails (picked up above) we
2607
2610
advance to the next character. */
2608
2611
if (start_offset == offsets [0 ]) {
2609
- count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2612
+ count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2610
2613
PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED , match_data , mctx );
2611
2614
if (count >= 0 ) {
2612
2615
goto matched ;
@@ -2616,7 +2619,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2616
2619
the start offset, and continue. Fudge the offset values
2617
2620
to achieve this, unless we're already at the end of the string. */
2618
2621
if (start_offset < ZSTR_LEN (subject_str )) {
2619
- start_offset += calculate_unit_length (pce , ZSTR_VAL ( subject_str ) + start_offset );
2622
+ start_offset += calculate_unit_length (pce , subject + start_offset );
2620
2623
} else {
2621
2624
break ;
2622
2625
}
@@ -2640,11 +2643,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2640
2643
2641
2644
#ifdef HAVE_PCRE_JIT_SUPPORT
2642
2645
if (pce -> preg_options & PREG_JIT ) {
2643
- count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2646
+ count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2644
2647
PCRE2_NO_UTF_CHECK , match_data , mctx );
2645
2648
} else
2646
2649
#endif
2647
- count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2650
+ count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2648
2651
PCRE2_NO_UTF_CHECK , match_data , mctx );
2649
2652
}
2650
2653
if (match_data != mdata ) {
@@ -2657,18 +2660,18 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2657
2660
}
2658
2661
2659
2662
last :
2660
- start_offset = (last_match - ZSTR_VAL ( subject_str ) ); /* the offset might have been incremented, but without further successful matches */
2663
+ start_offset = (last_match - subject ); /* the offset might have been incremented, but without further successful matches */
2661
2664
2662
2665
if (!no_empty || start_offset < ZSTR_LEN (subject_str )) {
2663
2666
if (offset_capture ) {
2664
2667
/* Add the last (match, offset) pair to the return value */
2665
- add_offset_pair (return_value , & ZSTR_VAL ( subject_str )[ start_offset ] , ZSTR_LEN (subject_str ) - start_offset , start_offset , NULL , 0 );
2668
+ add_offset_pair (return_value , subject , start_offset , ZSTR_LEN (subject_str ), NULL , 0 );
2666
2669
} else {
2667
2670
/* Add the last piece to the return value */
2668
- if (last_match == ZSTR_VAL ( subject_str ) ) {
2671
+ if (last_match == subject ) {
2669
2672
ZVAL_STR_COPY (& tmp , subject_str );
2670
2673
} else {
2671
- ZVAL_STRINGL (& tmp , last_match , ZSTR_VAL ( subject_str ) + ZSTR_LEN (subject_str ) - last_match );
2674
+ ZVAL_STRINGL (& tmp , last_match , subject + ZSTR_LEN (subject_str ) - last_match );
2672
2675
}
2673
2676
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & tmp );
2674
2677
}
0 commit comments