@@ -968,13 +968,40 @@ static void init_unmatched_empty_pair() {
968
968
zend_hash_next_index_insert_new (Z_ARRVAL_P (pair ), & tmp );
969
969
}
970
970
971
+ static zend_always_inline void populate_match_value_str (
972
+ zval * val , const char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ) {
973
+ if (start_offset == end_offset ) {
974
+ ZVAL_EMPTY_STRING (val );
975
+ } else if (start_offset + 1 == end_offset ) {
976
+ ZVAL_INTERNED_STR (val , ZSTR_CHAR ((unsigned char ) subject [start_offset ]));
977
+ } else {
978
+ ZVAL_STRINGL (val , subject + start_offset , end_offset - start_offset );
979
+ }
980
+ }
981
+
982
+ static inline void populate_match_value (
983
+ zval * val , const char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ,
984
+ uint32_t unmatched_as_null ) {
985
+ if (PCRE2_UNSET == start_offset ) {
986
+ if (unmatched_as_null ) {
987
+ ZVAL_NULL (val );
988
+ } else {
989
+ ZVAL_EMPTY_STRING (val );
990
+ }
991
+ } else {
992
+ populate_match_value_str (val , subject , start_offset , end_offset );
993
+ }
994
+ }
995
+
971
996
/* {{{ add_offset_pair */
972
- static inline void add_offset_pair (zval * result , char * str , size_t len , PCRE2_SIZE offset , zend_string * name , uint32_t unmatched_as_null )
997
+ static inline void add_offset_pair (
998
+ zval * result , const char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ,
999
+ zend_string * name , uint32_t unmatched_as_null )
973
1000
{
974
1001
zval match_pair , tmp ;
975
1002
976
1003
/* Add (match, offset) to the return value */
977
- if (PCRE2_UNSET == offset ) {
1004
+ if (PCRE2_UNSET == start_offset ) {
978
1005
if (unmatched_as_null ) {
979
1006
if (Z_ISUNDEF (PCRE_G (unmatched_null_pair ))) {
980
1007
init_unmatched_null_pair ();
@@ -988,15 +1015,9 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
988
1015
}
989
1016
} else {
990
1017
array_init_size (& match_pair , 2 );
991
- if (len == 0 ) {
992
- ZVAL_EMPTY_STRING (& tmp );
993
- } else if (len == 1 ) {
994
- ZVAL_INTERNED_STR (& tmp , ZSTR_CHAR ((unsigned char ) * str ));
995
- } else {
996
- ZVAL_STRINGL (& tmp , str , len );
997
- }
1018
+ populate_match_value_str (& tmp , subject , start_offset , end_offset );
998
1019
zend_hash_next_index_insert_new (Z_ARRVAL (match_pair ), & tmp );
999
- ZVAL_LONG (& tmp , offset );
1020
+ ZVAL_LONG (& tmp , start_offset );
1000
1021
zend_hash_next_index_insert_new (Z_ARRVAL (match_pair ), & tmp );
1001
1022
}
1002
1023
@@ -1008,37 +1029,24 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI
1008
1029
}
1009
1030
/* }}} */
1010
1031
1011
- static inline void populate_match_value (
1012
- zval * val , char * subject , PCRE2_SIZE start_offset , PCRE2_SIZE end_offset ,
1013
- uint32_t unmatched_as_null ) {
1014
- if (PCRE2_UNSET == start_offset ) {
1015
- if (unmatched_as_null ) {
1016
- ZVAL_NULL (val );
1017
- } else {
1018
- ZVAL_EMPTY_STRING (val );
1019
- }
1020
- } else if (start_offset == end_offset ) {
1021
- ZVAL_EMPTY_STRING (val );
1022
- } else if (start_offset + 1 == end_offset ) {
1023
- ZVAL_INTERNED_STR (val , ZSTR_CHAR ((unsigned char ) subject [start_offset ]));
1024
- } else {
1025
- ZVAL_STRINGL (val , subject + start_offset , end_offset - start_offset );
1026
- }
1027
- }
1028
-
1029
1032
static void populate_subpat_array (
1030
1033
zval * subpats , char * subject , PCRE2_SIZE * offsets , zend_string * * subpat_names ,
1031
- int count , const PCRE2_SPTR mark , zend_long flags ) {
1034
+ uint32_t num_subpats , int count , const PCRE2_SPTR mark , zend_long flags ) {
1032
1035
zend_bool offset_capture = (flags & PREG_OFFSET_CAPTURE ) != 0 ;
1033
1036
zend_bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL ) != 0 ;
1034
1037
zval val ;
1035
1038
int i ;
1036
1039
if (subpat_names ) {
1037
1040
if (offset_capture ) {
1038
1041
for (i = 0 ; i < count ; i ++ ) {
1039
- add_offset_pair (subpats , subject + offsets [i <<1 ],
1040
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ],
1041
- offsets [i <<1 ], subpat_names [i ], unmatched_as_null );
1042
+ add_offset_pair (
1043
+ subpats , subject , offsets [2 * i ], offsets [2 * i + 1 ],
1044
+ subpat_names [i ], unmatched_as_null );
1045
+ }
1046
+ if (unmatched_as_null ) {
1047
+ for (i = count ; i < num_subpats ; i ++ ) {
1048
+ add_offset_pair (subpats , NULL , PCRE2_UNSET , PCRE2_UNSET , subpat_names [i ], 1 );
1049
+ }
1042
1050
}
1043
1051
} else {
1044
1052
for (i = 0 ; i < count ; i ++ ) {
@@ -1050,20 +1058,38 @@ static void populate_subpat_array(
1050
1058
}
1051
1059
zend_hash_next_index_insert (Z_ARRVAL_P (subpats ), & val );
1052
1060
}
1061
+ if (unmatched_as_null ) {
1062
+ for (i = count ; i < num_subpats ; i ++ ) {
1063
+ ZVAL_NULL (& val );
1064
+ if (subpat_names [i ]) {
1065
+ zend_hash_update (Z_ARRVAL_P (subpats ), subpat_names [i ], & val );
1066
+ }
1067
+ zend_hash_next_index_insert (Z_ARRVAL_P (subpats ), & val );
1068
+ }
1069
+ }
1053
1070
}
1054
1071
} else {
1055
1072
if (offset_capture ) {
1056
1073
for (i = 0 ; i < count ; i ++ ) {
1057
- add_offset_pair (subpats , subject + offsets [i <<1 ],
1058
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ],
1059
- offsets [i <<1 ], NULL , unmatched_as_null );
1074
+ add_offset_pair (
1075
+ subpats , subject , offsets [2 * i ], offsets [2 * i + 1 ], NULL , unmatched_as_null );
1076
+ }
1077
+ if (unmatched_as_null ) {
1078
+ for (i = count ; i < num_subpats ; i ++ ) {
1079
+ add_offset_pair (subpats , NULL , PCRE2_UNSET , PCRE2_UNSET , NULL , 1 );
1080
+ }
1060
1081
}
1061
1082
} else {
1062
1083
for (i = 0 ; i < count ; i ++ ) {
1063
1084
populate_match_value (
1064
1085
& val , subject , offsets [2 * i ], offsets [2 * i + 1 ], unmatched_as_null );
1065
1086
zend_hash_next_index_insert (Z_ARRVAL_P (subpats ), & val );
1066
1087
}
1088
+ if (unmatched_as_null ) {
1089
+ for (i = count ; i < num_subpats ; i ++ ) {
1090
+ add_next_index_null (subpats );
1091
+ }
1092
+ }
1067
1093
}
1068
1094
}
1069
1095
/* Add MARK, if available */
@@ -1264,8 +1290,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
1264
1290
/* For each subpattern, insert it into the appropriate array. */
1265
1291
if (offset_capture ) {
1266
1292
for (i = 0 ; i < count ; i ++ ) {
1267
- add_offset_pair (& match_sets [i ], subject + offsets [i <<1 ],
1268
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], NULL , unmatched_as_null );
1293
+ add_offset_pair (
1294
+ & match_sets [i ], subject , offsets [2 * i ], offsets [2 * i + 1 ],
1295
+ NULL , unmatched_as_null );
1269
1296
}
1270
1297
} else {
1271
1298
for (i = 0 ; i < count ; i ++ ) {
@@ -1292,7 +1319,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
1292
1319
for (; i < num_subpats ; i ++ ) {
1293
1320
if (offset_capture ) {
1294
1321
add_offset_pair (
1295
- & match_sets [i ], NULL , 0 , PCRE2_UNSET ,
1322
+ & match_sets [i ], NULL , PCRE2_UNSET , PCRE2_UNSET ,
1296
1323
NULL , unmatched_as_null );
1297
1324
} else if (unmatched_as_null ) {
1298
1325
add_next_index_null (& match_sets [i ]);
@@ -1306,15 +1333,16 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
1306
1333
array_init_size (& result_set , count + (mark ? 1 : 0 ));
1307
1334
mark = pcre2_get_mark (match_data );
1308
1335
populate_subpat_array (
1309
- & result_set , subject , offsets , subpat_names , count , mark , flags );
1336
+ & result_set , subject , offsets , subpat_names ,
1337
+ num_subpats , count , mark , flags );
1310
1338
/* And add it to the output array */
1311
1339
zend_hash_next_index_insert (Z_ARRVAL_P (subpats ), & result_set );
1312
1340
}
1313
1341
} else { /* single pattern matching */
1314
1342
/* For each subpattern, insert it into the subpatterns array. */
1315
1343
mark = pcre2_get_mark (match_data );
1316
1344
populate_subpat_array (
1317
- subpats , subject , offsets , subpat_names , count , mark , flags );
1345
+ subpats , subject , offsets , subpat_names , num_subpats , count , mark , flags );
1318
1346
break ;
1319
1347
}
1320
1348
}
@@ -1473,14 +1501,14 @@ static int preg_get_backref(char **str, int *backref)
1473
1501
1474
1502
/* {{{ preg_do_repl_func
1475
1503
*/
1476
- static zend_string * preg_do_repl_func (zend_fcall_info * fci , zend_fcall_info_cache * fcc , char * subject , PCRE2_SIZE * offsets , zend_string * * subpat_names , int count , const PCRE2_SPTR mark , zend_long flags )
1504
+ static zend_string * preg_do_repl_func (zend_fcall_info * fci , zend_fcall_info_cache * fcc , char * subject , PCRE2_SIZE * offsets , zend_string * * subpat_names , uint32_t num_subpats , int count , const PCRE2_SPTR mark , zend_long flags )
1477
1505
{
1478
1506
zend_string * result_str ;
1479
1507
zval retval ; /* Function return value */
1480
1508
zval arg ; /* Argument to pass to function */
1481
1509
1482
1510
array_init_size (& arg , count + (mark ? 1 : 0 ));
1483
- populate_subpat_array (& arg , subject , offsets , subpat_names , count , mark , flags );
1511
+ populate_subpat_array (& arg , subject , offsets , subpat_names , num_subpats , count , mark , flags );
1484
1512
1485
1513
fci -> retval = & retval ;
1486
1514
fci -> param_count = 1 ;
@@ -1878,7 +1906,8 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
1878
1906
new_len = result_len + offsets [0 ] - start_offset ; /* part before the match */
1879
1907
1880
1908
/* Use custom function to get replacement string and its length. */
1881
- eval_result = preg_do_repl_func (fci , fcc , subject , offsets , subpat_names , count ,
1909
+ eval_result = preg_do_repl_func (
1910
+ fci , fcc , subject , offsets , subpat_names , num_subpats , count ,
1882
1911
pcre2_get_mark (match_data ), flags );
1883
1912
1884
1913
ZEND_ASSERT (eval_result );
@@ -2464,14 +2493,14 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2464
2493
uint32_t options ; /* Execution options */
2465
2494
int count ; /* Count of matched subpatterns */
2466
2495
PCRE2_SIZE start_offset ; /* Where the new search starts */
2467
- PCRE2_SIZE next_offset ; /* End of the last delimiter match + 1 */
2468
- char * last_match ; /* Location of last match */
2496
+ PCRE2_SIZE last_match_offset ; /* Location of last match */
2469
2497
uint32_t no_empty ; /* If NO_EMPTY flag is set */
2470
2498
uint32_t delim_capture ; /* If delimiters should be captured */
2471
2499
uint32_t offset_capture ; /* If offsets should be captured */
2472
2500
uint32_t num_subpats ; /* Number of captured subpatterns */
2473
2501
zval tmp ;
2474
2502
pcre2_match_data * match_data ;
2503
+ char * subject = ZSTR_VAL (subject_str );
2475
2504
2476
2505
no_empty = flags & PREG_SPLIT_NO_EMPTY ;
2477
2506
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE ;
@@ -2485,11 +2514,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2485
2514
2486
2515
/* Start at the beginning of the string */
2487
2516
start_offset = 0 ;
2488
- next_offset = 0 ;
2489
- last_match = ZSTR_VAL (subject_str );
2517
+ last_match_offset = 0 ;
2490
2518
PCRE_G (error_code ) = PHP_PCRE_NO_ERROR ;
2491
2519
2492
-
2493
2520
if (limit_val == -1 ) {
2494
2521
/* pass */
2495
2522
} else if (limit_val == 0 ) {
@@ -2513,11 +2540,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2513
2540
2514
2541
#ifdef HAVE_PCRE_JIT_SUPPORT
2515
2542
if ((pce -> preg_options & PREG_JIT ) && options ) {
2516
- count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2543
+ count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2517
2544
PCRE2_NO_UTF_CHECK , match_data , mctx );
2518
2545
} else
2519
2546
#endif
2520
- count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2547
+ count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2521
2548
options , match_data , mctx );
2522
2549
2523
2550
while (1 ) {
@@ -2537,14 +2564,15 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2537
2564
break ;
2538
2565
}
2539
2566
2540
- if (!no_empty || & ZSTR_VAL (subject_str )[offsets [0 ]] != last_match ) {
2541
-
2567
+ if (!no_empty || offsets [0 ] != last_match_offset ) {
2542
2568
if (offset_capture ) {
2543
2569
/* Add (match, offset) pair to the return value */
2544
- add_offset_pair (return_value , last_match , (& ZSTR_VAL (subject_str )[offsets [0 ]]- last_match ), next_offset , NULL , 0 );
2570
+ add_offset_pair (
2571
+ return_value , subject , last_match_offset , offsets [0 ],
2572
+ NULL , 0 );
2545
2573
} else {
2546
2574
/* Add the piece to the return value */
2547
- ZVAL_STRINGL (& tmp , last_match , & ZSTR_VAL ( subject_str )[ offsets [0 ]] - last_match );
2575
+ populate_match_value_str (& tmp , subject , last_match_offset , offsets [0 ]);
2548
2576
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & tmp );
2549
2577
}
2550
2578
@@ -2553,34 +2581,31 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2553
2581
limit_val -- ;
2554
2582
}
2555
2583
2556
- last_match = & ZSTR_VAL (subject_str )[offsets [1 ]];
2557
- next_offset = offsets [1 ];
2558
-
2559
2584
if (delim_capture ) {
2560
- size_t i , match_len ;
2585
+ size_t i ;
2561
2586
for (i = 1 ; i < count ; i ++ ) {
2562
- match_len = offsets [(i <<1 )+ 1 ] - offsets [i <<1 ];
2563
2587
/* If we have matched a delimiter */
2564
- if (!no_empty || match_len > 0 ) {
2588
+ if (!no_empty || offsets [ 2 * i ] != offsets [ 2 * i + 1 ] ) {
2565
2589
if (offset_capture ) {
2566
- add_offset_pair (return_value , & ZSTR_VAL (subject_str )[offsets [i <<1 ]], match_len , offsets [i <<1 ], NULL , 0 );
2590
+ add_offset_pair (
2591
+ return_value , subject , offsets [2 * i ], offsets [2 * i + 1 ], NULL , 0 );
2567
2592
} else {
2568
- ZVAL_STRINGL (& tmp , & ZSTR_VAL ( subject_str )[ offsets [i << 1 ]], match_len );
2593
+ populate_match_value_str (& tmp , subject , offsets [2 * i ], offsets [ 2 * i + 1 ] );
2569
2594
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & tmp );
2570
2595
}
2571
2596
}
2572
2597
}
2573
2598
}
2574
2599
2575
2600
/* Advance to the position right after the last full match */
2576
- start_offset = offsets [1 ];
2601
+ start_offset = last_match_offset = offsets [1 ];
2577
2602
2578
2603
/* If we have matched an empty string, mimic what Perl's /g options does.
2579
2604
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2580
2605
the match again at the same point. If this fails (picked up above) we
2581
2606
advance to the next character. */
2582
2607
if (start_offset == offsets [0 ]) {
2583
- count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2608
+ count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2584
2609
PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED , match_data , mctx );
2585
2610
if (count >= 0 ) {
2586
2611
goto matched ;
@@ -2590,7 +2615,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2590
2615
the start offset, and continue. Fudge the offset values
2591
2616
to achieve this, unless we're already at the end of the string. */
2592
2617
if (start_offset < ZSTR_LEN (subject_str )) {
2593
- start_offset += calculate_unit_length (pce , ZSTR_VAL ( subject_str ) + start_offset );
2618
+ start_offset += calculate_unit_length (pce , subject + start_offset );
2594
2619
} else {
2595
2620
break ;
2596
2621
}
@@ -2614,11 +2639,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2614
2639
2615
2640
#ifdef HAVE_PCRE_JIT_SUPPORT
2616
2641
if (pce -> preg_options & PREG_JIT ) {
2617
- count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2642
+ count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2618
2643
PCRE2_NO_UTF_CHECK , match_data , mctx );
2619
2644
} else
2620
2645
#endif
2621
- count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL ( subject_str ) , ZSTR_LEN (subject_str ), start_offset ,
2646
+ count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , ZSTR_LEN (subject_str ), start_offset ,
2622
2647
PCRE2_NO_UTF_CHECK , match_data , mctx );
2623
2648
}
2624
2649
if (match_data != mdata ) {
@@ -2631,18 +2656,18 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2631
2656
}
2632
2657
2633
2658
last :
2634
- start_offset = ( last_match - ZSTR_VAL ( subject_str )) ; /* the offset might have been incremented, but without further successful matches */
2659
+ start_offset = last_match_offset ; /* the offset might have been incremented, but without further successful matches */
2635
2660
2636
2661
if (!no_empty || start_offset < ZSTR_LEN (subject_str )) {
2637
2662
if (offset_capture ) {
2638
2663
/* Add the last (match, offset) pair to the return value */
2639
- add_offset_pair (return_value , & ZSTR_VAL ( subject_str )[ start_offset ] , ZSTR_LEN (subject_str ) - start_offset , start_offset , NULL , 0 );
2664
+ add_offset_pair (return_value , subject , start_offset , ZSTR_LEN (subject_str ), NULL , 0 );
2640
2665
} else {
2641
2666
/* Add the last piece to the return value */
2642
- if (last_match == ZSTR_VAL ( subject_str ) ) {
2667
+ if (start_offset == 0 ) {
2643
2668
ZVAL_STR_COPY (& tmp , subject_str );
2644
2669
} else {
2645
- ZVAL_STRINGL (& tmp , last_match , ZSTR_VAL ( subject_str ) + ZSTR_LEN (subject_str ) - last_match );
2670
+ populate_match_value_str (& tmp , subject , start_offset , ZSTR_LEN (subject_str ));
2646
2671
}
2647
2672
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & tmp );
2648
2673
}
0 commit comments