@@ -233,6 +233,25 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
233
233
}
234
234
/* }}} */
235
235
236
+ /* {{{ static calculate_unit_length */
237
+ /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
238
+ static zend_always_inline int calculate_unit_length (pcre_cache_entry * pce , char * start )
239
+ {
240
+ int unit_len ;
241
+
242
+ if (pce -> compile_options & PCRE_UTF8 ) {
243
+ char * end = start ;
244
+
245
+ /* skip continuation bytes */
246
+ while ((* ++ end & 0xC0 ) == 0x80 );
247
+ unit_len = end - start ;
248
+ } else {
249
+ unit_len = 1 ;
250
+ }
251
+ return unit_len ;
252
+ }
253
+ /* }}} */
254
+
236
255
/* {{{ pcre_get_compiled_regex_cache
237
256
*/
238
257
PHPAPI pcre_cache_entry * pcre_get_compiled_regex_cache (zend_string * regex )
@@ -854,8 +873,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
854
873
the start offset, and continue. Fudge the offset values
855
874
to achieve this, unless we're already at the end of the string. */
856
875
if (g_notempty != 0 && start_offset < subject_len ) {
876
+ int unit_len = calculate_unit_length (pce , subject + start_offset );
877
+
857
878
offsets [0 ] = (int )start_offset ;
858
- offsets [1 ] = (int )(start_offset + 1 );
879
+ offsets [1 ] = (int )(start_offset + unit_len );
859
880
} else
860
881
break ;
861
882
} else {
@@ -1247,10 +1268,12 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
1247
1268
the start offset, and continue. Fudge the offset values
1248
1269
to achieve this, unless we're already at the end of the string. */
1249
1270
if (g_notempty != 0 && start_offset < subject_len ) {
1271
+ int unit_len = calculate_unit_length (pce , piece );
1272
+
1250
1273
offsets [0 ] = start_offset ;
1251
- offsets [1 ] = start_offset + 1 ;
1252
- memcpy (& result -> val [result_len ], piece , 1 );
1253
- result_len ++ ;
1274
+ offsets [1 ] = start_offset + unit_len ;
1275
+ memcpy (& result -> val [result_len ], piece , unit_len );
1276
+ result_len += unit_len ;
1254
1277
} else {
1255
1278
if (!result && subject_str ) {
1256
1279
result = zend_string_copy (subject_str );
0 commit comments