@@ -225,6 +225,25 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D
225
225
}
226
226
/* }}} */
227
227
228
+ /* {{{ static calculate_unit_length */
229
+ /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
230
+ static zend_always_inline int calculate_unit_length (pcre_cache_entry * pce , char * start )
231
+ {
232
+ int unit_len ;
233
+
234
+ if (pce -> compile_options & PCRE_UTF8 ) {
235
+ char * end = start ;
236
+
237
+ /* skip continuation bytes */
238
+ while ((* ++ end & 0xC0 ) == 0x80 );
239
+ unit_len = end - start ;
240
+ } else {
241
+ unit_len = 1 ;
242
+ }
243
+ return unit_len ;
244
+ }
245
+ /* }}} */
246
+
228
247
/* {{{ pcre_get_compiled_regex_cache
229
248
*/
230
249
PHPAPI pcre_cache_entry * pcre_get_compiled_regex_cache (char * regex , int regex_len TSRMLS_DC )
@@ -780,8 +799,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
780
799
the start offset, and continue. Fudge the offset values
781
800
to achieve this, unless we're already at the end of the string. */
782
801
if (g_notempty != 0 && start_offset < subject_len ) {
802
+ int unit_len = calculate_unit_length (pce , subject + start_offset );
803
+
783
804
offsets [0 ] = start_offset ;
784
- offsets [1 ] = start_offset + 1 ;
805
+ offsets [1 ] = start_offset + unit_len ;
785
806
} else
786
807
break ;
787
808
} else {
@@ -1240,10 +1261,12 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
1240
1261
the start offset, and continue. Fudge the offset values
1241
1262
to achieve this, unless we're already at the end of the string. */
1242
1263
if (g_notempty != 0 && start_offset < subject_len ) {
1264
+ int unit_len = calculate_unit_length (pce , piece );
1265
+
1243
1266
offsets [0 ] = start_offset ;
1244
- offsets [1 ] = start_offset + 1 ;
1245
- memcpy (& result [* result_len ], piece , 1 );
1246
- ( * result_len ) ++ ;
1267
+ offsets [1 ] = start_offset + unit_len ;
1268
+ memcpy (& result [* result_len ], piece , unit_len );
1269
+ * result_len += unit_len ;
1247
1270
} else {
1248
1271
new_len = * result_len + subject_len - start_offset ;
1249
1272
if (new_len + 1 > alloc_len ) {
0 commit comments