Skip to content

Commit 54b6ac2

Browse files
committed
Promote some warnings in MBString Regex
1 parent 66f2ebe commit 54b6ac2

File tree

11 files changed

+209
-206
lines changed

11 files changed

+209
-206
lines changed

ext/mbstring/php_mbregex.c

Lines changed: 65 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "php_mbregex.h"
2929
#include "mbstring.h"
3030
#include "libmbfl/filters/mbfilter_utf8.h"
31+
#include <stdbool.h>
3132

3233
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
3334
#include <oniguruma.h>
@@ -600,8 +601,8 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionT
600601
/* }}} */
601602

602603
/* {{{ _php_mb_regex_init_options */
603-
static void
604-
_php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
604+
static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option,
605+
OnigSyntaxType **syntax, uint32_t option_arg_num)
605606
{
606607
size_t n;
607608
char c;
@@ -660,14 +661,16 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option
660661
*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
661662
break;
662663
case 'e':
663-
if (eval != NULL) *eval = 1;
664-
break;
664+
zend_argument_value_error(option_arg_num, "option 'e' is not supported");
665+
return false;
665666
default:
667+
// TODO: Throw a ValueError for unsupported option characters
666668
break;
667669
}
668670
}
669671
if (option != NULL) *option|=optm;
670672
}
673+
return true;
671674
}
672675
/* }}} */
673676

@@ -908,6 +911,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
908911
RETURN_THROWS();
909912
}
910913

914+
if (arg_pattern_len == 0) {
915+
zend_argument_value_error(1, "must not be empty");
916+
RETURN_THROWS();
917+
}
918+
911919
if (array != NULL) {
912920
array = zend_try_array_init(array);
913921
if (!array) {
@@ -920,20 +928,15 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
920928
string_len,
921929
php_mb_regex_get_mbctype_encoding()
922930
)) {
923-
RETURN_FALSE;
931+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
932+
RETURN_THROWS();
924933
}
925934

926935
options = MBREX(regex_default_options);
927936
if (icase) {
928937
options |= ONIG_OPTION_IGNORECASE;
929938
}
930939

931-
if (arg_pattern_len == 0) {
932-
php_error_docref(NULL, E_WARNING, "Empty pattern");
933-
RETVAL_FALSE;
934-
goto out;
935-
}
936-
937940
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax));
938941
if (re == NULL) {
939942
RETVAL_FALSE;
@@ -1017,15 +1020,14 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
10171020
smart_str out_buf = {0};
10181021
smart_str eval_buf = {0};
10191022
smart_str *pbuf;
1020-
int err, eval, n;
1023+
int err, n;
10211024
OnigUChar *pos;
10221025
OnigUChar *string_lim;
10231026
char *description = NULL;
10241027

10251028
const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding();
10261029
ZEND_ASSERT(enc != NULL);
10271030

1028-
eval = 0;
10291031
{
10301032
char *option_str = NULL;
10311033
size_t option_str_len = 0;
@@ -1049,42 +1051,36 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
10491051
}
10501052

10511053
if (!php_mb_check_encoding(string, string_len, enc)) {
1052-
RETURN_NULL();
1054+
zend_argument_value_error(3, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1055+
RETURN_THROWS();
10531056
}
10541057

10551058
if (option_str != NULL) {
1056-
_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
1059+
/* Initialize the options; a failure here means a ValueError has already been thrown */
1060+
if(!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, 4)) {
1061+
RETURN_THROWS();
1062+
}
10571063
} else {
10581064
options |= MBREX(regex_default_options);
10591065
syntax = MBREX(regex_default_syntax);
10601066
}
10611067
}
1062-
if (eval && !is_callable) {
1063-
php_error_docref(NULL, E_WARNING, "The 'e' option is no longer supported, use mb_ereg_replace_callback instead");
1064-
RETURN_FALSE;
1065-
}
10661068

10671069
/* create regex pattern buffer */
10681070
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax);
10691071
if (re == NULL) {
1072+
// Should this be considered an error instead?
10701073
RETURN_FALSE;
10711074
}
10721075

1073-
if (eval || is_callable) {
1076+
if (is_callable) {
10741077
pbuf = &eval_buf;
10751078
description = zend_make_compiled_string_description("mbregex replace");
10761079
} else {
10771080
pbuf = &out_buf;
10781081
description = NULL;
10791082
}
10801083

1081-
if (is_callable) {
1082-
if (eval) {
1083-
php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
1084-
RETURN_FALSE;
1085-
}
1086-
}
1087-
10881084
/* do the actual work */
10891085
err = 0;
10901086
pos = (OnigUChar *)string;
@@ -1106,35 +1102,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
11061102
mb_regex_substitute(pbuf, string, string_len, replace, replace_len, re, regs, enc);
11071103
}
11081104

1109-
if (eval) {
1110-
zval v;
1111-
zend_string *eval_str;
1112-
/* null terminate buffer */
1113-
smart_str_0(&eval_buf);
1114-
1115-
if (eval_buf.s) {
1116-
eval_str = eval_buf.s;
1117-
} else {
1118-
eval_str = ZSTR_EMPTY_ALLOC();
1119-
}
1120-
1121-
/* do eval */
1122-
if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
1123-
efree(description);
1124-
zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
1125-
onig_region_free(regs, 1);
1126-
smart_str_free(&out_buf);
1127-
smart_str_free(&eval_buf);
1128-
RETURN_FALSE;
1129-
}
1130-
1131-
/* result of eval */
1132-
convert_to_string(&v);
1133-
smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
1134-
/* Clean up */
1135-
smart_str_free(&eval_buf);
1136-
zval_ptr_dtor_str(&v);
1137-
} else if (is_callable) {
1105+
if (is_callable) {
11381106
zval args[1];
11391107
zval subpats, retval;
11401108
int i;
@@ -1163,7 +1131,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
11631131
zval_ptr_dtor(&retval);
11641132
} else {
11651133
if (!EG(exception)) {
1166-
php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1134+
zend_throw_error(NULL, "Unable to call custom replacement function");
1135+
zval_ptr_dtor(&subpats);
1136+
RETURN_THROWS();
11671137
}
11681138
}
11691139
zval_ptr_dtor(&subpats);
@@ -1195,6 +1165,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
11951165
}
11961166
smart_str_free(&eval_buf);
11971167

1168+
// TODO: Investigate whether this indicates a failure in Oniguruma and whether it should throw.
11981169
if (err <= -2) {
11991170
smart_str_free(&out_buf);
12001171
RETVAL_FALSE;
@@ -1255,11 +1226,13 @@ PHP_FUNCTION(mb_split)
12551226
}
12561227

12571228
if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
1258-
RETURN_FALSE;
1229+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1230+
RETURN_THROWS();
12591231
}
12601232

12611233
/* create regex pattern buffer */
12621234
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) {
1235+
// TODO throw as invalid regex?
12631236
RETURN_FALSE;
12641237
}
12651238

@@ -1296,6 +1269,7 @@ PHP_FUNCTION(mb_split)
12961269
onig_region_free(regs, 1);
12971270

12981271
/* see if we encountered an error */
1272+
// TODO: Investigate whether this can actually happen, and whether it should ...
12991273
if (err <= -2) {
13001274
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
13011275
onig_error_code_to_str(err_str, err);
@@ -1341,18 +1315,22 @@ PHP_FUNCTION(mb_ereg_match)
13411315
}
13421316

13431317
if (option_str != NULL) {
1344-
_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1318+
if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, 3)) {
1319+
RETURN_THROWS();
1320+
}
13451321
} else {
13461322
option |= MBREX(regex_default_options);
13471323
syntax = MBREX(regex_default_syntax);
13481324
}
13491325
}
13501326

13511327
if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
1352-
RETURN_FALSE;
1328+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1329+
RETURN_THROWS();
13531330
}
13541331

13551332
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1333+
// TODO throw as invalid regex?
13561334
RETURN_FALSE;
13571335
}
13581336

@@ -1398,7 +1376,9 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
13981376

13991377
if (arg_options) {
14001378
option = 0;
1401-
_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1379+
if(!_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 2)) {
1380+
RETURN_THROWS();
1381+
}
14021382
}
14031383

14041384
if (MBREX(search_regs)) {
@@ -1409,6 +1389,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
14091389
if (arg_pattern) {
14101390
/* create regex pattern buffer */
14111391
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(regex_default_syntax))) == NULL) {
1392+
// TODO throw as invalid regex?
14121393
RETURN_FALSE;
14131394
}
14141395
}
@@ -1422,13 +1403,13 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
14221403
}
14231404

14241405
if (MBREX(search_re) == NULL) {
1425-
php_error_docref(NULL, E_WARNING, "No regex given");
1426-
RETURN_FALSE;
1406+
zend_throw_error(NULL, "No pattern was provided");
1407+
RETURN_THROWS();
14271408
}
14281409

14291410
if (str == NULL) {
1430-
php_error_docref(NULL, E_WARNING, "No string given");
1431-
RETURN_FALSE;
1411+
zend_throw_error(NULL, "No string was provided");
1412+
RETURN_THROWS();
14321413
}
14331414

14341415
MBREX(search_regs) = onig_region_new();
@@ -1531,21 +1512,24 @@ PHP_FUNCTION(mb_ereg_search_init)
15311512
}
15321513

15331514
if (ZEND_NUM_ARGS() > 1 && arg_pattern_len == 0) {
1534-
php_error_docref(NULL, E_WARNING, "Empty pattern");
1535-
RETURN_FALSE;
1515+
zend_argument_value_error(2, "must not be empty");
1516+
RETURN_THROWS();
15361517
}
15371518

15381519
option = MBREX(regex_default_options);
15391520
syntax = MBREX(regex_default_syntax);
15401521

15411522
if (ZEND_NUM_ARGS() == 3) {
15421523
option = 0;
1543-
_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1524+
if(!_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 3)) {
1525+
RETURN_THROWS();
1526+
}
15441527
}
15451528

15461529
if (ZEND_NUM_ARGS() > 1) {
15471530
/* create regex pattern buffer */
15481531
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1532+
// TODO throw as invalid regex?
15491533
RETURN_FALSE;
15501534
}
15511535
}
@@ -1556,18 +1540,15 @@ PHP_FUNCTION(mb_ereg_search_init)
15561540

15571541
ZVAL_STR_COPY(&MBREX(search_str), arg_str);
15581542

1559-
if (php_mb_check_encoding(
1560-
ZSTR_VAL(arg_str),
1561-
ZSTR_LEN(arg_str),
1562-
php_mb_regex_get_mbctype_encoding()
1563-
)) {
1564-
MBREX(search_pos) = 0;
1565-
RETVAL_TRUE;
1566-
} else {
1543+
if (!php_mb_check_encoding(ZSTR_VAL(arg_str), ZSTR_LEN(arg_str), php_mb_regex_get_mbctype_encoding())) {
15671544
MBREX(search_pos) = ZSTR_LEN(arg_str);
1568-
RETVAL_FALSE;
1545+
zend_argument_value_error(1, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1546+
RETURN_THROWS();
15691547
}
15701548

1549+
MBREX(search_pos) = 0;
1550+
RETVAL_TRUE;
1551+
15711552
if (MBREX(search_regs) != NULL) {
15721553
onig_region_free(MBREX(search_regs), 1);
15731554
MBREX(search_regs) = NULL;
@@ -1613,6 +1594,7 @@ PHP_FUNCTION(mb_ereg_search_getregs)
16131594
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
16141595
}
16151596
} else {
1597+
// TODO This seems to be some logical error, promote to Error
16161598
RETVAL_FALSE;
16171599
}
16181600
}
@@ -1646,12 +1628,12 @@ PHP_FUNCTION(mb_ereg_search_setpos)
16461628
}
16471629

16481630
if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1649-
php_error_docref(NULL, E_WARNING, "Position is out of range");
1650-
MBREX(search_pos) = 0;
1651-
RETURN_FALSE;
1631+
zend_argument_value_error(1, "is out of range");
1632+
RETURN_THROWS();
16521633
}
16531634

16541635
MBREX(search_pos) = position;
1636+
// TODO Return void
16551637
RETURN_TRUE;
16561638
}
16571639
/* }}} */
@@ -1687,7 +1669,9 @@ PHP_FUNCTION(mb_regex_set_options)
16871669
if (string != NULL) {
16881670
opt = 0;
16891671
syntax = NULL;
1690-
_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1672+
if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax, 1)) {
1673+
RETURN_THROWS();
1674+
}
16911675
_php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax);
16921676
opt = prev_opt;
16931677
syntax = prev_syntax;

0 commit comments

Comments
 (0)