Skip to content

Commit 2aa6653

Browse files
committed
Promote some warnings in MBString Regex
1 parent 53e0331 commit 2aa6653

File tree

11 files changed

+207
-173
lines changed

11 files changed

+207
-173
lines changed

ext/mbstring/php_mbregex.c

Lines changed: 63 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "php_mbregex.h"
2929
#include "mbstring.h"
3030
#include "libmbfl/filters/mbfilter_utf8.h"
31+
#include <stdbool.h>
3132

3233
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
3334
#include <oniguruma.h>
@@ -600,8 +601,8 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionT
600601
/* }}} */
601602

602603
/* {{{ _php_mb_regex_init_options */
603-
static void
604-
_php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
604+
static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option,
605+
OnigSyntaxType **syntax, uint32_t option_arg_num)
605606
{
606607
size_t n;
607608
char c;
@@ -660,14 +661,16 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option
660661
*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
661662
break;
662663
case 'e':
663-
if (eval != NULL) *eval = 1;
664-
break;
664+
zend_argument_value_error(option_arg_num, "option 'e' is not supported");
665+
return false;
665666
default:
667+
// TODO Unsupported ValueError
666668
break;
667669
}
668670
}
669671
if (option != NULL) *option|=optm;
670672
}
673+
return true;
671674
}
672675
/* }}} */
673676

@@ -910,6 +913,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
910913
RETURN_THROWS();
911914
}
912915

916+
if (arg_pattern_len == 0) {
917+
zend_argument_value_error(1, "must not be empty");
918+
RETURN_THROWS();
919+
}
920+
913921
if (array != NULL) {
914922
array = zend_try_array_init(array);
915923
if (!array) {
@@ -922,20 +930,15 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
922930
string_len,
923931
php_mb_regex_get_mbctype_encoding()
924932
)) {
925-
RETURN_FALSE;
933+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
934+
RETURN_THROWS();
926935
}
927936

928937
options = MBREX(regex_default_options);
929938
if (icase) {
930939
options |= ONIG_OPTION_IGNORECASE;
931940
}
932941

933-
if (arg_pattern_len == 0) {
934-
php_error_docref(NULL, E_WARNING, "Empty pattern");
935-
RETVAL_FALSE;
936-
goto out;
937-
}
938-
939942
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax));
940943
if (re == NULL) {
941944
RETVAL_FALSE;
@@ -1019,15 +1022,14 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
10191022
smart_str out_buf = {0};
10201023
smart_str eval_buf = {0};
10211024
smart_str *pbuf;
1022-
int err, eval, n;
1025+
int err, n;
10231026
OnigUChar *pos;
10241027
OnigUChar *string_lim;
10251028
char *description = NULL;
10261029

10271030
const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding();
10281031
ZEND_ASSERT(enc != NULL);
10291032

1030-
eval = 0;
10311033
{
10321034
char *option_str = NULL;
10331035
size_t option_str_len = 0;
@@ -1051,28 +1053,25 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
10511053
}
10521054

10531055
if (!php_mb_check_encoding(string, string_len, enc)) {
1054-
RETURN_NULL();
1056+
zend_argument_value_error(3, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1057+
RETURN_THROWS();
10551058
}
10561059

10571060
if (option_str != NULL) {
1058-
_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
1061+
/* Initialize the options; a failure means a ValueError has already been thrown */
1062+
if(!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, 4)) {
1063+
RETURN_THROWS();
1064+
}
10591065
} else {
10601066
options |= MBREX(regex_default_options);
10611067
syntax = MBREX(regex_default_syntax);
10621068
}
10631069
}
1064-
if (eval) {
1065-
if (is_callable) {
1066-
php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
1067-
} else {
1068-
php_error_docref(NULL, E_WARNING, "The 'e' option is no longer supported, use mb_ereg_replace_callback instead");
1069-
}
1070-
RETURN_FALSE;
1071-
}
10721070

10731071
/* create regex pattern buffer */
10741072
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax);
10751073
if (re == NULL) {
1074+
// Should this be considered an error instead?
10761075
RETURN_FALSE;
10771076
}
10781077

@@ -1134,7 +1133,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
11341133
zval_ptr_dtor(&retval);
11351134
} else {
11361135
if (!EG(exception)) {
1137-
php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1136+
zend_throw_error(NULL, "Unable to call custom replacement function");
1137+
zval_ptr_dtor(&subpats);
1138+
RETURN_THROWS();
11381139
}
11391140
}
11401141
zval_ptr_dtor(&subpats);
@@ -1166,6 +1167,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
11661167
}
11671168
smart_str_free(&eval_buf);
11681169

1170+
// TODO: investigate whether this indicates a failure in Oniguruma and whether it should throw.
11691171
if (err <= -2) {
11701172
smart_str_free(&out_buf);
11711173
RETVAL_FALSE;
@@ -1226,11 +1228,13 @@ PHP_FUNCTION(mb_split)
12261228
}
12271229

12281230
if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
1229-
RETURN_FALSE;
1231+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1232+
RETURN_THROWS();
12301233
}
12311234

12321235
/* create regex pattern buffer */
12331236
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) {
1237+
// TODO throw as invalid regex?
12341238
RETURN_FALSE;
12351239
}
12361240

@@ -1267,6 +1271,7 @@ PHP_FUNCTION(mb_split)
12671271
onig_region_free(regs, 1);
12681272

12691273
/* see if we encountered an error */
1274+
// TODO: investigate whether this can actually happen, and whether it should.
12701275
if (err <= -2) {
12711276
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
12721277
onig_error_code_to_str(err_str, err);
@@ -1312,18 +1317,22 @@ PHP_FUNCTION(mb_ereg_match)
13121317
}
13131318

13141319
if (option_str != NULL) {
1315-
_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1320+
if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, 3)) {
1321+
RETURN_THROWS();
1322+
}
13161323
} else {
13171324
option |= MBREX(regex_default_options);
13181325
syntax = MBREX(regex_default_syntax);
13191326
}
13201327
}
13211328

13221329
if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
1323-
RETURN_FALSE;
1330+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1331+
RETURN_THROWS();
13241332
}
13251333

13261334
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1335+
// TODO throw as invalid regex?
13271336
RETURN_FALSE;
13281337
}
13291338

@@ -1369,7 +1378,9 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
13691378

13701379
if (arg_options) {
13711380
option = 0;
1372-
_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1381+
if(!_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 2)) {
1382+
RETURN_THROWS();
1383+
}
13731384
}
13741385

13751386
if (MBREX(search_regs)) {
@@ -1380,6 +1391,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
13801391
if (arg_pattern) {
13811392
/* create regex pattern buffer */
13821393
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(regex_default_syntax))) == NULL) {
1394+
// TODO throw as invalid regex?
13831395
RETURN_FALSE;
13841396
}
13851397
}
@@ -1393,13 +1405,13 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
13931405
}
13941406

13951407
if (MBREX(search_re) == NULL) {
1396-
php_error_docref(NULL, E_WARNING, "No regex given");
1397-
RETURN_FALSE;
1408+
zend_throw_error(NULL, "No pattern was provided");
1409+
RETURN_THROWS();
13981410
}
13991411

14001412
if (str == NULL) {
1401-
php_error_docref(NULL, E_WARNING, "No string given");
1402-
RETURN_FALSE;
1413+
zend_throw_error(NULL, "No string was provided");
1414+
RETURN_THROWS();
14031415
}
14041416

14051417
MBREX(search_regs) = onig_region_new();
@@ -1502,21 +1514,24 @@ PHP_FUNCTION(mb_ereg_search_init)
15021514
}
15031515

15041516
if (ZEND_NUM_ARGS() > 1 && arg_pattern_len == 0) {
1505-
php_error_docref(NULL, E_WARNING, "Empty pattern");
1506-
RETURN_FALSE;
1517+
zend_argument_value_error(2, "must not be empty");
1518+
RETURN_THROWS();
15071519
}
15081520

15091521
option = MBREX(regex_default_options);
15101522
syntax = MBREX(regex_default_syntax);
15111523

15121524
if (ZEND_NUM_ARGS() == 3) {
15131525
option = 0;
1514-
_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1526+
if(!_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 3)) {
1527+
RETURN_THROWS();
1528+
}
15151529
}
15161530

15171531
if (ZEND_NUM_ARGS() > 1) {
15181532
/* create regex pattern buffer */
15191533
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1534+
// TODO throw as invalid regex?
15201535
RETURN_FALSE;
15211536
}
15221537
}
@@ -1527,18 +1542,15 @@ PHP_FUNCTION(mb_ereg_search_init)
15271542

15281543
ZVAL_STR_COPY(&MBREX(search_str), arg_str);
15291544

1530-
if (php_mb_check_encoding(
1531-
ZSTR_VAL(arg_str),
1532-
ZSTR_LEN(arg_str),
1533-
php_mb_regex_get_mbctype_encoding()
1534-
)) {
1535-
MBREX(search_pos) = 0;
1536-
RETVAL_TRUE;
1537-
} else {
1545+
if (!php_mb_check_encoding(ZSTR_VAL(arg_str), ZSTR_LEN(arg_str), php_mb_regex_get_mbctype_encoding())) {
15381546
MBREX(search_pos) = ZSTR_LEN(arg_str);
1539-
RETVAL_FALSE;
1547+
zend_argument_value_error(1, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1548+
RETURN_THROWS();
15401549
}
15411550

1551+
MBREX(search_pos) = 0;
1552+
RETVAL_TRUE;
1553+
15421554
if (MBREX(search_regs) != NULL) {
15431555
onig_region_free(MBREX(search_regs), 1);
15441556
MBREX(search_regs) = NULL;
@@ -1584,6 +1596,7 @@ PHP_FUNCTION(mb_ereg_search_getregs)
15841596
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
15851597
}
15861598
} else {
1599+
// TODO This seems to be some logical error, promote to Error
15871600
RETVAL_FALSE;
15881601
}
15891602
}
@@ -1617,12 +1630,12 @@ PHP_FUNCTION(mb_ereg_search_setpos)
16171630
}
16181631

16191632
if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1620-
php_error_docref(NULL, E_WARNING, "Position is out of range");
1621-
MBREX(search_pos) = 0;
1622-
RETURN_FALSE;
1633+
zend_argument_value_error(1, "is out of range");
1634+
RETURN_THROWS();
16231635
}
16241636

16251637
MBREX(search_pos) = position;
1638+
// TODO Return void
16261639
RETURN_TRUE;
16271640
}
16281641
/* }}} */
@@ -1658,7 +1671,9 @@ PHP_FUNCTION(mb_regex_set_options)
16581671
if (string != NULL) {
16591672
opt = 0;
16601673
syntax = NULL;
1661-
_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1674+
if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax, 1)) {
1675+
RETURN_THROWS();
1676+
}
16621677
_php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax);
16631678
opt = prev_opt;
16641679
syntax = prev_syntax;

0 commit comments

Comments
 (0)