Skip to content

Commit c50d3a4

Browse files
committed
Promote some warnings in MBString Regex
1 parent 196f8fd commit c50d3a4

File tree

11 files changed

+200
-163
lines changed

11 files changed

+200
-163
lines changed

ext/mbstring/php_mbregex.c

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "php_mbregex.h"
2929
#include "mbstring.h"
3030
#include "libmbfl/filters/mbfilter_utf8.h"
31+
#include <stdbool.h>
3132

3233
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
3334
#include <oniguruma.h>
@@ -600,8 +601,8 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionT
600601
/* }}} */
601602

602603
/* {{{ _php_mb_regex_init_options */
603-
static void
604-
_php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
604+
static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option,
605+
OnigSyntaxType **syntax, uint32_t option_arg_num)
605606
{
606607
size_t n;
607608
char c;
@@ -660,14 +661,16 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option
660661
*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
661662
break;
662663
case 'e':
663-
if (eval != NULL) *eval = 1;
664-
break;
664+
zend_argument_value_error(option_arg_num, "option 'e' is not supported");
665+
return false;
665666
default:
667+
// TODO: throw a ValueError for unsupported option characters
666668
break;
667669
}
668670
}
669671
if (option != NULL) *option|=optm;
670672
}
673+
return true;
671674
}
672675
/* }}} */
673676

@@ -909,6 +912,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
909912
RETURN_THROWS();
910913
}
911914

915+
if (arg_pattern_len == 0) {
916+
zend_argument_value_error(1, "must not be empty");
917+
RETURN_THROWS();
918+
}
919+
912920
if (array != NULL) {
913921
array = zend_try_array_init(array);
914922
if (!array) {
@@ -921,20 +929,15 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
921929
string_len,
922930
php_mb_regex_get_mbctype_encoding()
923931
)) {
924-
RETURN_FALSE;
932+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
933+
RETURN_THROWS();
925934
}
926935

927936
options = MBREX(regex_default_options);
928937
if (icase) {
929938
options |= ONIG_OPTION_IGNORECASE;
930939
}
931940

932-
if (arg_pattern_len == 0) {
933-
php_error_docref(NULL, E_WARNING, "Empty pattern");
934-
RETVAL_FALSE;
935-
goto out;
936-
}
937-
938941
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax));
939942
if (re == NULL) {
940943
RETVAL_FALSE;
@@ -1016,15 +1019,14 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
10161019
smart_str out_buf = {0};
10171020
smart_str eval_buf = {0};
10181021
smart_str *pbuf;
1019-
int err, eval, n;
1022+
int err, n;
10201023
OnigUChar *pos;
10211024
OnigUChar *string_lim;
10221025
char *description = NULL;
10231026

10241027
const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding();
10251028
ZEND_ASSERT(enc != NULL);
10261029

1027-
eval = 0;
10281030
{
10291031
char *option_str = NULL;
10301032
size_t option_str_len = 0;
@@ -1048,28 +1050,25 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
10481050
}
10491051

10501052
if (!php_mb_check_encoding(string, string_len, enc)) {
1051-
RETURN_NULL();
1053+
zend_argument_value_error(3, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1054+
RETURN_THROWS();
10521055
}
10531056

10541057
if (option_str != NULL) {
1055-
_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
1058+
/* Initialize the options; a false return means a ValueError has already been thrown */
1059+
if(!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, 4)) {
1060+
RETURN_THROWS();
1061+
}
10561062
} else {
10571063
options |= MBREX(regex_default_options);
10581064
syntax = MBREX(regex_default_syntax);
10591065
}
10601066
}
1061-
if (eval) {
1062-
if (is_callable) {
1063-
php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
1064-
} else {
1065-
php_error_docref(NULL, E_WARNING, "The 'e' option is no longer supported, use mb_ereg_replace_callback instead");
1066-
}
1067-
RETURN_FALSE;
1068-
}
10691067

10701068
/* create regex pattern buffer */
10711069
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax);
10721070
if (re == NULL) {
1071+
// Should this be considered an error instead?
10731072
RETURN_FALSE;
10741073
}
10751074

@@ -1131,7 +1130,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
11311130
zval_ptr_dtor(&retval);
11321131
} else {
11331132
if (!EG(exception)) {
1134-
php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1133+
zend_throw_error(NULL, "Unable to call custom replacement function");
1134+
zval_ptr_dtor(&subpats);
1135+
RETURN_THROWS();
11351136
}
11361137
}
11371138
zval_ptr_dtor(&subpats);
@@ -1163,6 +1164,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
11631164
}
11641165
smart_str_free(&eval_buf);
11651166

1167+
// TODO: investigate whether this indicates a failure inside Oniguruma and, if so, whether it should throw.
11661168
if (err <= -2) {
11671169
smart_str_free(&out_buf);
11681170
RETVAL_FALSE;
@@ -1219,11 +1221,13 @@ PHP_FUNCTION(mb_split)
12191221
}
12201222

12211223
if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
1222-
RETURN_FALSE;
1224+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1225+
RETURN_THROWS();
12231226
}
12241227

12251228
/* create regex pattern buffer */
12261229
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) {
1230+
// TODO throw as invalid regex?
12271231
RETURN_FALSE;
12281232
}
12291233

@@ -1260,6 +1264,7 @@ PHP_FUNCTION(mb_split)
12601264
onig_region_free(regs, 1);
12611265

12621266
/* see if we encountered an error */
1267+
// TODO: investigate whether this can actually happen, and whether it should ...
12631268
if (err <= -2) {
12641269
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
12651270
onig_error_code_to_str(err_str, err);
@@ -1304,18 +1309,22 @@ PHP_FUNCTION(mb_ereg_match)
13041309
}
13051310

13061311
if (option_str != NULL) {
1307-
_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1312+
if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, 3)) {
1313+
RETURN_THROWS();
1314+
}
13081315
} else {
13091316
option |= MBREX(regex_default_options);
13101317
syntax = MBREX(regex_default_syntax);
13111318
}
13121319
}
13131320

13141321
if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
1315-
RETURN_FALSE;
1322+
zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1323+
RETURN_THROWS();
13161324
}
13171325

13181326
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1327+
// TODO throw as invalid regex?
13191328
RETURN_FALSE;
13201329
}
13211330

@@ -1371,6 +1380,7 @@ static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mod
13711380
if (arg_pattern) {
13721381
/* create regex pattern buffer */
13731382
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1383+
// TODO throw as invalid regex?
13741384
RETURN_FALSE;
13751385
}
13761386
}
@@ -1384,13 +1394,13 @@ static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mod
13841394
}
13851395

13861396
if (MBREX(search_re) == NULL) {
1387-
php_error_docref(NULL, E_WARNING, "No regex given");
1388-
RETURN_FALSE;
1397+
zend_throw_error(NULL, "No pattern was provided");
1398+
RETURN_THROWS();
13891399
}
13901400

13911401
if (str == NULL) {
1392-
php_error_docref(NULL, E_WARNING, "No string given");
1393-
RETURN_FALSE;
1402+
zend_throw_error(NULL, "No string was provided");
1403+
RETURN_THROWS();
13941404
}
13951405

13961406
MBREX(search_regs) = onig_region_new();
@@ -1489,8 +1499,8 @@ PHP_FUNCTION(mb_ereg_search_init)
14891499
}
14901500

14911501
if (arg_pattern && arg_pattern_len == 0) {
1492-
php_error_docref(NULL, E_WARNING, "Empty pattern");
1493-
RETURN_FALSE;
1502+
zend_argument_value_error(2, "must not be empty");
1503+
RETURN_THROWS();
14941504
}
14951505

14961506
if (arg_options) {
@@ -1504,6 +1514,7 @@ PHP_FUNCTION(mb_ereg_search_init)
15041514
if (arg_pattern) {
15051515
/* create regex pattern buffer */
15061516
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1517+
// TODO throw as invalid regex?
15071518
RETURN_FALSE;
15081519
}
15091520
}
@@ -1519,9 +1530,13 @@ PHP_FUNCTION(mb_ereg_search_init)
15191530
RETVAL_TRUE;
15201531
} else {
15211532
MBREX(search_pos) = ZSTR_LEN(arg_str);
1522-
RETVAL_FALSE;
1533+
zend_argument_value_error(1, "must be a valid string in '%s'", php_mb_regex_get_mbctype());
1534+
RETURN_THROWS();
15231535
}
15241536

1537+
MBREX(search_pos) = 0;
1538+
RETVAL_TRUE;
1539+
15251540
if (MBREX(search_regs) != NULL) {
15261541
onig_region_free(MBREX(search_regs), 1);
15271542
MBREX(search_regs) = NULL;
@@ -1566,6 +1581,7 @@ PHP_FUNCTION(mb_ereg_search_getregs)
15661581
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
15671582
}
15681583
} else {
1584+
// TODO: this looks like a logic error on the caller's side; promote to Error
15691585
RETVAL_FALSE;
15701586
}
15711587
}
@@ -1597,12 +1613,12 @@ PHP_FUNCTION(mb_ereg_search_setpos)
15971613
}
15981614

15991615
if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1600-
php_error_docref(NULL, E_WARNING, "Position is out of range");
1601-
MBREX(search_pos) = 0;
1602-
RETURN_FALSE;
1616+
zend_argument_value_error(1, "is out of range");
1617+
RETURN_THROWS();
16031618
}
16041619

16051620
MBREX(search_pos) = position;
1621+
// TODO Return void
16061622
RETURN_TRUE;
16071623
}
16081624
/* }}} */
@@ -1637,7 +1653,9 @@ PHP_FUNCTION(mb_regex_set_options)
16371653
if (string != NULL) {
16381654
opt = 0;
16391655
syntax = NULL;
1640-
_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1656+
if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax, 1)) {
1657+
RETURN_THROWS();
1658+
}
16411659
_php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax);
16421660
opt = prev_opt;
16431661
syntax = prev_syntax;

0 commit comments

Comments
 (0)