Skip to content

Commit 2605cee

Browse files
author
Yasuo Ohgaki
committed
Added array parameter support to mb_convert_encoding()
1 parent ddce37b commit 2605cee

File tree

4 files changed

+278
-17
lines changed

4 files changed

+278
-17
lines changed

NEWS

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@ PHP NEWS
2424
. Fixed bug #70896 (gmp_fact() silently ignores non-integer input). (Sara)
2525

2626
- Mbstring:
27-
. Implemented request #66024 (mb_chr() and mb_ord()) (Masakielastic, Yasuo)
28-
. Implemented request #65081 (mb_scrub()) (Masakielastic, Yasuo)
27+
. Implemented request #66024 (mb_chr() and mb_ord()). (Masakielastic, Yasuo)
28+
. Implemented request #65081 (mb_scrub()). (Masakielastic, Yasuo)
2929
. Implemented request #69086 (enhancement for mb_convert_encoding() that
30-
handles multibyte replacement char nicely) (Masakielastic, Yasuo)
30+
handles multibyte replacement char nicely). (Masakielastic, Yasuo)
31+
. Added array input support to mb_convert_encoding(). (Yasuo)
32+
. Added array input support to mb_check_encoding(). (Yasuo)
3133

3234
<<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>
3335

UPGRADING

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ PHP 7.2 UPGRADE NOTES
7575
- Mbstring
7676
. mb_check_encoding() accepts array parameter. Both key and value
7777
encodings are checked recursively.
78+
. mb_convert_encoding() accepts array parameter. Only value encodings
79+
are converted recursively.
7880

7981
========================================
8082
10. New Global Constants

ext/mbstring/mbstring.c

Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3181,7 +3181,7 @@ static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
31813181

31823182

31833183
/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
3184-
MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3184+
MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
31853185
{
31863186
mbfl_string string, result, *ret;
31873187
const mbfl_encoding *from_encoding, *to_encoding;
@@ -3288,23 +3288,86 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
32883288
}
32893289
/* }}} */
32903290

3291+
/* {{{ MBSTRING_API HashTable *php_mb_convert_encoding_recursive() */
/*
 * Builds a newly allocated HashTable containing a converted copy of `input`.
 *
 * - String keys and string values are passed through
 *   php_mb_convert_encoding() with the given target/source encodings.
 * - NULL, boolean, integer and double values are copied unchanged.
 * - Nested arrays are converted by recursing into this function.
 * - Any other value type (objects, etc.) raises an E_WARNING and the entry
 *   is left out of the result.
 *
 * If php_mb_convert_encoding() fails (returns NULL) for a value, FALSE is
 * stored for that entry; if it fails for a key, the entry is stored under
 * its original, unconverted key.
 *
 * Returns NULL only when `input` is NULL. Otherwise the caller owns the
 * returned table (it is emalloc()'ed here and its values are destroyed with
 * ZVAL_PTR_DTOR).
 */
MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
{
	HashTable *output, *chash;
	zend_long idx;
	zend_string *key, *key_tmp;
	zval *entry, entry_tmp;
	size_t ckey_len, cval_len;
	char *ckey, *cval;

	if (!input) {
		return NULL;
	}

	output = (HashTable *)emalloc(sizeof(HashTable));
	zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0);
	ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
		key_tmp = NULL;

		/* convert key */
		if (key) {
			ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
			if (ckey) {
				key_tmp = zend_string_init(ckey, ckey_len, 0);
				/* zend_string_init() copied the bytes; the buffer is ours to free */
				efree(ckey);
			} else {
				/* conversion failed: keep the entry reachable under its original key */
				key_tmp = zend_string_copy(key);
			}
		}

		/* convert value */
		ZEND_ASSERT(entry);
		switch (Z_TYPE_P(entry)) {
			case IS_STRING:
				cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
				if (cval) {
					ZVAL_STRINGL(&entry_tmp, cval, cval_len);
					/* ZVAL_STRINGL() copied the bytes; release the converter's buffer */
					efree(cval);
				} else {
					/* same result the scalar code path reports on failure */
					ZVAL_FALSE(&entry_tmp);
				}
				break;
			case IS_NULL:
			case IS_TRUE:
			case IS_FALSE:
			case IS_LONG:
			case IS_DOUBLE:
				ZVAL_COPY(&entry_tmp, entry);
				break;
			case IS_ARRAY:
				chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
				/* hand the freshly built table to the zval directly; calling
				 * array_init() first would allocate a table that is then orphaned */
				ZVAL_ARR(&entry_tmp, chash);
				break;
			case IS_OBJECT:
			default:
				/* entry_tmp has not been initialised for this iteration, so
				 * there is nothing to destroy; only the converted key needs
				 * to be dropped */
				if (key_tmp) {
					zend_string_release(key_tmp);
				}
				php_error_docref(NULL, E_WARNING, "Object is not supported");
				continue;
		}

		if (key_tmp) {
			/* zend_hash_add() returns NULL when the key already exists (two
			 * source keys may convert to the same bytes); free the value in
			 * that case instead of leaking it */
			if (zend_hash_add(output, key_tmp, &entry_tmp) == NULL) {
				zval_ptr_dtor(&entry_tmp);
			}
			/* the table holds its own reference to the key */
			zend_string_release(key_tmp);
		} else {
			if (zend_hash_index_add(output, idx, &entry_tmp) == NULL) {
				zval_ptr_dtor(&entry_tmp);
			}
		}
	} ZEND_HASH_FOREACH_END();

	return output;
}
3346+
/* }}} */
3347+
3348+
32913349
/* {{{ proto mixed mb_convert_encoding(mixed str, string to-encoding [, mixed from-encoding])
32923350
Returns the string, or a converted copy of the array, in the desired encoding */
32933351
PHP_FUNCTION(mb_convert_encoding)
32943352
{
3295-
char *arg_str, *arg_new;
3296-
size_t str_len, new_len;
3353+
zval *input;
3354+
char *arg_new;
3355+
size_t new_len;
32973356
zval *arg_old = NULL;
32983357
size_t size, l, n;
32993358
char *_from_encodings = NULL, *ret, *s_free = NULL;
33003359

33013360
zval *hash_entry;
33023361
HashTable *target_hash;
33033362

3304-
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3363+
if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
33053364
return;
33063365
}
33073366

3367+
if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
3368+
convert_to_string(input);
3369+
}
3370+
33083371
if (arg_old) {
33093372
switch (Z_TYPE_P(arg_old)) {
33103373
case IS_ARRAY:
@@ -3339,19 +3402,26 @@ PHP_FUNCTION(mb_convert_encoding)
33393402
}
33403403
}
33413404

3342-
/* new encoding */
3343-
ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
3344-
if (ret != NULL) {
3345-
// TODO: avoid reallocation ???
3346-
RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
3347-
efree(ret);
3405+
if (Z_TYPE_P(input) == IS_STRING) {
3406+
/* new encoding */
3407+
ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
3408+
if (ret != NULL) {
3409+
// TODO: avoid reallocation ???
3410+
RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
3411+
efree(ret);
3412+
} else {
3413+
RETVAL_FALSE;
3414+
}
3415+
if (s_free) {
3416+
efree(s_free);
3417+
}
33483418
} else {
3349-
RETVAL_FALSE;
3419+
HashTable *tmp;
3420+
tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
3421+
RETURN_ARR(tmp);
33503422
}
33513423

3352-
if ( s_free) {
3353-
efree(s_free);
3354-
}
3424+
return;
33553425
}
33563426
/* }}} */
33573427

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
--TEST--
2+
Test mb_convert_encoding() function : array functionality
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_convert_encoding') or die("skip mb_convert_encoding() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
/* Prototype : string mb_convert_encoding(string $str, string $to_encoding [, mixed $from_encoding])
11+
* Description: Returns converted string in desired encoding
12+
* Source code: ext/mbstring/mbstring.c
13+
*/
14+
15+
/*
16+
* Test basic functionality of mb_convert_encoding()
17+
*/
18+
19+
echo "*** Testing mb_convert_encoding() : array functionality ***\n";
20+
21+
//All strings are the same when displayed in their respective encodings
22+
$sjis_string[] = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
23+
$sjis_string[] = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
24+
$jis_string[] = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
25+
$jis_string[] = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
26+
$euc_jp_string[] = base64_decode('xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow==');
27+
$euc_jp_string[] = base64_decode('xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow==');
28+
$utf8_string[] = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
29+
$utf8_string[] = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
30+
31+
32+
/**
 * Base64-encodes every element of $input so binary strings can be dumped
 * safely in the expected output.
 *
 * @param array $input List of strings to encode; keys are not preserved.
 * @return array Zero-indexed list of base64 strings (empty array for empty input).
 */
function base64_encode_array($input) {
	// Initialise up front so an empty input yields array() instead of an
	// undefined variable (NULL plus a notice).
	$ret = [];
	foreach ($input as $var) {
		$ret[] = base64_encode($var);
	}
	return $ret;
}
38+
39+
echo "\n-- Convert to JIS --\n";
40+
echo "JIS encoded string in base64:\n";
41+
var_dump(base64_encode_array($jis_string));
42+
echo "Converted Strings:\n";
43+
var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'JIS', 'SJIS')));
44+
var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'JIS', 'EUC-JP')));
45+
var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'JIS', 'UTF-8')));
46+
47+
echo "\n-- Convert to EUC-JP --\n";
48+
echo "EUC-JP encoded string in base64:\n";
49+
var_dump(base64_encode_array($euc_jp_string));
50+
echo "Converted Strings:\n";
51+
var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'EUC-JP', 'SJIS')));
52+
var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'EUC-JP', 'JIS')));
53+
var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'EUC-JP', 'UTF-8')));
54+
55+
echo "\n-- Convert to SJIS --\n";
56+
echo "SJIS encoded string in base64:\n";
57+
var_dump(base64_encode_array($sjis_string));
58+
echo "Converted Strings:\n";
59+
var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'SJIS', 'JIS')));
60+
var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'SJIS', 'EUC-JP')));
61+
var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'SJIS', 'UTF-8')));
62+
63+
echo "\n-- Convert to UTF-8 --\n";
64+
echo "UTF-8 encoded string in base64:\n";
65+
var_dump(base64_encode_array($utf8_string));
66+
echo "Converted Strings:\n";
67+
var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'UTF-8', 'SJIS')));
68+
var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'UTF-8', 'JIS')));
69+
var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'UTF-8', 'EUC-JP')));
70+
71+
echo "Done";
72+
?>
73+
--EXPECTF--
74+
*** Testing mb_convert_encoding() : array functionality ***
75+
76+
-- Convert to JIS --
77+
JIS encoded string in base64:
78+
array(2) {
79+
[0]=>
80+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
81+
[1]=>
82+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
83+
}
84+
Converted Strings:
85+
array(2) {
86+
[0]=>
87+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
88+
[1]=>
89+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
90+
}
91+
array(2) {
92+
[0]=>
93+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
94+
[1]=>
95+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
96+
}
97+
array(2) {
98+
[0]=>
99+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
100+
[1]=>
101+
string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
102+
}
103+
104+
-- Convert to EUC-JP --
105+
EUC-JP encoded string in base64:
106+
array(2) {
107+
[0]=>
108+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
109+
[1]=>
110+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
111+
}
112+
Converted Strings:
113+
array(2) {
114+
[0]=>
115+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
116+
[1]=>
117+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
118+
}
119+
array(2) {
120+
[0]=>
121+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
122+
[1]=>
123+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
124+
}
125+
array(2) {
126+
[0]=>
127+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
128+
[1]=>
129+
string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
130+
}
131+
132+
-- Convert to SJIS --
133+
SJIS encoded string in base64:
134+
array(2) {
135+
[0]=>
136+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
137+
[1]=>
138+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
139+
}
140+
Converted Strings:
141+
array(2) {
142+
[0]=>
143+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
144+
[1]=>
145+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
146+
}
147+
array(2) {
148+
[0]=>
149+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
150+
[1]=>
151+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
152+
}
153+
array(2) {
154+
[0]=>
155+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
156+
[1]=>
157+
string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
158+
}
159+
160+
-- Convert to UTF-8 --
161+
UTF-8 encoded string in base64:
162+
array(2) {
163+
[0]=>
164+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
165+
[1]=>
166+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
167+
}
168+
Converted Strings:
169+
array(2) {
170+
[0]=>
171+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
172+
[1]=>
173+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
174+
}
175+
array(2) {
176+
[0]=>
177+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
178+
[1]=>
179+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
180+
}
181+
array(2) {
182+
[0]=>
183+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
184+
[1]=>
185+
string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
186+
}
187+
Done

0 commit comments

Comments
 (0)