Skip to content

Commit 1a7e515

Browse files
committed
Bug#25516881: JSON_UNQUOTE RETURNS WRONG RESULT WHEN INPUT IS UTF-16
The JSON_UNQUOTE function did not unquote its argument when the argument was UTF-16 encoded. This happened because the function checked whether the string started and ended with double-quotes by inspecting its first and last bytes, which assumed that the input was UTF-8 encoded. In a UTF-16 string the first byte of a leading double-quote is '\0', not '"', so UTF-16 strings were never treated as quoted strings. Fix: Convert the input string to utf8mb4, if needed, before attempting to analyze the contents of the string. Change-Id: I1c3fd717cb14ea8fcd57d99f5e9f7a61da798dce
1 parent ceebec6 commit 1a7e515

File tree

3 files changed

+40
-4
lines changed

3 files changed

+40
-4
lines changed

mysql-test/suite/json/r/json_no_table.result

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3308,3 +3308,15 @@ NULL
33083308
#
33093309
DO BIT_COUNT(COMPRESS(1) << JSON_DEPTH(1));
33103310
ERROR 22032: Invalid data type for JSON data in argument 1 to function json_depth; a JSON string or JSON type is required.
3311+
#
3312+
# Bug#25516881: JSON_UNQUOTE RETURNS WRONG RESULT WHEN INPUT IS UTF-16
3313+
#
3314+
SELECT JSON_UNQUOTE(CAST('"abc"' AS CHAR CHARSET utf16));
3315+
JSON_UNQUOTE(CAST('"abc"' AS CHAR CHARSET utf16))
3316+
abc
3317+
SELECT JSON_UNQUOTE(CAST('ABCD' AS BINARY));
3318+
ERROR 22032: Cannot create a JSON value from a string with CHARACTER SET 'binary'.
3319+
SELECT HEX(JSON_UNQUOTE(CAST('"abc"' AS CHAR CHARSET utf16))),
3320+
HEX(JSON_UNQUOTE(CAST('abc' AS CHAR CHARSET utf16)));
3321+
HEX(JSON_UNQUOTE(CAST('"abc"' AS CHAR CHARSET utf16))) HEX(JSON_UNQUOTE(CAST('abc' AS CHAR CHARSET utf16)))
3322+
616263 616263

mysql-test/suite/json/t/json_no_table.test

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2306,3 +2306,15 @@ SELECT MIN(CASE 0 WHEN 1 THEN JSON_ARRAY() ELSE NULL END);
23062306
--echo #
23072307
--error ER_INVALID_TYPE_FOR_JSON
23082308
DO BIT_COUNT(COMPRESS(1) << JSON_DEPTH(1));
2309+
2310+
--echo #
2311+
--echo # Bug#25516881: JSON_UNQUOTE RETURNS WRONG RESULT WHEN INPUT IS UTF-16
2312+
--echo #
2313+
# Used to return the quoted string unchanged.
2314+
SELECT JSON_UNQUOTE(CAST('"abc"' AS CHAR CHARSET utf16));
2315+
# Used to accept binary strings.
2316+
--error ER_INVALID_JSON_CHARSET
2317+
SELECT JSON_UNQUOTE(CAST('ABCD' AS BINARY));
2318+
# JSON_UNQUOTE should return utf8mb4 encoded results, but used to return utf16.
2319+
SELECT HEX(JSON_UNQUOTE(CAST('"abc"' AS CHAR CHARSET utf16))),
2320+
HEX(JSON_UNQUOTE(CAST('abc' AS CHAR CHARSET utf16)));

sql/item_json_func.cc

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3518,15 +3518,27 @@ String *Item_func_json_unquote::val_str(String *str)
35183518
return error_str();
35193519
}
35203520

3521-
if (res->length() < 2 || *res->ptr() != '"' ||
3522-
res->ptr()[res->length() - 1] != '"')
3521+
StringBuffer<STRING_BUFFER_USUAL_SIZE> buf;
3522+
const char *utf8text;
3523+
size_t utf8len;
3524+
if (ensure_utf8mb4(res, &buf, &utf8text, &utf8len, true))
3525+
return error_str();
3526+
String *utf8str= (res->ptr() == utf8text) ? res : &buf;
3527+
DBUG_ASSERT(utf8text == utf8str->ptr());
3528+
3529+
if (utf8len < 2 || utf8text[0] != '"' || utf8text[utf8len - 1] != '"')
35233530
{
35243531
null_value= false;
3525-
return res; // return string unchanged
3532+
// Return string unchanged, but convert to utf8mb4 if needed.
3533+
if (res == utf8str)
3534+
return res;
3535+
if (str->copy(utf8text, utf8len, collation.collation))
3536+
return error_str(); /* purecov: inspected */
3537+
return str;
35263538
}
35273539

35283540
bool parse_error= false;
3529-
if (parse_json(res, 0, func_name(), &dom, true, &parse_error))
3541+
if (parse_json(utf8str, 0, func_name(), &dom, true, &parse_error))
35303542
{
35313543
return error_str();
35323544
}

0 commit comments

Comments
 (0)