Skip to content

Commit 31d6837

Browse files
author
Sreeharsha Ramanavarapu
committed
Bug #20238729: ILLEGALLY CRAFTED UTF8 SELECT PROVIDES NO WARNINGS

Issue: No warning is delivered when MySQL is unable to interpret a character with the given charset.

Solution: A check is now performed to test whether each character can be interpreted with the relevant charset. If it cannot, a warning is raised.
1 parent 8c7cab0 commit 31d6837

File tree

10 files changed

+172
-45
lines changed

10 files changed

+172
-45
lines changed

mysql-test/r/partition_utf8.result

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,19 @@ set names utf8;
22
create table t1 (a varchar(2) character set cp1250)
33
partition by list columns (a)
44
( partition p0 values in (0x81));
5+
Warnings:
6+
Warning 1300 Invalid cp1250 character string: '81'
7+
Warning 1300 Invalid cp1250 character string: '81'
8+
Warning 1300 Invalid cp1250 character string: '81'
59
show create table t1;
610
Table Create Table
711
t1 CREATE TABLE `t1` (
812
`a` varchar(2) CHARACTER SET cp1250 DEFAULT NULL
913
) ENGINE=MyISAM DEFAULT CHARSET=latin1
1014
/*!50500 PARTITION BY LIST COLUMNS(a)
1115
(PARTITION p0 VALUES IN (_cp1250 0x81) ENGINE = MyISAM) */
16+
Warnings:
17+
Warning 1300 Invalid cp1250 character string: '81'
1218
drop table t1;
1319
create table t1 (a varchar(2) character set cp1250)
1420
partition by list columns (a)

mysql-test/r/plugin_auth_qa.result

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,15 +220,24 @@ plüg_dest mysql_native_password
220220
DROP USER plüg_dest;
221221
SET NAMES ascii;
222222
CREATE USER 'plüg' IDENTIFIED WITH 'test_plugin_server' AS 'plüg_dest';
223+
Warnings:
224+
Warning 1105 Can't convert the character string from ascii to utf8: 'pl\xC3\xBCg'
225+
Warning 1105 Can't convert the character string from ascii to utf8: 'pl\xC3\xBCg_...'
223226
SELECT user,plugin,authentication_string FROM mysql.user WHERE user != 'root';
224227
user plugin authentication_string
225228
pl??g test_plugin_server pl??g_dest
226229
DROP USER 'plüg';
230+
Warnings:
231+
Warning 1105 Can't convert the character string from ascii to utf8: 'pl\xC3\xBCg'
227232
CREATE USER 'plüg_dest' IDENTIFIED BY 'plug_dest_passwd';
233+
Warnings:
234+
Warning 1105 Can't convert the character string from ascii to utf8: 'pl\xC3\xBCg_...'
228235
SELECT user,plugin,authentication_string FROM mysql.user WHERE user != 'root';
229236
user plugin authentication_string
230237
pl??g_dest mysql_native_password
231238
DROP USER 'plüg_dest';
239+
Warnings:
240+
Warning 1105 Can't convert the character string from ascii to utf8: 'pl\xC3\xBCg_...'
232241
SET NAMES latin1;
233242
========== test 1.1.1.5 ====================================
234243
CREATE USER 'plüg' IDENTIFIED WITH 'test_plügin_server' AS 'plüg_dest';

mysql-test/suite/sys_vars/r/character_set_connection_func.result

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@ SET @@session.character_set_connection = latin1;
2323
SELECT 'ЁЂЃЄ' AS utf_text;
2424
utf_text
2525
????
26+
Warnings:
27+
Warning 1105 Can't convert the character string from utf8 to latin1: '\xD0\x81\xD0\x82\xD0\x83...'
2628
SET @@session.character_set_connection = utf8;
2729
SELECT 'ЁЂЃЄ' AS utf_text;
2830
utf_text
2931
ЁЂЃЄ
3032
'---now inserting utf8 string with different character_set_connection--'
3133
SET @@session.character_set_connection = ascii;
3234
INSERT INTO t1 VALUES('ЁЂЃЄ');
35+
Warnings:
36+
Warning 1105 Can't convert the character string from utf8 to ascii: '\xD0\x81\xD0\x82\xD0\x83...'
3337
SELECT * FROM t1;
3438
b
3539
????
@@ -39,6 +43,8 @@ SET @@session.character_set_connection = ascii;
3943
SET @@session.character_set_client = latin1;
4044
SET @@session.character_set_results = latin1;
4145
INSERT INTO t1 VALUES('ЁЂЃЄ');
46+
Warnings:
47+
Warning 1105 Can't convert the character string from latin1 to ascii: '\xD0\x81\xD0\x82\xD0\x83...'
4248
SELECT * FROM t1;
4349
b
4450
????????

sql/item.cc

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -5900,44 +5900,54 @@ enum_field_types Item::field_type() const
59005900
/**
59015901
Verifies that the input string is well-formed according to its character set.
59025902
@param send_error If true, call my_error if string is not well-formed.
5903-
5904-
Will truncate input string if it is not well-formed.
5903+
@param truncate If true, set to null/truncate if not well-formed.
59055904
59065905
@return
59075906
If well-formed: input string.
59085907
If not well-formed:
5909-
if strict mode: NULL pointer and we set this Item's value to NULL
5910-
if not strict mode: input string truncated up to last good character
5908+
if truncate is true and strict mode: NULL pointer and we set this
5909+
Item's value to NULL.
5910+
if truncate is true and not strict mode: input string truncated up to
5911+
last good character.
5912+
if truncate is false: input string is returned.
59115913
*/
5912-
String *Item::check_well_formed_result(String *str, bool send_error)
5914+
String *Item::check_well_formed_result(String *str,
5915+
bool send_error,
5916+
bool truncate)
59135917
{
59145918
/* Check whether we got a well-formed string */
59155919
const CHARSET_INFO *cs= str->charset();
5916-
int well_formed_error;
5917-
uint wlen= cs->cset->well_formed_len(cs,
5918-
str->ptr(), str->ptr() + str->length(),
5919-
str->length(), &well_formed_error);
5920-
if (wlen < str->length())
5920+
5921+
size_t valid_length;
5922+
bool length_error;
5923+
5924+
if (validate_string(cs, str->ptr(), str->length(),
5925+
&valid_length, &length_error))
59215926
{
5927+
const char *str_end= str->ptr() + str->length();
5928+
const char *print_byte= str->ptr() + valid_length;
59225929
THD *thd= current_thd;
59235930
char hexbuf[7];
5924-
uint diff= str->length() - wlen;
5931+
uint diff= str_end - print_byte;
59255932
set_if_smaller(diff, 3);
5926-
octet2hex(hexbuf, str->ptr() + wlen, diff);
5927-
if (send_error)
5933+
octet2hex(hexbuf, print_byte, diff);
5934+
if (send_error && length_error)
59285935
{
59295936
my_error(ER_INVALID_CHARACTER_STRING, MYF(0),
59305937
cs->csname, hexbuf);
59315938
return 0;
59325939
}
5933-
if (thd->is_strict_mode())
5934-
{
5935-
null_value= 1;
5936-
str= 0;
5937-
}
5938-
else
5940+
if (truncate && length_error)
59395941
{
5940-
str->length(wlen);
5942+
if (thd->is_strict_mode())
5943+
{
5944+
null_value= 1;
5945+
str= 0;
5946+
}
5947+
else
5948+
{
5949+
str->length(valid_length);
5950+
}
59415951
}
59425952
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_INVALID_CHARACTER_STRING,
59435953
ER(ER_INVALID_CHARACTER_STRING), cs->csname, hexbuf);

sql/item.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1653,7 +1653,9 @@ class Item
16531653
}
16541654
virtual Field::geometry_type get_geometry_type() const
16551655
{ return Field::GEOM_GEOMETRY; };
1656-
String *check_well_formed_result(String *str, bool send_error= 0);
1656+
String *check_well_formed_result(String *str,
1657+
bool send_error,
1658+
bool truncate);
16571659
bool eq_by_collation(Item *item, bool binary_cmp, const CHARSET_INFO *cs);
16581660

16591661
/*
@@ -2841,6 +2843,11 @@ class Item_string :public Item_basic_constant
28412843
decimals=NOT_FIXED_DEC;
28422844
// it is constant => can be used without fix_fields (and frequently used)
28432845
fixed= 1;
2846+
/*
2847+
Check if the string has any character that can't be
2848+
interpreted using the relevant charset.
2849+
*/
2850+
check_well_formed_result(&str_value, false, false);
28442851
}
28452852
/* Just create an item and do not fill string representation */
28462853
Item_string(const CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)

sql/item_strfunc.cc

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3017,7 +3017,9 @@ String *Item_func_char::val_str(String *str)
30173017
}
30183018
}
30193019
str->realloc(str->length()); // Add end 0 (for Purify)
3020-
return check_well_formed_result(str);
3020+
return check_well_formed_result(str,
3021+
false, // send warning
3022+
true); // truncate
30213023
}
30223024

30233025

@@ -3257,7 +3259,9 @@ String *Item_func_rpad::val_str(String *str)
32573259
if (use_mb(rpad->charset()))
32583260
{
32593261
// This will chop off any trailing illegal characters from rpad.
3260-
String *well_formed_pad= args[2]->check_well_formed_result(rpad, false);
3262+
String *well_formed_pad= args[2]->check_well_formed_result(rpad,
3263+
false, //send warning
3264+
true); //truncate
32613265
if (!well_formed_pad)
32623266
goto err;
32633267
}
@@ -3372,7 +3376,9 @@ String *Item_func_lpad::val_str(String *str)
33723376
if (use_mb(pad->charset()))
33733377
{
33743378
// This will chop off any trailing illegal characters from pad.
3375-
String *well_formed_pad= args[2]->check_well_formed_result(pad, false);
3379+
String *well_formed_pad= args[2]->check_well_formed_result(pad,
3380+
false, // send warning
3381+
true); // truncate
33763382
if (!well_formed_pad)
33773383
goto err;
33783384
}
@@ -3488,7 +3494,9 @@ String *Item_func_conv_charset::val_str(String *str)
34883494
}
34893495
null_value= tmp_value.copy(arg->ptr(), arg->length(), arg->charset(),
34903496
conv_charset, &dummy_errors);
3491-
return null_value ? 0 : check_well_formed_result(&tmp_value);
3497+
return null_value ? 0 : check_well_formed_result(&tmp_value,
3498+
false, // send warning
3499+
true); // truncate
34923500
}
34933501

34943502
void Item_func_conv_charset::fix_length_and_dec()

sql/sql_class.cc

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2093,21 +2093,17 @@ LEX_STRING *THD::make_lex_string(LEX_STRING *lex_str,
20932093
/*
20942094
Convert a string to another character set
20952095
2096-
SYNOPSIS
2097-
convert_string()
2098-
to Store new allocated string here
2099-
to_cs New character set for allocated string
2100-
from String to convert
2101-
from_length Length of string to convert
2102-
from_cs Original character set
2096+
@param to Store new allocated string here
2097+
@param to_cs New character set for allocated string
2098+
@param from String to convert
2099+
@param from_length Length of string to convert
2100+
@param from_cs Original character set
21032101
2104-
NOTES
2105-
to will be 0-terminated to make it easy to pass to system funcs
2102+
@note to will be 0-terminated to make it easy to pass to system funcs
21062103
2107-
RETURN
2108-
0 ok
2109-
1 End of memory.
2110-
In this case to->str will point to 0 and to->length will be 0.
2104+
@retval false ok
2105+
@retval true End of memory.
2106+
In this case to->str will point to 0 and to->length will be 0.
21112107
*/
21122108

21132109
bool THD::convert_string(LEX_STRING *to, const CHARSET_INFO *to_cs,
@@ -2116,15 +2112,25 @@ bool THD::convert_string(LEX_STRING *to, const CHARSET_INFO *to_cs,
21162112
{
21172113
DBUG_ENTER("convert_string");
21182114
size_t new_length= to_cs->mbmaxlen * from_length;
2119-
uint dummy_errors;
2115+
uint errors= 0;
21202116
if (!(to->str= (char*) alloc(new_length+1)))
21212117
{
21222118
to->length= 0; // Safety fix
21232119
DBUG_RETURN(1); // EOM
21242120
}
21252121
to->length= copy_and_convert((char*) to->str, new_length, to_cs,
2126-
from, from_length, from_cs, &dummy_errors);
2122+
from, from_length, from_cs, &errors);
21272123
to->str[to->length]=0; // Safety
2124+
if (errors != 0)
2125+
{
2126+
char printable_buff[32];
2127+
convert_to_printable(printable_buff, sizeof(printable_buff),
2128+
from, from_length, from_cs, 6);
2129+
push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2130+
"Can't convert the character string from %s to %s: '%.64s'",
2131+
from_cs->csname, to_cs->csname, printable_buff);
2132+
}
2133+
21282134
DBUG_RETURN(0);
21292135
}
21302136

sql/sql_string.cc

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights
2+
* reserved.
23
34
This program is free software; you can redistribute it and/or modify
45
it under the terms of the GNU General Public License as published by
@@ -1167,3 +1168,69 @@ uint convert_to_printable(char *to, size_t to_len,
11671168
*t= '\0';
11681169
return t - to;
11691170
}
1171+
1172+
/**
1173+
Check if an input byte sequence is a valid character string of a given charset
1174+
1175+
@param cs The input character set.
1176+
@param str The input byte sequence to validate.
1177+
@param length A byte length of the str.
1178+
@param [out] valid_length A byte length of a valid prefix of the str.
1179+
@param [out] length_error True in the case of a character length error:
1180+
some byte(s) in the input are not a valid
1181+
prefix for a character, i.e. the byte length
1182+
of that invalid character is undefined.
1183+
1184+
@retval false if the whole input byte sequence is a valid character string.
1185+
The length_error output parameter is undefined.
1186+
1187+
@return
1188+
if the whole input byte sequence is a valid character string
1189+
then
1190+
return false
1191+
else
1192+
if the length of some character in the input is undefined (MY_CS_ILSEQ)
1193+
or the last character is truncated (MY_CS_TOOSMALL)
1194+
then
1195+
*length_error= true; // fatal error!
1196+
else
1197+
*length_error= false; // non-fatal error: there is no wide character
1198+
// encoding for some input character
1199+
return true
1200+
*/
1201+
bool validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
1202+
size_t *valid_length, bool *length_error)
1203+
{
1204+
if (cs->mbmaxlen > 1)
1205+
{
1206+
int well_formed_error;
1207+
*valid_length= cs->cset->well_formed_len(cs, str, str + length,
1208+
length, &well_formed_error);
1209+
*length_error= well_formed_error;
1210+
return well_formed_error;
1211+
}
1212+
1213+
/*
1214+
well_formed_len() is not functional on single-byte character sets,
1215+
so use mb_wc() instead:
1216+
*/
1217+
*length_error= false;
1218+
1219+
const uchar *from= reinterpret_cast<const uchar *>(str);
1220+
const uchar *from_end= from + length;
1221+
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1222+
1223+
while (from < from_end)
1224+
{
1225+
my_wc_t wc;
1226+
int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end);
1227+
if (cnvres <= 0)
1228+
{
1229+
*valid_length= from - reinterpret_cast<const uchar *>(str);
1230+
return true;
1231+
}
1232+
from+= cnvres;
1233+
}
1234+
*valid_length= length;
1235+
return false;
1236+
}

sql/sql_string.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#ifndef SQL_STRING_INCLUDED
22
#define SQL_STRING_INCLUDED
33

4-
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
4+
/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights
5+
* reserved.
56
67
This program is free software; you can redistribute it and/or modify
78
it under the terms of the GNU General Public License as published by
@@ -600,4 +601,7 @@ static inline bool check_if_only_end_space(const CHARSET_INFO *cs, char *str,
600601
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
601602
}
602603

604+
bool
605+
validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
606+
size_t *valid_length, bool *length_error);
603607
#endif /* SQL_STRING_INCLUDED */

sql/sql_yacc.yy

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13454,7 +13454,9 @@ literal:
1345413454
str ? str->length() : 0,
1345513455
$1);
1345613456
if (!item_str ||
13457-
!item_str->check_well_formed_result(&item_str->str_value, TRUE))
13457+
!item_str->check_well_formed_result(&item_str->str_value,
13458+
true, //send error
13459+
true)) //truncate
1345813460
{
1345913461
MYSQL_YYABORT;
1346013462
}
@@ -13483,7 +13485,9 @@ literal:
1348313485
str ? str->length() : 0,
1348413486
$1);
1348513487
if (!item_str ||
13486-
!item_str->check_well_formed_result(&item_str->str_value, TRUE))
13488+
!item_str->check_well_formed_result(&item_str->str_value,
13489+
true, //send error
13490+
true)) //truncate
1348713491
{
1348813492
MYSQL_YYABORT;
1348913493
}

0 commit comments

Comments
 (0)