Skip to content

Commit 4631377

Browse files
author
Tor Didriksen
committed
Bug#35296563 performance regression since 8.0.22+ of simple query doing table scan with LIKE '%....%';
The benchmarks from Bug #32831830 REGEXP TAKES MORE THAN 10X TIME WHEN COMPARED TO LIKE have been backported to mysql-8.0.21-release.
Timings from ./bin/item_func_regexp-t --gtest_filter="Microbenchmarks.BM_LikeNoMatch" on my desktop, Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz:
mysql-8.0.21-release: 22 ns/iter
trunk, original: 32 ns/iter
trunk, with current patch: 22 ns/iter
The patch optimizes eval_string_arg() for the common case: do string operations with arguments belonging to the same character set. Conversion verification and error checking are now delegated to eval_string_arg_noinline().
Change-Id: Iad1fe8675fa2aee1a6a1840629d988ee0dfd37dd
1 parent 98e3836 commit 4631377

File tree

7 files changed

+57
-49
lines changed

7 files changed

+57
-49
lines changed

include/my_sys.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,6 @@ extern bool resolve_collation(const char *cl_name,
894894
const CHARSET_INFO *default_cl,
895895
const CHARSET_INFO **cl);
896896
extern char *get_charsets_dir(char *buf);
897-
extern bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2);
898897
extern bool init_compiled_charsets(myf flags);
899898
extern size_t escape_string_for_mysql(const CHARSET_INFO *charset_info,
900899
char *to, size_t to_length,

include/mysql/strings/m_ctype.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@
3030

3131
#include <sys/types.h>
3232

33+
#include <cassert>
3334
#include <cstddef>
3435
#include <cstdint>
3536
#include <cstdlib>
37+
#include <cstring>
3638
#include <deque>
3739

3840
#include "mysql/attribute.h"
@@ -513,6 +515,12 @@ inline bool my_charset_is_ascii_based(const CHARSET_INFO *cs) {
513515
return (cs->state & MY_CS_NONASCII) == 0;
514516
}
515517

518+
inline bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2) {
519+
assert(0 != strcmp(cs1->csname, "utf8"));
520+
assert(0 != strcmp(cs2->csname, "utf8"));
521+
return ((cs1 == cs2) || !strcmp(cs1->csname, cs2->csname));
522+
}
523+
516524
MYSQL_STRINGS_EXPORT unsigned my_charset_repertoire(const CHARSET_INFO *cs);
517525

518526
MYSQL_STRINGS_EXPORT unsigned my_strxfrm_flag_normalize(unsigned flags);

include/sql_string.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "memory_debugging.h"
4242
#include "my_alloc.h"
4343
#include "my_compiler.h"
44+
#include "my_sys.h"
4445

4546
#include "my_inttypes.h"
4647
#include "mysql/components/services/bits/psi_bits.h"
@@ -607,6 +608,35 @@ class String {
607608
char *dup(MEM_ROOT *root) const;
608609
};
609610

611+
/**
612+
Checks that the source string can be just copied to the destination string
613+
without conversion.
614+
615+
@param arg_length Length of string to copy.
616+
@param from_cs Character set to copy from
617+
@param to_cs Character set to copy to
618+
@param *offset Returns number of unaligned characters.
619+
620+
@returns true if conversion is required, false otherwise.
621+
622+
@note
623+
to_cs may be nullptr for "no conversion" if the system variable
624+
character_set_results is NULL.
625+
*/
626+
627+
inline bool String::needs_conversion(size_t arg_length,
628+
const CHARSET_INFO *from_cs,
629+
const CHARSET_INFO *to_cs,
630+
size_t *offset) {
631+
*offset = 0;
632+
if (to_cs == nullptr || (to_cs == &my_charset_bin) || from_cs == to_cs ||
633+
my_charset_same(from_cs, to_cs) ||
634+
((from_cs == &my_charset_bin) &&
635+
(0 == (*offset = (arg_length % to_cs->mbminlen)))))
636+
return false;
637+
return true;
638+
}
639+
610640
static inline void swap(String &a, String &b) noexcept { a.swap(b); }
611641

612642
static inline std::string to_string(const String &str) {

mysys/charset.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,6 @@
7575

7676
extern CHARSET_INFO my_charset_cp932_japanese_ci;
7777

78-
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2) {
79-
assert(0 != strcmp(cs1->csname, "utf8"));
80-
assert(0 != strcmp(cs2->csname, "utf8"));
81-
return ((cs1 == cs2) || !strcmp(cs1->csname, cs2->csname));
82-
}
83-
8478
namespace {
8579

8680
class Mysys_charset_loader : public MY_CHARSET_LOADER {

sql-common/sql_string.cc

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -249,39 +249,6 @@ bool String::copy(const char *str, size_t arg_length, const CHARSET_INFO *cs) {
249249
return false;
250250
}
251251

252-
/*
253-
Checks that the source string can be just copied to the destination string
254-
without conversion.
255-
256-
SYNPOSIS
257-
258-
needs_conversion()
259-
arg_length Length of string to copy.
260-
from_cs Character set to copy from
261-
to_cs Character set to copy to
262-
uint32 *offset Returns number of unaligned characters.
263-
264-
RETURN
265-
0 No conversion needed
266-
1 Either character set conversion or adding leading zeros
267-
(e.g. for UCS-2) must be done
268-
269-
NOTE
270-
to_cs may be NULL for "no conversion" if the system variable
271-
character_set_results is NULL.
272-
*/
273-
274-
bool String::needs_conversion(size_t arg_length, const CHARSET_INFO *from_cs,
275-
const CHARSET_INFO *to_cs, size_t *offset) {
276-
*offset = 0;
277-
if (!to_cs || (to_cs == &my_charset_bin) || (to_cs == from_cs) ||
278-
my_charset_same(from_cs, to_cs) ||
279-
((from_cs == &my_charset_bin) &&
280-
(!(*offset = (arg_length % to_cs->mbminlen)))))
281-
return false;
282-
return true;
283-
}
284-
285252
/*
286253
Checks that the source string can just be copied to the destination string
287254
without conversion.

sql/item_func.cc

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -258,19 +258,17 @@ bool simplify_string_args(THD *thd, const DTCollation &c, Item **args,
258258
@returns string pointer if success, NULL if error or NULL value
259259
*/
260260

261-
String *eval_string_arg(const CHARSET_INFO *to_cs, Item *arg, String *buffer) {
262-
StringBuffer<STRING_BUFFER_USUAL_SIZE> local_string(nullptr, 0, to_cs);
263-
261+
String *eval_string_arg_noinline(const CHARSET_INFO *to_cs, Item *arg,
262+
String *buffer) {
264263
size_t offset;
265264
const bool convert =
266265
String::needs_conversion(0, arg->collation.collation, to_cs, &offset);
267-
String *res = arg->val_str(convert ? &local_string : buffer);
268266

269-
// Return immediately if argument is a NULL value, or there was an error
270-
if (res == nullptr) {
271-
return nullptr;
272-
}
273267
if (convert) {
268+
StringBuffer<STRING_BUFFER_USUAL_SIZE> local_string(nullptr, 0, to_cs);
269+
String *res = arg->val_str(&local_string);
270+
// Return immediately if argument is a NULL value, or there was an error
271+
if (res == nullptr) return nullptr;
274272
/*
275273
String must be converted from source character set. It has been built
276274
in the "local_string" buffer and will be copied with conversion into the
@@ -285,6 +283,10 @@ String *eval_string_arg(const CHARSET_INFO *to_cs, Item *arg, String *buffer) {
285283
}
286284
return buffer;
287285
}
286+
String *res = arg->val_str(buffer);
287+
// Return immediately if argument is a NULL value, or there was an error
288+
if (res == nullptr) return nullptr;
289+
288290
// If source is a binary string, the string may have to be validated:
289291
if (to_cs != &my_charset_bin && arg->collation.collation == &my_charset_bin &&
290292
!res->is_valid_string(to_cs)) {

sql/item_func.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,15 @@ void report_conversion_error(const CHARSET_INFO *to_cs, const char *from,
8888
bool simplify_string_args(THD *thd, const DTCollation &c, Item **items,
8989
uint nitems);
9090

91-
String *eval_string_arg(const CHARSET_INFO *to_cs, Item *arg, String *buffer);
91+
String *eval_string_arg_noinline(const CHARSET_INFO *to_cs, Item *arg,
92+
String *buffer);
93+
94+
inline String *eval_string_arg(const CHARSET_INFO *to_cs, Item *arg,
95+
String *buffer) {
96+
if (my_charset_same(to_cs, arg->collation.collation))
97+
return arg->val_str(buffer);
98+
return eval_string_arg_noinline(to_cs, arg, buffer);
99+
}
92100

93101
class Item_func : public Item_result_field {
94102
protected:

0 commit comments

Comments
 (0)