Skip to content

Commit 5f8fe25

Browse files
Bug#30242141 MYSQLD HANGS INDEFINITELY AT STARTUP IF IT IS UNABLE TO ACQUIRE GSL
PROBLEM ======= When doing an ALTER, mysqld holds the Global Schema Lock (GSL) for the entire time. Until cluster-7.6, when a mysqld tried to connect to the cluster, it would try to acquire the GSL during setup. If the GSL was already held by another mysqld, it waited for the number of seconds specified by the ndb-wait-setup config and then started mysqld without NDB. In 8.0, well before the server calls the hook that performs the wait for ndb-wait-setup, it makes some updates to the DD and, as part of that, tries to acquire an MDL on the table. This gets sent down to the NDB plugin via ndbcluster_notify_exclusive_mdl(), which gets stuck trying to acquire the GSL on the table. So, even after the time specified in ndb-wait-setup expires, mysqld will not start and keeps waiting for the GSL. FIX === Fix by delaying the registration of all handler callback functions that take the GSL until NDB startup is initiated. Change-Id: Ibcdc93663e1730ceb6abc022ad0804ce796f397a
1 parent eb7a065 commit 5f8fe25

File tree

5 files changed

+76
-13
lines changed

5 files changed

+76
-13
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CREATE TABLE t1 (a int) engine ndb;
2+
ALTER TABLE t1 ALGORITHM = COPY, ADD COLUMN b INT;;
3+
# restart
4+
ERROR HY000: Can't create destination table for copying alter table
5+
# restart
6+
drop table t1;
7+
include/assert_grep.inc [Checking if ndb-wait-setup timeout]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
!include ../my.cnf
2+
3+
[mysqld]
4+
5+
ndb-wait-setup=60
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
--source include/have_debug.inc
2+
--source include/have_multi_ndb.inc
3+
--source have_ndb_error_insert.inc
4+
5+
# Delay alter table indefinitely using error insert
6+
--connection server1
7+
--let $ndb_desc_file= $MYSQLTEST_VARDIR/log/mysqld.2.1.err
8+
CREATE TABLE t1 (a int) engine ndb;
9+
# Error insert 6217 will keep Dbdict::execALTER_TABLE_REQ() in a loop with an interval of 1 sec.
10+
# As the signal is not allowed to execute, ALTER will be stuck holding the GSL for some period of time.
11+
--exec $NDB_MGM -e'all error 6217' > /dev/null
12+
--send ALTER TABLE t1 ALGORITHM = COPY, ADD COLUMN b INT;
13+
14+
15+
# As GSL is held by alter in Server1, restart Server2 and check if ndb-wait-setup timeout occurs
16+
# Without patch, restart will hang for more than ndb-wait-setup
17+
--connection server2
18+
--disable_query_log
19+
call mtr.add_suppression("NDB: Tables not available after [0-9]* seconds. Consider increasing --ndb-wait-setup value");
20+
--enable_query_log
21+
--let $mysqld_name=mysqld.2.1
22+
--source include/restart_mysqld.inc
23+
24+
# Reap for alter in Server1, and thereby release GSL
25+
--connection server1
26+
--error 1005
27+
--reap
28+
29+
# Restart Server2 and check if it starts with NDB
30+
--connection server2
31+
--exec $NDB_MGM -e'all error 0' > /dev/null
32+
--let $mysqld_name=mysqld.2.1
33+
--source include/restart_mysqld.inc
34+
drop table t1;
35+
36+
--let $assert_text= Checking if ndb-wait-setup timeout
37+
--let $assert_select=Tables not available after
38+
--let $assert_file= $ndb_desc_file
39+
--let $assert_count= 1
40+
--source include/assert_grep.inc

storage/ndb/plugin/ha_ndbcluster.cc

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,12 @@ ulong opt_ndb_slave_conflict_role;
866866
static int handle_conflict_op_error(NdbTransaction *trans, const NdbError &err,
867867
const NdbOperation *op);
868868

869+
static bool ndbcluster_notify_alter_table(THD *, const MDL_key *,
870+
ha_notification_type);
871+
872+
static bool ndbcluster_notify_exclusive_mdl(THD *, const MDL_key *,
873+
ha_notification_type, bool *);
874+
869875
static int handle_row_conflict(
870876
NDB_CONFLICT_FN_SHARE *cfn_share, const char *tab_name,
871877
const char *handling_type, const NdbRecord *key_rec,
@@ -10156,6 +10162,10 @@ int rename_table_impl(THD *thd, Ndb *ndb,
1015610162
}
1015710163
});
1015810164

10165+
Thd_ndb *thd_ndb = get_thd_ndb(thd);
10166+
if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
10167+
return HA_ERR_NO_CONNECTION;
10168+
1015910169
NDBDICT *dict = ndb->getDictionary();
1016010170
NDBDICT::List index_list;
1016110171
if (my_strcasecmp(system_charset_info, new_dbname, old_dbname)) {
@@ -10206,7 +10216,6 @@ int rename_table_impl(THD *thd, Ndb *ndb,
1020610216
NDB_SHARE_KEY *new_key = NDB_SHARE::create_key(to);
1020710217
(void)NDB_SHARE::rename_share(share, new_key);
1020810218

10209-
Thd_ndb *thd_ndb = get_thd_ndb(thd);
1021010219
Ndb_DDL_transaction_ctx *ddl_ctx = thd_ndb->get_ddl_transaction_ctx(false);
1021110220
const bool rollback_in_progress =
1021210221
(ddl_ctx != nullptr && ddl_ctx->rollback_in_progress());
@@ -10499,10 +10508,6 @@ int ha_ndbcluster::rename_table(const char *from, const char *to,
1049910508
return HA_WRONG_CREATE_OPTION;
1050010509
}
1050110510

10502-
Thd_ndb *thd_ndb = get_thd_ndb(thd);
10503-
if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
10504-
return HA_ERR_NO_CONNECTION;
10505-
1050610511
// Open the table which is to be renamed(aka. the old)
1050710512
Ndb *ndb = get_ndb(thd);
1050810513
ndb->setDatabaseName(old_dbname);
@@ -10901,10 +10906,6 @@ int ha_ndbcluster::delete_table(const char *path, const dd::Table *) {
1090110906
}
1090210907

1090310908
Thd_ndb *thd_ndb = get_thd_ndb(thd);
10904-
if (!thd_ndb->has_required_global_schema_lock(
10905-
"ha_ndbcluster::delete_table")) {
10906-
return HA_ERR_NO_CONNECTION;
10907-
}
1090810909

1090910910
if (ndb_name_is_temp(m_tabname)) {
1091010911
const char *orig_table_name =
@@ -10930,6 +10931,11 @@ int ha_ndbcluster::delete_table(const char *path, const dd::Table *) {
1093010931
return 0;
1093110932
}
1093210933

10934+
if (!thd_ndb->has_required_global_schema_lock(
10935+
"ha_ndbcluster::delete_table")) {
10936+
return HA_ERR_NO_CONNECTION;
10937+
}
10938+
1093310939
/* This is the final phase of a copy alter. Delay the drop of the table with
1093410940
temp name until after commit so that when required, a rollback would be
1093510941
possible. Log it in the ddl_ctx and return. It will be dropped after
@@ -11885,6 +11891,8 @@ static int ndb_wait_setup_func(ulong max_wait) {
1188511891
*/
1188611892

1188711893
static int ndb_wait_setup_server_startup(void *) {
11894+
ndbcluster_hton->notify_alter_table = ndbcluster_notify_alter_table;
11895+
ndbcluster_hton->notify_exclusive_mdl = ndbcluster_notify_exclusive_mdl;
1188811896
// Signal components that server is started
1188911897
ndb_index_stat_thread.set_server_started();
1189011898
ndbcluster_binlog_set_server_started();
@@ -12227,9 +12235,9 @@ static int ndbcluster_init(void *handlerton_ptr) {
1222712235

1222812236
hton->check_fk_column_compat = ndbcluster_check_fk_column_compat;
1222912237
hton->pre_dd_shutdown = ndbcluster_pre_dd_shutdown;
12230-
hton->notify_alter_table = ndbcluster_notify_alter_table;
12231-
hton->notify_exclusive_mdl = ndbcluster_notify_exclusive_mdl;
1223212238

12239+
// notify_alter_table and notify_exclusive_mdl will be registered later,
12240+
// so that GSL will not be held unnecessarily for non-ndb tables.
1223312241
hton->post_ddl = ndbcluster_post_ddl;
1223412242

1223512243
// Initialize NdbApi

storage/ndb/plugin/ndb_schema_dist.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@
2929
#include <mutex>
3030

3131
#include "my_dbug.h"
32+
#include "mysqld_error.h"
3233
#include "ndbapi/ndb_cluster_connection.hpp"
3334
#include "sql/query_options.h" // OPTION_BIN_LOG
35+
#include "sql/sql_error.h"
3436
#include "sql/sql_thd_internal_api.h"
3537
#include "storage/ndb/plugin/ndb_anyvalue.h"
3638
#include "storage/ndb/plugin/ndb_dist_priv_util.h"
@@ -117,7 +119,8 @@ bool Ndb_schema_dist_client::prepare(const char *db, const char *tabname) {
117119
DBUG_EVALUATE_IF("ndb_schema_dist_not_ready_early", true, false)) {
118120
// The NDB_SHARE for mysql.ndb_schema hasn't been created or not setup
119121
// yet -> schema distribution is not ready
120-
m_thd_ndb->push_warning("Schema distribution is not ready");
122+
push_warning(m_thd, Sql_condition::SL_WARNING, ER_GET_ERRMSG,
123+
"Schema distribution is not ready");
121124
return false;
122125
}
123126

@@ -247,7 +250,7 @@ Ndb_schema_dist_client::~Ndb_schema_dist_client() {
247250
NDB_SHARE::release_reference(m_share, "ndb_schema_dist_client");
248251
}
249252

250-
if (m_thd_ndb->is_slave_thread()) {
253+
if (m_thd_ndb && m_thd_ndb->is_slave_thread()) {
251254
// Copy-out slave thread statistics
252255
// NOTE! This is just a "convenient place" to call this
253256
// function, it could be moved to "end of statement"(if there

0 commit comments

Comments
 (0)