Skip to content

Commit 515b220

Browse files
author
Ajo Robert
committed
Bug #18075170 SQL NODE RESTART REQUIRED TO
AVOID DEADLOCK AFTER RESTORE Analysis -------- Accessing the restored NDB table in an active multi-statement transaction was resulting in deadlock found error. MySQL Server needs to discover metadata of NDB table from data nodes after table is restored from backup. Metadata discovery happens on the first access to restored table. Current code mandates this statement to be the first one in the transaction. This is because discover needs exclusive metadata lock on the table. Lock upgrade at this point can lead to MDL deadlock and the code was written at the time when MDL deadlock detector was not present. In case when discovery attempted in the statement other than the first one in transaction ER_LOCK_DEADLOCK error is reported pessimistically. Fix: --- Removed the constraint as any potential deadlock will be handled by deadlock detector. Also changed code in discover to keep metadata locks of active transaction. Same issue was present in table auto repair scenario. Same fix is added in repair path also.
1 parent e7b6e81 commit 515b220

File tree

6 files changed

+276
-17
lines changed

6 files changed

+276
-17
lines changed

mysql-test/r/merge_recover.result renamed to mysql-test/r/myisam_recover.result

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#
2-
# Test of MyISAM MRG tables with corrupted children.
2+
# Tests for corrupted MyISAM tables and MyISAMMRG tables with corrupted
3+
# children..
4+
#
35
# Run with --myisam-recover=force option.
46
#
57
# Preparation: we need to make sure that the merge parent
@@ -44,20 +46,20 @@ drop procedure p_create;
4446
# Switching to connection 'default'
4547
#
4648
#
47-
# We have to disable the ps-protocol, to avoid
49+
# We have to disable the ps-protocol, to avoid
4850
# "Prepared statement needs to be re-prepared" errors
4951
# -- table def versions change all the time with full table cache.
50-
#
52+
#
5153
drop table if exists t1, t1_mrg, t1_copy;
5254
#
5355
# Prepare a MERGE engine table, that refers to a corrupted
5456
# child.
55-
#
57+
#
5658
create table t1 (a int, key(a)) engine=myisam;
5759
create table t1_mrg (a int) union (t1) engine=merge;
5860
#
5961
# Create a table with a corrupted index file:
60-
# save an old index file, insert more rows,
62+
# save an old index file, insert more rows,
6163
# overwrite the new index file with the old one.
6264
#
6365
insert into t1 (a) values (1), (2), (3);
@@ -101,3 +103,48 @@ execute stmt;
101103
deallocate prepare stmt;
102104
set @@global.table_definition_cache=default;
103105
set @@global.table_open_cache=default;
106+
#
107+
# 18075170 - sql node restart required to avoid deadlock after
108+
# restore
109+
#
110+
# Check that auto-repair for MyISAM tables can now happen in the
111+
# middle of transaction, without aborting it.
112+
create table t1 (a int, key(a)) engine=myisam;
113+
create table t2 (a int);
114+
insert into t2 values (1);
115+
# Create a table with a corrupted index file:
116+
# save an old index file, insert more rows,
117+
# overwrite the new index file with the old one.
118+
insert into t1 (a) values (1);
119+
flush table t1;
120+
insert into t1 (a) values (4);
121+
flush table t1;
122+
# Check table is needed to mark the table as crashed.
123+
check table t1;
124+
Table Op Msg_type Msg_text
125+
test.t1 check warning Size of datafile is: 14 Should be: 7
126+
test.t1 check error Record-count is not ok; is 2 Should be: 1
127+
test.t1 check warning Found 2 key parts. Should be: 1
128+
test.t1 check error Corrupt
129+
# At this point we have a corrupt t1
130+
set autocommit = 0;
131+
select * from t2;
132+
a
133+
1
134+
# Without fix select from t1 will break the transaction. After the fix
135+
# transaction should be active and should hold lock on table t2. Alter
136+
# table from con2 will wait only if the transaction is not broken.
137+
select * from t1;
138+
a
139+
1
140+
4
141+
Warnings:
142+
Error 145 Table './test/t1' is marked as crashed and should be repaired
143+
Error 1194 Table 't1' is marked as crashed and should be repaired
144+
Error 1034 Number of rows changed from 1 to 2
145+
ALTER TABLE t2 ADD val INT;
146+
# With fix we should have alter table waiting for t2 lock here.
147+
ROLLBACK;
148+
SET autocommit = 1;
149+
# Cleanup
150+
drop table t1, t2;
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#
2+
# 18075170 - sql node restart required to avoid deadlock after
3+
# restore
4+
#
5+
CREATE TABLE t1 (id INT) ENGINE=NDBCluster;
6+
CREATE TABLE t2 (id INT) ENGINE=NDBCluster;
7+
INSERT INTO t1 VALUES (1);
8+
INSERT INTO t2 VALUES (1);
9+
DROP TABLE t1;
10+
DROP TABLE t2;
11+
SET autocommit = 0;
12+
SELECT * FROM t1;
13+
id
14+
1
15+
SELECT * FROM t2;
16+
id
17+
1
18+
ROLLBACK;
19+
SET autocommit = 1;
20+
drop table t1;
21+
drop table t2;
22+
SET autocommit = 0;
23+
SELECT * FROM t1;
24+
id
25+
1
26+
SELECT * FROM t2;
27+
id
28+
1
29+
ALTER TABLE t1 ADD val INT;
30+
ROLLBACK;
31+
SET autocommit = 1;
32+
drop table t1;
33+
drop table t2;
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
-- source include/have_ndb.inc
2+
-- source include/count_sessions.inc
3+
4+
--echo #
5+
--echo # 18075170 - sql node restart required to avoid deadlock after
6+
--echo # restore
7+
--echo #
8+
# Test Auto Discover option within a transaction
9+
# and make sure the transaction is not broken.
10+
CREATE TABLE t1 (id INT) ENGINE=NDBCluster;
11+
CREATE TABLE t2 (id INT) ENGINE=NDBCluster;
12+
13+
INSERT INTO t1 VALUES (1);
14+
INSERT INTO t2 VALUES (1);
15+
16+
-- source include/ndb_backup.inc
17+
18+
DROP TABLE t1;
19+
DROP TABLE t2;
20+
21+
-- source include/ndb_restore_master.inc
22+
23+
SET autocommit = 0;
24+
SELECT * FROM t1;
25+
26+
# Without fix below select was resulting in DEADLOCK error. With fix select
27+
# should succeed.
28+
SELECT * FROM t2;
29+
ROLLBACK;
30+
SET autocommit = 1;
31+
32+
drop table t1;
33+
drop table t2;
34+
35+
#
36+
# Checking lock preservation in transaction
37+
#
38+
# Using existing backup to create the scenario. Tables are deleted as part of
39+
# above test cleanup. Thus restoring the backup will bring the system to
40+
# required state.
41+
-- source include/ndb_restore_master.inc
42+
43+
SET autocommit = 0;
44+
SELECT * FROM t1;
45+
SELECT * FROM t2;
46+
47+
connect(con2, localhost, root);
48+
--SEND ALTER TABLE t1 ADD val INT
49+
50+
connection default;
51+
# Alter from con2 will be in waiting state as there is a lock on t1 from
52+
# default connection due to active transaction. We check for this condition
53+
# then releasing the lock by rollbacking active transaction.
54+
let $wait_condition=
55+
SELECT count(*) = 1 FROM information_schema.processlist WHERE state
56+
LIKE "Waiting%" AND info = "ALTER TABLE t1 ADD val INT";
57+
--source include/wait_condition.inc
58+
ROLLBACK;
59+
SET autocommit = 1;
60+
61+
connection con2;
62+
--REAP
63+
64+
disconnect con2;
65+
connection default;
66+
drop table t1;
67+
drop table t2;
68+
69+
# Wait till all disconnects are completed
70+
-- source include/wait_until_count_sessions.inc

mysql-test/t/merge_recover.test renamed to mysql-test/t/myisam_recover.test

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
--source include/count_sessions.inc
2+
3+
--echo #
4+
--echo # Tests for corrupted MyISAM tables and MyISAMMRG tables with corrupted
5+
--echo # children..
16
--echo #
2-
--echo # Test of MyISAM MRG tables with corrupted children.
37
--echo # Run with --myisam-recover=force option.
48
--echo #
59
--echo # Preparation: we need to make sure that the merge parent
@@ -57,10 +61,10 @@ eval $lock;
5761
--echo #
5862
connection default;
5963
--echo #
60-
--echo # We have to disable the ps-protocol, to avoid
64+
--echo # We have to disable the ps-protocol, to avoid
6165
--echo # "Prepared statement needs to be re-prepared" errors
6266
--echo # -- table def versions change all the time with full table cache.
63-
--echo #
67+
--echo #
6468
--disable_ps_protocol
6569
--disable_warnings
6670
drop table if exists t1, t1_mrg, t1_copy;
@@ -69,12 +73,12 @@ let $MYSQLD_DATADIR=`select @@datadir`;
6973
--echo #
7074
--echo # Prepare a MERGE engine table, that refers to a corrupted
7175
--echo # child.
72-
--echo #
76+
--echo #
7377
create table t1 (a int, key(a)) engine=myisam;
7478
create table t1_mrg (a int) union (t1) engine=merge;
7579
--echo #
7680
--echo # Create a table with a corrupted index file:
77-
--echo # save an old index file, insert more rows,
81+
--echo # save an old index file, insert more rows,
7882
--echo # overwrite the new index file with the old one.
7983
--echo #
8084
insert into t1 (a) values (1), (2), (3);
@@ -111,3 +115,64 @@ set @@global.table_open_cache=default;
111115
disconnect con1;
112116
connection default;
113117
--enable_ps_protocol
118+
119+
--echo #
120+
--echo # 18075170 - sql node restart required to avoid deadlock after
121+
--echo # restore
122+
--echo #
123+
--echo # Check that auto-repair for MyISAM tables can now happen in the
124+
--echo # middle of transaction, without aborting it.
125+
126+
connection default;
127+
128+
create table t1 (a int, key(a)) engine=myisam;
129+
create table t2 (a int);
130+
insert into t2 values (1);
131+
132+
--echo # Create a table with a corrupted index file:
133+
--echo # save an old index file, insert more rows,
134+
--echo # overwrite the new index file with the old one.
135+
insert into t1 (a) values (1);
136+
flush table t1;
137+
--copy_file $MYSQLD_DATADIR/test/t1.MYI $MYSQLD_DATADIR/test/t1_copy.MYI
138+
insert into t1 (a) values (4);
139+
flush table t1;
140+
--remove_file $MYSQLD_DATADIR/test/t1.MYI
141+
--copy_file $MYSQLD_DATADIR/test/t1_copy.MYI $MYSQLD_DATADIR/test/t1.MYI
142+
--remove_file $MYSQLD_DATADIR/test/t1_copy.MYI
143+
144+
--echo # Check table is needed to mark the table as crashed.
145+
check table t1;
146+
147+
--echo # At this point we have a corrupt t1
148+
set autocommit = 0;
149+
select * from t2;
150+
--echo # Without fix select from t1 will break the transaction. After the fix
151+
--echo # transaction should be active and should hold lock on table t2. Alter
152+
--echo # table from con2 will wait only if the transaction is not broken.
153+
select * from t1;
154+
155+
connect(con2, localhost, root);
156+
--SEND ALTER TABLE t2 ADD val INT
157+
158+
connection default;
159+
--echo # With fix we should have alter table waiting for t2 lock here.
160+
let $wait_condition=
161+
SELECT count(*) = 1 FROM information_schema.processlist WHERE state
162+
LIKE "Waiting%" AND info = "ALTER TABLE t2 ADD val INT";
163+
164+
--source include/wait_condition.inc
165+
ROLLBACK;
166+
SET autocommit = 1;
167+
168+
connection con2;
169+
--REAP
170+
171+
connection default;
172+
disconnect con2;
173+
174+
--echo # Cleanup
175+
drop table t1, t2;
176+
177+
# Wait till all disconnects are completed
178+
-- source include/wait_until_count_sessions.inc

sql/sql_base.cc

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3972,10 +3972,11 @@ request_backoff_action(enum_open_table_action action_arg,
39723972
* We met a broken table that needs repair, or a table that
39733973
is not present on this MySQL server and needs re-discovery.
39743974
To perform the action, we need an exclusive metadata lock on
3975-
the table. Acquiring an X lock while holding other shared
3976-
locks is very deadlock-prone. If this is a multi- statement
3977-
transaction that holds metadata locks for completed
3978-
statements, we don't do it, and report an error instead.
3975+
the table. Acquiring X lock while holding other shared
3976+
locks can easily lead to deadlocks. We rely on MDL deadlock
3977+
detector to discover them. If this is a multi-statement
3978+
transaction that holds metadata locks for completed statements,
3979+
we should keep these locks after discovery/repair.
39793980
The action type in this case is OT_DISCOVER or OT_REPAIR.
39803981
* Our attempt to acquire an MDL lock lead to a deadlock,
39813982
detected by the MDL deadlock detector. The current
@@ -4016,7 +4017,7 @@ request_backoff_action(enum_open_table_action action_arg,
40164017
keep tables open between statements and a livelock
40174018
is not possible.
40184019
*/
4019-
if (action_arg != OT_REOPEN_TABLES && m_has_locks)
4020+
if (action_arg == OT_BACKOFF_AND_RETRY && m_has_locks)
40204021
{
40214022
my_error(ER_LOCK_DEADLOCK, MYF(0));
40224023
m_thd->mark_transaction_to_rollback(true);
@@ -4043,6 +4044,32 @@ request_backoff_action(enum_open_table_action action_arg,
40434044
}
40444045

40454046

4047+
/**
4048+
An error handler to mark transaction to rollback on DEADLOCK error
4049+
during DISCOVER / REPAIR.
4050+
*/
4051+
class MDL_deadlock_discovery_repair_handler : public Internal_error_handler
4052+
{
4053+
public:
4054+
virtual bool handle_condition(THD *thd,
4055+
uint sql_errno,
4056+
const char* sqlstate,
4057+
MYSQL_ERROR::enum_warning_level level,
4058+
const char* msg,
4059+
MYSQL_ERROR ** cond_hdl)
4060+
{
4061+
if (sql_errno == ER_LOCK_DEADLOCK)
4062+
{
4063+
thd->mark_transaction_to_rollback(true);
4064+
}
4065+
/*
4066+
We have marked this transaction to rollback. Return false to allow
4067+
error to be reported or handled by other handlers.
4068+
*/
4069+
return false;
4070+
}
4071+
};
4072+
40464073
/**
40474074
Recover from failed attempt of open table by performing requested action.
40484075
@@ -4058,6 +4085,12 @@ Open_table_context::
40584085
recover_from_failed_open()
40594086
{
40604087
bool result= FALSE;
4088+
MDL_deadlock_discovery_repair_handler handler;
4089+
/*
4090+
Install error handler to mark transaction to rollback on DEADLOCK error.
4091+
*/
4092+
m_thd->push_internal_handler(&handler);
4093+
40614094
/* Execute the action. */
40624095
switch (m_action)
40634096
{
@@ -4079,7 +4112,12 @@ recover_from_failed_open()
40794112

40804113
m_thd->warning_info->clear_warning_info(m_thd->query_id);
40814114
m_thd->clear_error(); // Clear error message
4082-
m_thd->mdl_context.release_transactional_locks();
4115+
/*
4116+
Rollback to start of the current statement to release exclusive lock
4117+
on table which was discovered but preserve locks from previous statements
4118+
in current transaction.
4119+
*/
4120+
m_thd->mdl_context.rollback_to_savepoint(start_of_statement_svp());
40834121
break;
40844122
}
40854123
case OT_REPAIR:
@@ -4093,12 +4131,18 @@ recover_from_failed_open()
40934131
m_failed_table->table_name, FALSE);
40944132

40954133
result= auto_repair_table(m_thd, m_failed_table);
4096-
m_thd->mdl_context.release_transactional_locks();
4134+
/*
4135+
Rollback to start of the current statement to release exclusive lock
4136+
on table which was discovered but preserve locks from previous statements
4137+
in current transaction.
4138+
*/
4139+
m_thd->mdl_context.rollback_to_savepoint(start_of_statement_svp());
40974140
break;
40984141
}
40994142
default:
41004143
DBUG_ASSERT(0);
41014144
}
4145+
m_thd->pop_internal_handler();
41024146
/*
41034147
Reset the pointers to conflicting MDL request and the
41044148
TABLE_LIST element, set when we need auto-discovery or repair,

0 commit comments

Comments
 (0)