Skip to content

Commit e619295

Browse files
author
Sujatha Sivakumar
committed
Bug#24901077: RESET SLAVE ALL DOES NOT ALWAYS RESET SLAVE
Description: ============ If you have a relay log index file that has ended up listing some relay log files that do not exist, then RESET SLAVE ALL is not enough to get back to a clean state. Analysis: ========= In the bug scenario the slave server is in a stopped state and some of the relay logs have been deleted, but the relay log index file has not been updated. During slave server restart, replication initialization fails because some of the required relay logs are missing. The user executes the RESET SLAVE/RESET SLAVE ALL command to start a clean slave. As per the documentation, the RESET SLAVE command clears the master info and relay log info repositories, deletes all the relay log files, and starts a new relay log file. But in a scenario where the slave server's Relay_log_info object is not initialized, the slave will not purge the existing relay logs. Hence the index file still remains in a bad state. Users will not be able to start the slave unless these files are cleared. Fix: === RESET SLAVE/RESET SLAVE ALL commands should do the cleanup even in a scenario where Relay_log_info object initialization failed. Backported a flag named 'error_on_rli_init_info', which is required to identify the slave's Relay_log_info object initialization failure. This flag exists in MySQL-5.6 onwards as part of the BUG#14021292 fix. During RESET SLAVE/RESET SLAVE ALL execution this flag indicates the Relay_log_info initialization failure. In such a case, open the relay log index/relay log files and do the required cleanup.
1 parent 9181a56 commit e619295

File tree

7 files changed

+176
-15
lines changed

7 files changed

+176
-15
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
include/master-slave.inc
2+
[connection master]
3+
CREATE TABLE t1 (c1 INT);
4+
INSERT INTO t1 (c1) VALUES (1);
5+
include/stop_slave_sql.inc
6+
FLUSH LOGS;
7+
FLUSH LOGS;
8+
INSERT INTO t1 (c1) VALUES (2);
9+
include/sync_slave_io_with_master.inc
10+
call mtr.add_suppression("File '.*slave-relay-bin.");
11+
call mtr.add_suppression("Could not open log file");
12+
call mtr.add_suppression("Failed to open the relay log");
13+
call mtr.add_suppression("Failed to initialize the master info structure");
14+
include/rpl_stop_server.inc [server_number=2]
15+
# Removing file(s)
16+
include/rpl_start_server.inc [server_number=2]
17+
START SLAVE;
18+
ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log
19+
START SLAVE;
20+
ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log
21+
RESET SLAVE;
22+
DROP TABLE t1;
23+
START SLAVE UNTIL MASTER_LOG_FILE= 'MASTER_LOG_FILE', MASTER_LOG_POS= MASTER_LOG_POS;;
24+
include/wait_for_slave_sql_to_stop.inc
25+
include/stop_slave_io.inc
26+
include/start_slave.inc
27+
include/diff_tables.inc [master:t1, slave:t1]
28+
DROP TABLE t1;
29+
include/rpl_end.inc
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
###############################################################################
2+
# Bug#24901077: RESET SLAVE ALL DOES NOT ALWAYS RESET SLAVE
3+
#
4+
# Problem:
5+
# =======
6+
# If you have a relay log index file that has ended up with
7+
# some relay log files that do not exist, then RESET SLAVE
8+
# ALL is not enough to get back to a clean state.
9+
###############################################################################
10+
# Remove all slave-relay-bin.0* files (do not remove slave-relay-bin.index)
11+
# During server restart rli initialization will fail as there are no
12+
# relay logs. In case of bug RESET SLAVE will not do the required clean up
13+
# as rli is not inited and subsequent START SLAVE will fail.
14+
# Disable "Warning 1612 Being purged log ./slave-relay-bin.0* was not found"
15+
# because it is different on Unix and Windows systems.
16+
17+
--source include/have_binlog_format_mixed.inc
18+
--source include/master-slave.inc
19+
20+
--connection master
21+
CREATE TABLE t1 (c1 INT);
22+
INSERT INTO t1 (c1) VALUES (1);
23+
--sync_slave_with_master
24+
25+
--connection slave
26+
--source include/stop_slave_sql.inc
27+
--let $MYSQLD_SLAVE_DATADIR= `select @@datadir`
28+
29+
--connection master
30+
# Generate more relay logs on slave.
31+
FLUSH LOGS;
32+
FLUSH LOGS;
33+
INSERT INTO t1 (c1) VALUES (2);
34+
35+
--source include/sync_slave_io_with_master.inc
36+
call mtr.add_suppression("File '.*slave-relay-bin.");
37+
call mtr.add_suppression("Could not open log file");
38+
call mtr.add_suppression("Failed to open the relay log");
39+
call mtr.add_suppression("Failed to initialize the master info structure");
40+
41+
# Stop slave
42+
--let $rpl_server_number= 2
43+
--source include/rpl_stop_server.inc
44+
45+
# Delete file(s)
46+
--echo # Removing $remove_pattern file(s)
47+
--let $remove_pattern= slave-relay-bin.0*
48+
--remove_files_wildcard $MYSQLD_SLAVE_DATADIR $remove_pattern
49+
50+
# Start slave
51+
--let $rpl_server_number= 2
52+
--source include/rpl_start_server.inc
53+
54+
# Start slave must fail because of the removed file(s).
55+
--error ER_MASTER_INFO
56+
START SLAVE;
57+
58+
# Try a second time, it must fail again.
59+
--error ER_MASTER_INFO
60+
START SLAVE;
61+
62+
# Retrieve master executed position before reset slave.
63+
--let $master_exec_file= query_get_value("SHOW SLAVE STATUS", Relay_Master_Log_File, 1)
64+
--let $master_exec_pos= query_get_value("SHOW SLAVE STATUS", Exec_Master_Log_Pos, 1)
65+
66+
# Reset slave.
67+
# Disable "Warning 1612 Being purged log ./slave-relay-bin.0* was not found"
68+
# because it is different on Unix and Windows systems.
69+
--disable_warnings
70+
RESET SLAVE;
71+
--enable_warnings
72+
DROP TABLE t1;
73+
--replace_result $master_exec_file MASTER_LOG_FILE $master_exec_pos MASTER_LOG_POS
74+
--eval START SLAVE UNTIL MASTER_LOG_FILE= '$master_exec_file', MASTER_LOG_POS= $master_exec_pos;
75+
--source include/wait_for_slave_sql_to_stop.inc
76+
--source include/stop_slave_io.inc
77+
78+
# Start slave.
79+
--source include/start_slave.inc
80+
81+
--connection master
82+
--sync_slave_with_master
83+
# Check consistency.
84+
--let $diff_tables= master:t1, slave:t1
85+
--source include/diff_tables.inc
86+
87+
# Cleanup
88+
--connection master
89+
DROP TABLE t1;
90+
--sync_slave_with_master
91+
--source include/rpl_end.inc

sql/rpl_mi.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -551,7 +551,6 @@ void end_master_info(Master_info* mi)
551551

552552
if (!mi->inited)
553553
DBUG_VOID_RETURN;
554-
end_relay_log_info(&mi->rli);
555554
if (mi->fd >= 0)
556555
{
557556
end_io_cache(&mi->file);

sql/rpl_rli.cc

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -41,7 +41,8 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
4141
no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id),
4242
info_fd(-1), cur_log_fd(-1), relay_log(&sync_relaylog_period),
4343
sync_counter(0), is_relay_log_recovery(is_slave_recovery),
44-
save_temporary_tables(0), cur_log_old_open_count(0), group_relay_log_pos(0),
44+
save_temporary_tables(0), cur_log_old_open_count(0),
45+
error_on_rli_init_info(false), group_relay_log_pos(0),
4546
event_relay_log_pos(0),
4647
#if HAVE_purify
4748
is_fake(FALSE),
@@ -108,7 +109,7 @@ int init_relay_log_info(Relay_log_info* rli,
108109
const char* info_fname)
109110
{
110111
char fname[FN_REFLEN+128];
111-
int info_fd;
112+
int info_fd= -1;
112113
const char* msg = 0;
113114
int error = 0;
114115
DBUG_ENTER("init_relay_log_info");
@@ -118,6 +119,8 @@ int init_relay_log_info(Relay_log_info* rli,
118119
DBUG_RETURN(0);
119120
fn_format(fname, info_fname, mysql_data_home, "", 4+32);
120121
mysql_mutex_lock(&rli->data_lock);
122+
if (rli->error_on_rli_init_info)
123+
goto err;
121124
info_fd = rli->info_fd;
122125
rli->cur_log_fd = -1;
123126
rli->slave_skip_counter=0;
@@ -351,11 +354,14 @@ Failed to open the existing relay log info file '%s' (errno %d)",
351354
goto err;
352355
}
353356
rli->inited= 1;
357+
rli->error_on_rli_init_info= false;
354358
mysql_mutex_unlock(&rli->data_lock);
355359
DBUG_RETURN(error);
356360

357361
err:
358-
sql_print_error("%s", msg);
362+
rli->error_on_rli_init_info= true;
363+
if (msg)
364+
sql_print_error("%s", msg);
359365
end_io_cache(&rli->info_file);
360366
if (info_fd >= 0)
361367
mysql_file_close(info_fd, MYF(0));
@@ -942,6 +948,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
942948
const char** errmsg)
943949
{
944950
int error=0;
951+
const char *ln;
952+
char name_buf[FN_REFLEN];
945953
DBUG_ENTER("purge_relay_logs");
946954

947955
/*
@@ -968,12 +976,34 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
968976
if (!rli->inited)
969977
{
970978
DBUG_PRINT("info", ("rli->inited == 0"));
971-
DBUG_RETURN(0);
972-
}
973-
974-
DBUG_ASSERT(rli->slave_running == 0);
975-
DBUG_ASSERT(rli->mi->slave_running == 0);
979+
if (rli->error_on_rli_init_info)
980+
{
981+
ln= rli->relay_log.generate_name(opt_relay_logname, "-relay-bin",
982+
1, name_buf);
976983

984+
if (rli->relay_log.open_index_file(opt_relaylog_index_name, ln, TRUE))
985+
{
986+
sql_print_error("Unable to purge relay log files. Failed to open relay "
987+
"log index file:%s.", rli->relay_log.get_index_fname());
988+
DBUG_RETURN(1);
989+
}
990+
if (rli->relay_log.open(ln, LOG_BIN, 0, SEQ_READ_APPEND, 0,
991+
(max_relay_log_size ? max_relay_log_size :
992+
max_binlog_size), 1, TRUE))
993+
{
994+
sql_print_error("Unable to purge relay log files. Failed to open relay "
995+
"log file:%s.", rli->relay_log.get_log_fname());
996+
DBUG_RETURN(1);
997+
}
998+
}
999+
else
1000+
DBUG_RETURN(0);
1001+
}
1002+
else
1003+
{
1004+
DBUG_ASSERT(rli->slave_running == 0);
1005+
DBUG_ASSERT(rli->mi->slave_running == 0);
1006+
}
9771007
rli->slave_skip_counter=0;
9781008
mysql_mutex_lock(&rli->data_lock);
9791009

@@ -1013,6 +1043,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
10131043
rli->group_relay_log_pos,
10141044
0 /* do not need data lock */, errmsg, 0);
10151045

1046+
if (!rli->inited && rli->error_on_rli_init_info)
1047+
rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
10161048
err:
10171049
#ifndef DBUG_OFF
10181050
char buf[22];

sql/rpl_rli.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -153,7 +153,14 @@ class Relay_log_info : public Slave_reporting_capability
153153
a different log under our feet
154154
*/
155155
uint32 cur_log_old_open_count;
156-
156+
157+
/*
158+
If error_on_rli_init_info is true when init_relay_log_info() is called,
159+
the previous call to init_relay_log_info() terminated with an error;
160+
RESET SLAVE must be executed and the problem fixed manually.
161+
*/
162+
bool error_on_rli_init_info;
163+
157164
/*
158165
Let's call a group (of events) :
159166
- a transaction

sql/slave.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -881,6 +881,7 @@ void close_active_mi()
881881
if (active_mi)
882882
{
883883
end_master_info(active_mi);
884+
end_relay_log_info(&active_mi->rli);
884885
delete active_mi;
885886
active_mi= 0;
886887
}
@@ -4165,6 +4166,7 @@ void end_relay_log_info(Relay_log_info* rli)
41654166
{
41664167
DBUG_ENTER("end_relay_log_info");
41674168

4169+
rli->error_on_rli_init_info= false;
41684170
if (!rli->inited)
41694171
DBUG_VOID_RETURN;
41704172
if (rli->info_fd >= 0)

sql/sql_repl.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -1313,6 +1313,7 @@ int reset_slave(THD *thd, Master_info* mi)
13131313

13141314
// close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0
13151315
end_master_info(mi);
1316+
end_relay_log_info(&mi->rli);
13161317
// and delete these two files
13171318
fn_format(fname, master_info_file, mysql_data_home, "", 4+32);
13181319
if (mysql_file_stat(key_file_master_info, fname, &stat_area, MYF(0)) &&

0 commit comments

Comments
 (0)