Skip to content

Commit 0404fb7

Browse files
author
Manish Kumar
committed
BUG#21641780 - CRASH ON START/STOP SLAVE FOR CHANNEL 'GROUP_REPLICATION_APPLIER'
Problem: When a group is running, a user can execute: START SLAVE FOR CHANNEL 'group_replication_applier'; STOP SLAVE FOR CHANNEL 'group_replication_applier'; The STOP SLAVE command sets the FDE (FormatDescriptionEvent) of the channel to NULL, so an attempt to queue any event after that causes a crash in Transaction_boundary_parser::get_event_boundary_type because a NULL value is used. For a normal slave, queue_event() is only executed by the IO thread, which controls the removal of the FDE, so there is no issue there. In Group Replication we set up a "local" master info object (which is replaced if the channel IO thread connects) for scenarios where there is no IO thread, and that gets deleted due to STOP SLAVE. Solution: The solution implemented here is as follows: Inside rpl_slave.cc::queue_event(), check whether the FDE is NULL; if it is, we report an error. Disallow execution of START/STOP SLAVE and START/STOP SLAVE IO_THREAD FOR CHANNEL 'group_replication_applier'. We also disallow the execution of SHOW SLAVE STATUS for channel 'group_replication_applier'.
1 parent bc5eac7 commit 0404fb7

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed

sql/rpl_slave.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "sql_plugin.h" // opt_plugin_dir_ptr
5151
#include "transaction.h" // trans_begin
5252
#include "tztime.h" // Time_zone
53+
#include "rpl_group_replication.h"
5354

5455
// Sic: Must be after mysqld.h to get the right ER macro.
5556
#include "errmsg.h" // CR_*
@@ -690,6 +691,19 @@ bool start_slave_cmd(THD *thd)
690691
{
691692
mi= msr_map.get_mi(lex->mi.channel);
692693

694+
/*
695+
If the channel being used is a group replication channel we need to
696+
disable the START SLAVE [IO_THREAD] command as group replication does
697+
not support the command.
698+
*/
699+
if (mi && msr_map.is_group_replication_channel_name(mi->get_channel(), true)
700+
&& (!thd->lex->slave_thd_opt || (thd->lex->slave_thd_opt & SLAVE_IO)))
701+
{
702+
my_error(ER_SLAVE_CHANNEL_OPERATION_NOT_ALLOWED, MYF(0),
703+
"START SLAVE [IO_THREAD] FOR CHANNEL", mi->get_channel());
704+
goto err;
705+
}
706+
693707
if (mi)
694708
res= start_slave(thd, &thd->lex->slave_connection,
695709
&thd->lex->mi, thd->lex->slave_thd_opt, mi, true);
@@ -740,6 +754,20 @@ bool stop_slave_cmd(THD *thd)
740754
{
741755
mi= msr_map.get_mi(lex->mi.channel);
742756

757+
/*
758+
If the channel being used is a group replication channel we need to
759+
disable the STOP SLAVE [IO_THREAD] command as group replication does
760+
not support the command.
761+
*/
762+
if (mi && msr_map.is_group_replication_channel_name(mi->get_channel(), true)
763+
&& (!thd->lex->slave_thd_opt || (thd->lex->slave_thd_opt & SLAVE_IO)))
764+
{
765+
my_error(ER_SLAVE_CHANNEL_OPERATION_NOT_ALLOWED, MYF(0),
766+
"STOP SLAVE [IO_THREAD] FOR CHANNEL", mi->get_channel());
767+
mysql_mutex_unlock(&LOCK_msr_map);
768+
DBUG_RETURN(true);
769+
}
770+
743771
if (mi)
744772
res= stop_slave(thd, mi, 1 /*net report */);
745773
else if (strcmp(msr_map.get_default_channel(), lex->mi.channel))
@@ -3860,6 +3888,21 @@ bool show_slave_status_cmd(THD *thd)
38603888
{
38613889
/* when mi is 0, i.e mi doesn't exist, SSS will return an empty set */
38623890
mi= msr_map.get_mi(lex->mi.channel);
3891+
3892+
/*
3893+
If the channel being used is a group replication applier channel we
3894+
need to disable the SHOW SLAVE STATUS command as its output is not
3895+
compatible with this command.
3896+
*/
3897+
if (mi && msr_map.is_group_replication_channel_name(mi->get_channel(),
3898+
true))
3899+
{
3900+
my_error(ER_SLAVE_CHANNEL_OPERATION_NOT_ALLOWED, MYF(0),
3901+
"SHOW SLAVE STATUS", mi->get_channel());
3902+
mysql_mutex_unlock(&LOCK_msr_map);
3903+
DBUG_RETURN(true);
3904+
}
3905+
38633906
res= show_slave_status(thd, mi);
38643907
}
38653908

@@ -7710,6 +7753,13 @@ bool queue_event(Master_info* mi,const char* buf, ulong event_len)
77107753
DBUG_ASSERT(lock_count == 0);
77117754
lock_count= 1;
77127755

7756+
if (mi->get_mi_description_event() == NULL)
7757+
{
7758+
sql_print_error("The queue event failed for channel '%s' as its "
7759+
"configuration is invalid.", mi->get_channel());
7760+
goto err;
7761+
}
7762+
77137763
/*
77147764
Simulate an unknown ignorable log event by rewriting a Xid
77157765
log event before queuing it into relay log.
@@ -9985,6 +10035,19 @@ bool reset_slave_cmd(THD *thd)
998510035
else
998610036
{
998710037
mi= msr_map.get_mi(lex->mi.channel);
10038+
/*
10039+
If the channel being used is a group replication channel and
10040+
group_replication is still running we need to disable RESET SLAVE [ALL]
10041+
command.
10042+
*/
10043+
if (mi && msr_map.is_group_replication_channel_name(mi->get_channel(), true)
10044+
&& is_group_replication_running())
10045+
{
10046+
my_error(ER_SLAVE_CHANNEL_OPERATION_NOT_ALLOWED, MYF(0),
10047+
"RESET SLAVE [ALL] FOR CHANNEL", mi->get_channel());
10048+
mysql_mutex_unlock(&LOCK_msr_map);
10049+
DBUG_RETURN(true);
10050+
}
998810051

998910052
if (mi)
999010053
res= reset_slave(thd, mi, thd->lex->reset_slave_info.all);

0 commit comments

Comments
 (0)