Skip to content

Commit 690b35d

Browse files
BUG#25460324 - HAVING AN UNRESOLVABLE HOSTNAME IN GROUP_REPL SHOULD NOT BLOCK GROUP REPLICATION
Description --------------- Having an unresolvable hostname in group_repl should not block group replication from starting. mysql> set global group_replication_group_seeds='gr-1:24901,gr-2:24901,gr-3:24901,invalid_hostname:24901'; Query OK, 0 rows affected (0.00 sec) mysql> start group_replication; 2017-01-24T19:22:03.173670Z 5 [Note] Plugin group_replication reported: 'Group communication SSL configuration: group_replication_ssl_mode: "DISABLED"' 2017-01-24T19:22:03.173775Z 5 [Note] Plugin group_replication reported: '[GCS] Added automatically IP ranges 10.0.2.15/24,127.0.0.1/8,192.168.56.3/24 to the whitelist' 2017-01-24T19:22:03.202195Z 5 [ERROR] Plugin group_replication reported: '[GCS] Peer address "invalid_hostname:24901" is not valid.' 2017-01-24T19:22:03.202238Z 5 [ERROR] Plugin group_replication reported: 'Unable to initialize the group communication engine' 2017-01-24T19:22:03.202243Z 5 [ERROR] Plugin group_replication reported: 'Error on group communication engine initialization' 2017-01-24T19:22:03.202289Z 5 [Note] Plugin group_replication reported: 'Requesting to leave the group despite of not being a member' 2017-01-24T19:22:03.202293Z 5 [ERROR] Plugin group_replication reported: 'Error calling group communication interfaces while trying to leave the group' ERROR 3096 (HY000): The START GROUP_REPLICATION command failed as there was an error when initializing the group communication layer. Analysis -------------- Actually, when a hostname is unresolvable, GR keeps on rotating until it reaches a live seed. The problem here resides in the peer addresses validation, where an all-or-nothing policy is enforced. If one address in the list is invalid from an IP/Address rules point of view, the whole list is deemed invalid. Suggested Fix ------------- Clean up the seed list, discarding invalid addresses, and continue only with the valid addresses. GR will still error out if none of the provided peer addresses is valid.
Also fix the fact that GR did not error out when group_replication_force_members contained invalid addresses.
1 parent ac4aa2e commit 690b35d

File tree

8 files changed

+145
-38
lines changed

8 files changed

+145
-38
lines changed

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_group_management.cc

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -50,11 +50,33 @@ modify_configuration(const Gcs_interface_parameters& reconfigured_group)
5050
return GCS_NOK;
5151
}
5252

53-
std::vector<std::string> processed_peers;
53+
std::vector<std::string> processed_peers, invalid_processed_peers;
5454
Gcs_xcom_utils::process_peer_nodes(peer_nodes_str,
5555
processed_peers);
56+
Gcs_xcom_utils::validate_peer_nodes(processed_peers,
57+
invalid_processed_peers);
5658

57-
if(processed_peers.size() == 0)
59+
if(!invalid_processed_peers.empty())
60+
{
61+
std::vector<std::string>::iterator invalid_processed_peers_it;
62+
for(invalid_processed_peers_it= invalid_processed_peers.begin();
63+
invalid_processed_peers_it != invalid_processed_peers.end();
64+
++invalid_processed_peers_it)
65+
{
66+
MYSQL_GCS_LOG_WARN("Peer address \"" <<
67+
(*invalid_processed_peers_it).c_str()
68+
<< "\" is not valid.");
69+
}
70+
71+
MYSQL_GCS_LOG_ERROR(
72+
"The peers list contains invalid addresses.Please provide a list with " <<
73+
"only valid addresses."
74+
)
75+
76+
return GCS_NOK;
77+
}
78+
79+
if(processed_peers.empty() && invalid_processed_peers.empty())
5880
{
5981
MYSQL_GCS_LOG_ERROR(
6082
"The peers list to reconfigure the group was empty."

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_interface.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -877,9 +877,12 @@ void Gcs_xcom_interface::initialize_peer_nodes(const std::string *peer_nodes)
877877
{
878878

879879
MYSQL_GCS_LOG_DEBUG("Initializing peers")
880-
std::vector<std::string> processed_peers;
880+
std::vector<std::string> processed_peers, invalid_processed_peers;
881881
Gcs_xcom_utils::process_peer_nodes(peer_nodes,
882882
processed_peers);
883+
Gcs_xcom_utils::validate_peer_nodes(processed_peers,
884+
invalid_processed_peers);
885+
883886
std::vector<std::string>::iterator processed_peers_it;
884887
for(processed_peers_it= processed_peers.begin();
885888
processed_peers_it != processed_peers.end();

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_utils.cc

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -94,6 +94,27 @@ process_peer_nodes(const std::string *peer_nodes,
9494
}
9595
}
9696

97+
/**
  Splits the peer list in place: every entry rejected by is_valid_hostname
  is appended to invalid_peers and erased from peers, while accepted entries
  stay in peers.

  @param [in,out] peers list of peer addresses to check; on return it holds
                        only the entries accepted by is_valid_hostname
  @param [in,out] invalid_peers receives, appended in the order they are
                                found, the entries removed from peers
*/
void
98+
Gcs_xcom_utils::
99+
validate_peer_nodes(std::vector<std::string> &peers,
100+
std::vector<std::string> &invalid_peers)
101+
{
102+
std::vector<std::string>::iterator it;
103+
// No increment in the loop header: the body decides how to advance.
for(it= peers.begin(); it != peers.end();)
104+
{
105+
std::string server_and_port= *it;
106+
if (!is_valid_hostname(server_and_port))
107+
{
108+
invalid_peers.push_back(server_and_port);
109+
// erase() hands back the iterator following the removed element.
it= peers.erase(it);
110+
}
111+
else
112+
{
113+
++it;
114+
}
115+
}
116+
}
117+
97118
uint32_t
98119
Gcs_xcom_utils::mhash(unsigned char *buf, size_t length)
99120
{
@@ -1075,19 +1096,34 @@ is_parameters_syntax_correct(const Gcs_interface_parameters &interface_params)
10751096
Parse and validate hostname and ports.
10761097
*/
10771098
std::vector<std::string> hostnames_and_ports;
1099+
std::vector<std::string> invalid_hostnames_and_ports;
10781100
Gcs_xcom_utils::process_peer_nodes(peer_nodes_str, hostnames_and_ports);
1079-
std::vector<std::string>::iterator it;
1080-
for(it= hostnames_and_ports.begin(); it != hostnames_and_ports.end(); ++it)
1101+
Gcs_xcom_utils::validate_peer_nodes(hostnames_and_ports,
1102+
invalid_hostnames_and_ports);
1103+
1104+
if(!invalid_hostnames_and_ports.empty())
10811105
{
1082-
std::string server_and_port= *it;
1083-
if (!is_valid_hostname(server_and_port))
1106+
std::vector<std::string>::iterator invalid_hostnames_and_ports_it;
1107+
for(invalid_hostnames_and_ports_it= invalid_hostnames_and_ports.begin();
1108+
invalid_hostnames_and_ports_it != invalid_hostnames_and_ports.end();
1109+
++invalid_hostnames_and_ports_it)
10841110
{
1085-
MYSQL_GCS_LOG_ERROR("Peer address \"" << server_and_port << "\" is" <<
1086-
" not valid.")
1087-
error= GCS_NOK;
1088-
goto end;
1111+
MYSQL_GCS_LOG_WARN("Peer address \"" <<
1112+
(*invalid_hostnames_and_ports_it).c_str()
1113+
<< "\" is not valid.");
10891114
}
10901115
}
1116+
1117+
/*
1118+
This means that none of the provided hosts is valid and that
1119+
hostnames_and_ports had some sort of value
1120+
*/
1121+
if(!invalid_hostnames_and_ports.empty() && hostnames_and_ports.empty())
1122+
{
1123+
MYSQL_GCS_LOG_ERROR("None of the provided peer address is valid.");
1124+
error= GCS_NOK;
1125+
goto end;
1126+
}
10911127
}
10921128

10931129
// local peer address

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_utils.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -88,6 +88,18 @@ class Gcs_xcom_utils
8888
void process_peer_nodes(const std::string *peer_nodes,
8989
std::vector<std::string> &processed_peers);
9090

91+
/**
92+
Validates peer nodes according to the IP/Address rules enforced by the
93+
is_valid_hostname function
94+
95+
@param [in,out] peers input list of peer nodes. It will be cleansed of
96+
invalid peers
97+
@param [in,out] invalid_peers This list will contain all invalid peers.
98+
*/
99+
static
100+
void validate_peer_nodes(std::vector<std::string> &peers,
101+
std::vector<std::string> &invalid_peers);
102+
91103
/**
92104
Simple multiplicative hash.
93105

rapid/plugin/group_replication/tests/mtr/r/gr_communication_configuration.result

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ call mtr.add_suppression("Error calling group communication interfaces while try
1313
call mtr.add_suppression("\\[GCS\\] Unable to join the group: peers not configured. ");
1414
call mtr.add_suppression("\\[GCS\\] Timeout while waiting for the group communication engine to exit!");
1515
call mtr.add_suppression("\\[GCS\\] The member has failed to gracefully leave the group.");
16+
call mtr.add_suppression("\\[GCS\\] None of the provided peer address is valid.");
1617
call mtr.add_suppression("\\[GCS\\] read failed");
1718
SET SESSION sql_log_bin= 1;
1819
[connection server2]
@@ -24,6 +25,7 @@ call mtr.add_suppression("Error calling group communication interfaces while try
2425
call mtr.add_suppression("\\[GCS\\] The group_name, peer_nodes, local_node or bootstrap_group parameters were not specified.");
2526
call mtr.add_suppression("\\[GCS\\] Timeout while waiting for the group communication engine to exit!");
2627
call mtr.add_suppression("\\[GCS\\] The member has failed to gracefully leave the group.");
28+
call mtr.add_suppression("\\[GCS\\] None of the provided peer address is valid.");
2729
call mtr.add_suppression("\\[GCS\\] read failed");
2830
SET SESSION sql_log_bin= 1;
2931
[connection server1]
@@ -117,15 +119,15 @@ START GROUP_REPLICATION;
117119
ERROR HY000: The START GROUP_REPLICATION command failed as there was an error when initializing the group communication layer.
118120
#######################################################
119121
# 11. Try start Group Replication on server2 with valid
120-
# local_address and group_seeds, start must
122+
# local_address and some invalid group_seeds, start must
121123
# succeed.
122124
SET GLOBAL group_replication_bootstrap_group= 0;
123125
SET GLOBAL group_replication_local_address= @local_address;
124-
SET GLOBAL group_replication_group_seeds= @group_seeds;
125126
CHANGE MASTER TO MASTER_USER='root', MASTER_PASSWORD='' FOR CHANNEL 'group_replication_recovery';
126127
#######################################################
127128
# 12. We must have a group with two members.
128129
[connection server1]
129130
[connection server2]
130131
#######################################################
131132
# 13. Cleanup.
133+
SET GLOBAL group_replication_group_seeds= @group_seeds;

rapid/plugin/group_replication/tests/mtr/r/gr_force_peer_addresses_option.result

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ SET SESSION sql_log_bin= 1;
1212
[connection server1]
1313
SET SESSION sql_log_bin= 0;
1414
call mtr.add_suppression("Member is OFFLINE, it is not possible to force a new group membership");
15+
call mtr.add_suppression("\\[GCS\\] Peer address .* is not valid.");
16+
call mtr.add_suppression("\\[GCS\\] The peers list contains invalid addresses.Please provide a list with only valid addresses.");
17+
call mtr.add_suppression("Error setting group_replication_force_members value .* on group communication interfaces");
1518
SET SESSION sql_log_bin= 1;
1619

1720
############################################################
@@ -37,7 +40,14 @@ SET GLOBAL group_replication_force_members= "";
3740
include/assert.inc [group_replication_force_members is empty]
3841

3942
############################################################
40-
# 4. Set empty string value to
43+
# 4. Set string value to group_replication_force_members on a ONLINE
44+
# member with invalid IP addresses.
45+
SET GLOBAL group_replication_force_members= "256.256.256.777:1234";
46+
ERROR 42000: Variable 'group_replication_force_members' can't be set to the value of '256.256.256.777:1234'
47+
include/assert.inc [group_replication_force_members is empty]
48+
49+
############################################################
50+
# 5. Set empty string value to
4151
# group_replication_force_members on a RECOVERING
4252
# member.
4353
STOP SLAVE SQL_THREAD FOR CHANNEL "group_replication_applier";
@@ -47,7 +57,7 @@ SET GLOBAL group_replication_force_members= "";
4757
include/assert.inc [group_replication_force_members is empty]
4858

4959
############################################################
50-
# 5. Set group_replication_force_members on a RECOVERING
60+
# 6. Set group_replication_force_members on a RECOVERING
5161
# member.
5262
include/gr_wait_for_member_state.inc
5363
SET GLOBAL group_replication_force_members= "127.0.0.1:10000";
@@ -58,7 +68,7 @@ START SLAVE SQL_THREAD FOR CHANNEL "group_replication_applier";
5868
include/gr_wait_for_member_state.inc
5969

6070
############################################################
61-
# 6. Restart member with group_replication_force_members
71+
# 7. Restart member with group_replication_force_members
6272
# set. Group Replication start will error out.
6373
# 2 members.
6474
[connection server2]
@@ -67,7 +77,7 @@ include/rpl_reconnect.inc
6777
include/assert.inc [Member 2 is OFFLINE]
6878

6979
############################################################
70-
# 7. Start Group Replication while
80+
# 8. Start Group Replication while
7181
# group_replication_force_members is set. Start
7282
# will error out.
7383
include/assert.inc [group_replication_force_members must be 127.0.0.1:10000]
@@ -76,7 +86,7 @@ ERROR HY000: The server is not configured properly to be an active member of the
7686
include/assert.inc [Member 2 is OFFLINE]
7787

7888
############################################################
79-
# 8. Clear group_replication_force_members and start
89+
# 9. Clear group_replication_force_members and start
8090
# Group Replication. Group will have the 2 members.
8191
SET GLOBAL group_replication_force_members= "";
8292
include/assert.inc [group_replication_force_members is empty]
@@ -92,5 +102,5 @@ include/assert.inc [Server 2 must be present on group members]
92102
include/assert.inc [group_replication_force_members must be empty]
93103

94104
############################################################
95-
# 9. Clean up.
105+
# 10. Clean up.
96106
include/group_replication_end.inc

rapid/plugin/group_replication/tests/mtr/t/gr_communication_configuration.test

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ call mtr.add_suppression("Error calling group communication interfaces while try
4646
call mtr.add_suppression("\\[GCS\\] Unable to join the group: peers not configured. ");
4747
call mtr.add_suppression("\\[GCS\\] Timeout while waiting for the group communication engine to exit!");
4848
call mtr.add_suppression("\\[GCS\\] The member has failed to gracefully leave the group.");
49+
call mtr.add_suppression("\\[GCS\\] None of the provided peer address is valid.");
4950
call mtr.add_suppression("\\[GCS\\] read failed");
5051
SET SESSION sql_log_bin= 1;
5152
--let $rpl_connection_name= server2
@@ -58,6 +59,7 @@ call mtr.add_suppression("Error calling group communication interfaces while try
5859
call mtr.add_suppression("\\[GCS\\] The group_name, peer_nodes, local_node or bootstrap_group parameters were not specified.");
5960
call mtr.add_suppression("\\[GCS\\] Timeout while waiting for the group communication engine to exit!");
6061
call mtr.add_suppression("\\[GCS\\] The member has failed to gracefully leave the group.");
62+
call mtr.add_suppression("\\[GCS\\] None of the provided peer address is valid.");
6163
call mtr.add_suppression("\\[GCS\\] read failed");
6264
SET SESSION sql_log_bin= 1;
6365

@@ -243,11 +245,15 @@ START GROUP_REPLICATION;
243245

244246
--echo #######################################################
245247
--echo # 11. Try start Group Replication on server2 with valid
246-
--echo # local_address and group_seeds, start must
248+
--echo # local_address and some invalid group_seeds, start must
247249
--echo # succeed.
248250
SET GLOBAL group_replication_bootstrap_group= 0;
249251
SET GLOBAL group_replication_local_address= @local_address;
250-
SET GLOBAL group_replication_group_seeds= @group_seeds;
252+
253+
--let $invalid_with_valid= `SELECT CONCAT(@group_seeds, ',', 'ola:1234')`
254+
--disable_query_log
255+
--eval SET GLOBAL group_replication_group_seeds= "$invalid_with_valid"
256+
--enable_query_log
251257

252258
# Need to execute change master again as the clear configuration above resets
253259
# the recovery channel created in the beginning.
@@ -269,7 +275,7 @@ CHANGE MASTER TO MASTER_USER='root', MASTER_PASSWORD='' FOR CHANNEL 'group_repli
269275
--let $group_replication_number_of_members= 2
270276
--source ../inc/gr_wait_for_number_of_members.inc
271277

272-
273278
--echo #######################################################
274279
--echo # 13. Cleanup.
280+
SET GLOBAL group_replication_group_seeds= @group_seeds;
275281
--source ../inc/group_replication_end.inc

0 commit comments

Comments
 (0)