Skip to content

Commit a68f457

Browse files
kjnilssonmergify[bot]
authored andcommitted
Better handle QQ delete member operation when member already removed
(cherry picked from commit 02b6274)
1 parent 499c087 commit a68f457

File tree

2 files changed

+34
-4
lines changed

2 files changed

+34
-4
lines changed

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@
128128
-define(START_CLUSTER_RPC_TIMEOUT, 60_000). %% needs to be longer than START_CLUSTER_TIMEOUT
129129
-define(TICK_TIMEOUT, 5000). %% the ra server tick time
130130
-define(DELETE_TIMEOUT, 5000).
131-
-define(ADD_MEMBER_TIMEOUT, 5000).
131+
-define(MEMBER_CHANGE_TIMEOUT, 20_000).
132132
-define(SNAPSHOT_INTERVAL, 8192). %% the ra default is 4096
133133

134134
%%----------- QQ policies ---------------------------------------------------
@@ -1199,7 +1199,7 @@ add_member(Q, Node) ->
11991199
add_member(Q, Node, promotable).
12001200

12011201
add_member(Q, Node, Membership) ->
1202-
add_member(Q, Node, Membership, ?ADD_MEMBER_TIMEOUT).
1202+
add_member(Q, Node, Membership, ?MEMBER_CHANGE_TIMEOUT).
12031203

12041204
add_member(VHost, Name, Node, Timeout) when is_binary(VHost) ->
12051205
%% NOTE needed to pass mixed cluster tests.
@@ -1276,8 +1276,11 @@ delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
12761276
%% deleting the last member is not allowed
12771277
{error, last_node};
12781278
Members ->
1279-
case ra:remove_member(Members, ServerId) of
1280-
{ok, _, _Leader} ->
1279+
case ra:remove_member(Members, ServerId, ?MEMBER_CHANGE_TIMEOUT) of
1280+
Res when element(1, Res) == ok orelse
1281+
Res == {error, not_member} ->
1282+
%% if not a member we can still proceed with updating the
1283+
%% mnesia record and clean up server if still running
12811284
Fun = fun(Q1) ->
12821285
update_type_state(
12831286
Q1,

deps/rabbit/test/quorum_queue_SUITE.erl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ groups() ->
5959
delete_member_queue_not_found,
6060
delete_member,
6161
delete_member_not_a_member,
62+
delete_member_member_already_deleted,
6263
node_removal_is_quorum_critical]
6364
++ memory_tests()},
6465
{cluster_size_3, [], [
@@ -1955,6 +1956,32 @@ delete_member_not_a_member(Config) ->
19551956
rpc:call(Server, rabbit_quorum_queue, delete_member,
19561957
[<<"/">>, QQ, Server])).
19571958

1959+
delete_member_member_already_deleted(Config) ->
1960+
[Server, Server2] = Servers = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
1961+
NServers = length(Servers),
1962+
Ch = rabbit_ct_client_helpers:open_channel(Config, Server),
1963+
QQ = ?config(queue_name, Config),
1964+
RaName = ra_name(QQ),
1965+
?assertEqual({'queue.declare_ok', QQ, 0, 0},
1966+
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
1967+
?awaitMatch(NServers, count_online_nodes(Server, <<"/">>, QQ), ?DEFAULT_AWAIT),
1968+
ServerId = {RaName, Server},
1969+
ServerId2 = {RaName, Server2},
1970+
%% use are APU directory to simulate situation where the ra:remove_server/2
1971+
%% call timed out but later succeeded
1972+
?assertMatch(ok,
1973+
rpc:call(Server2, ra, leave_and_terminate,
1974+
[quorum_queues, ServerId, ServerId2])),
1975+
1976+
%% idempotent by design
1977+
?assertEqual(ok,
1978+
rpc:call(Server, rabbit_quorum_queue, delete_member,
1979+
[<<"/">>, QQ, Server2])),
1980+
{ok, Q} = rpc:call(Server, rabbit_amqqueue, lookup, [QQ, <<"/">>]),
1981+
#{nodes := Nodes} = amqqueue:get_type_state(Q),
1982+
?assertEqual(1, length(Nodes)),
1983+
ok.
1984+
19581985
delete_member_during_node_down(Config) ->
19591986
[Server, DownServer, Remove] = Servers = rabbit_ct_broker_helpers:get_node_configs(
19601987
Config, nodename),

0 commit comments

Comments
 (0)