Skip to content

Commit bdf1079

Browse files
committed
QQ: use a dedicated function for queue recovery after Ra system restart.
Previously we used the `registered` recovery strategy, where every Ra server that has a registered name is recovered. This could have unintended side effects, e.g. for a quorum queue that was deleted while not all of its members were running. In that case the Ra system would recover the members that had not been deleted, which is not ideal: a dangling member would just sit and loop in the pre-vote state, and a future declaration of the queue may partially fail. Instead, we now rely on the metadata store as the source of truth about which members should be restarted after a Ra system restart.
1 parent f4b80f4 commit bdf1079

File tree

2 files changed

+26
-1
lines changed

2 files changed

+26
-1
lines changed

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
handle_event/3]).
1818
-export([is_recoverable/1,
1919
recover/2,
20+
system_recover/1,
2021
stop/1,
2122
start_server/1,
2223
restart_server/1,
@@ -97,6 +98,14 @@
9798
-define(RA_SYSTEM, quorum_queues).
9899
-define(RA_WAL_NAME, ra_log_wal).
99100

101+
%% Logs at info level via rabbit_log:info/2, prefixing the message with
%% "[Module:Function/Arity] " of the call site. The prefix is joined to Str by
%% adjacent string-literal concatenation, so Str must be a string literal.
%% Args is the list of format arguments for Str (use [] when there are none).
-define(INFO(Str, Args),
102+
rabbit_log:info("[~s:~s/~b] " Str,
103+
[?MODULE, ?FUNCTION_NAME, ?FUNCTION_ARITY | Args])).
104+
%% Same as ?INFO/2 but logs at warning level via rabbit_log:warning/2.
-define(WARN(Str, Args),
105+
rabbit_log:warning("[~s:~s/~b] " Str,
106+
[?MODULE, ?FUNCTION_NAME, ?FUNCTION_ARITY | Args])).
107+
108+
100109
-define(STATISTICS_KEYS,
101110
[policy,
102111
operator_policy,
@@ -641,6 +650,21 @@ is_recoverable(Q) when ?is_amqqueue(Q) and ?amqqueue_is_quorum(Q) ->
641650
Nodes = get_nodes(Q),
642651
lists:member(Node, Nodes).
643652

653+
%% Recovery function for the `quorum_queues` Ra system; this commit references
%% it as the `server_recovery_strategy` MFA in rabbit_ra_systems.
%%
%% Only the atom `quorum_queues` is matched; there is no clause for any other
%% argument.
%%
%% When rabbit:is_booted() returns true, the queues returned by
%% rabbit_amqqueue:list_local_quorum_queues() are passed to recover/2 and the
%% number of recovered and failed queues is logged. When it returns false,
%% recovery is skipped and only a log line is emitted.
%%
%% Returns `ok` in both cases; individual recovery failures are only logged,
%% not returned to the caller.
system_recover(quorum_queues) ->
654+
case rabbit:is_booted() of
655+
true ->
656+
Queues = rabbit_amqqueue:list_local_quorum_queues(),
657+
?INFO("recovering ~b queues", [length(Queues)]),
658+
%% recover/2 binds its first argument as _Vhost (unused), so an empty
%% binary is passed as a placeholder.
{Recovered, Failed} = recover(<<>>, Queues),
659+
?INFO("recovered ~b queues, "
660+
"failed to recover ~b queues",
661+
[length(Recovered), length(Failed)]),
662+
ok;
663+
false ->
664+
%% NOTE(review): presumably the regular boot-time queue recovery handles
%% the queues in this case - confirm against the boot sequence.
?INFO("rabbit not booted, skipping queue recovery", []),
665+
ok
666+
end.
667+
644668
-spec recover(binary(), [amqqueue:amqqueue()]) ->
645669
{[amqqueue:amqqueue()], [amqqueue:amqqueue()]}.
646670
recover(_Vhost, Queues) ->

deps/rabbit/src/rabbit_ra_systems.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ get_config(quorum_queues = RaSystem) ->
130130
wal_max_entries => WalMaxEntries,
131131
segment_compute_checksums => SegmentChecksums,
132132
compress_mem_tables => CompressMemTables,
133-
server_recovery_strategy => registered};
133+
server_recovery_strategy => {rabbit_quorum_queue,
134+
system_recover, []}};
134135
get_config(coordination = RaSystem) ->
135136
DefaultConfig = get_default_config(),
136137
CoordDataDir = filename:join(

0 commit comments

Comments
 (0)