Skip to content

Commit bc68570

Browse files
kjnilssonmergify[bot]
authored andcommitted
QQ: refactor and improve leader detection code.
The leader returned in rabbit_quorum_queue:info/2 only ever queried the pid field from the queue record when more up to date info could have been available in the ra_leaderboard table. (cherry picked from commit e24bd06)
1 parent 08f2994 commit bc68570

File tree

2 files changed

+39
-11
lines changed

2 files changed

+39
-11
lines changed

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ is_compatible(_, _, _) ->
180180
init(Q) when ?is_amqqueue(Q) ->
181181
{ok, SoftLimit} = application:get_env(rabbit, quorum_commands_soft_limit),
182182
{Name, _} = MaybeLeader = amqqueue:get_pid(Q),
183-
Leader = case ra_leaderboard:lookup_leader(Name) of
183+
Leader = case find_leader(Q) of
184184
undefined ->
185185
%% leader from queue record will have to suffice
186186
MaybeLeader;
@@ -1663,10 +1663,16 @@ open_files(Name) ->
16631663
end.
16641664

16651665
leader(Q) when ?is_amqqueue(Q) ->
1666-
{Name, Leader} = amqqueue:get_pid(Q),
1667-
case is_process_alive(Name, Leader) of
1668-
true -> Leader;
1669-
false -> ''
1666+
case find_leader(Q) of
1667+
undefined ->
1668+
'';
1669+
{Name, LeaderNode} ->
1670+
case is_process_alive(Name, LeaderNode) of
1671+
true ->
1672+
LeaderNode;
1673+
false ->
1674+
''
1675+
end
16701676
end.
16711677

16721678
peek(Vhost, Queue, Pos) ->
@@ -1742,12 +1748,6 @@ format(Q, Ctx) when ?is_amqqueue(Q) ->
17421748
{leader, LeaderNode},
17431749
{online, Online}].
17441750

1745-
is_process_alive(Name, Node) ->
1746-
%% don't attempt rpc if node is not already connected
1747-
%% as this function is used for metrics and stats and the additional
1748-
%% latency isn't warranted
1749-
erlang:is_pid(erpc_call(Node, erlang, whereis, [Name], ?RPC_TIMEOUT)).
1750-
17511751
-spec quorum_messages(rabbit_amqqueue:name()) -> non_neg_integer().
17521752

17531753
quorum_messages(QName) ->
@@ -1930,3 +1930,30 @@ wait_for_projections(Node, QName, N) ->
19301930
timer:sleep(100),
19311931
wait_for_projections(Node, QName, N - 1)
19321932
end.
1933+
1934+
find_leader(Q) when ?is_amqqueue(Q) ->
1935+
%% the get_pid field in the queue record is updated async after a leader
1936+
%% change, so is likely to be the more stale than the leaderboard
1937+
{Name, _Node} = MaybeLeader = amqqueue:get_pid(Q),
1938+
Leaders = case ra_leaderboard:lookup_leader(Name) of
1939+
undefined ->
1940+
%% leader from queue record will have to suffice
1941+
[MaybeLeader];
1942+
LikelyLeader ->
1943+
[LikelyLeader, MaybeLeader]
1944+
end,
1945+
Nodes = [node() | nodes()],
1946+
case lists:search(fun ({_Nm, Nd}) ->
1947+
lists:member(Nd, Nodes)
1948+
end, Leaders) of
1949+
{value, Leader} ->
1950+
Leader;
1951+
false ->
1952+
undefined
1953+
end.
1954+
1955+
is_process_alive(Name, Node) ->
1956+
%% don't attempt rpc if node is not already connected
1957+
%% as this function is used for metrics and stats and the additional
1958+
%% latency isn't warranted
1959+
erlang:is_pid(erpc_call(Node, erlang, whereis, [Name], ?RPC_TIMEOUT)).

deps/rabbit/test/cli_forget_cluster_node_SUITE.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
-include_lib("eunit/include/eunit.hrl").
1313
-include_lib("rabbitmq_ct_helpers/include/rabbit_assert.hrl").
1414

15+
-compile(nowarn_export_all).
1516
-compile(export_all).
1617

1718
-import(clustering_utils, [

0 commit comments

Comments
 (0)