Skip to content

Commit cc8ec5d

Browse files
committed
rabbit_peer_discovery: Allow backends to select the node to join themselves
[Why] Before, the backend would always return a list of nodes and the subsystem would select one based on their uptimes, the nodes they are already clustered with, and the readiness of their database. This works well in general but has some limitations. For instance, with the Consul backend, the discoverability of nodes depends on when each one registered and in which order. Therefore, the node with the highest uptime might not be the first that registers. In this case, the one that registers first will only discover itself and boot as a standalone node. However, the one with the highest uptime that registered afterwards will discover both nodes. It will then select itself as the node to join because it has the highest uptime. In the end, each node forms its own distinct cluster. Another example is the Kubernetes backend. The current solution works fine but it could be optimized: the backend knows we always want to join the first node ("$node-0") regardless of the order in which they are started because picking the first node alphabetically is fine. Therefore, we want to let the backend select the node to join if it wants to. [How] The `list_nodes()` callback can now return the following term: `{ok, {SelectedNode :: node(), NodeType}}`. If the subsystem sees this return value, it will consider that the returned node is the one to join. It will still query properties because we want to make sure the node's database is ready before joining it. (cherry picked from commit 3147ab7)
1 parent 0c3f4bc commit cc8ec5d

File tree

2 files changed

+35
-10
lines changed

2 files changed

+35
-10
lines changed

deps/rabbit/src/rabbit_peer_discovery.erl

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,22 @@ sync_desired_cluster(Backend, RetriesLeft, RetryDelay) ->
186186
#{domain => ?RMQLOG_DOMAIN_PEER_DISC}),
187187
ok;
188188
{ok, {DiscoveredNodes, NodeType}} ->
189-
NodesAndProps = query_node_props(DiscoveredNodes),
190-
case can_use_discovered_nodes(DiscoveredNodes, NodesAndProps) of
189+
NodeAlreadySelected = is_atom(DiscoveredNodes),
190+
NodesAndProps = case NodeAlreadySelected of
191+
true ->
192+
?LOG_DEBUG(
193+
"Peer discovery: node '~ts' already "
194+
"selected by backend",
195+
[DiscoveredNodes],
196+
#{domain => ?RMQLOG_DOMAIN_PEER_DISC}),
197+
query_node_props([DiscoveredNodes]);
198+
false ->
199+
query_node_props(DiscoveredNodes)
200+
end,
201+
CanUse = (
202+
NodeAlreadySelected orelse
203+
can_use_discovered_nodes(DiscoveredNodes, NodesAndProps)),
204+
case CanUse of
191205
true ->
192206
case select_node_to_join(NodesAndProps) of
193207
SelectedNode when SelectedNode =/= false ->
@@ -249,8 +263,9 @@ retry_sync_desired_cluster(_Backend, 0, _RetryDelay) ->
249263
ok.
250264

251265
-spec discover_cluster_nodes() -> {ok, Discovery} when
252-
Discovery :: {DiscoveredNodes, NodeType},
266+
Discovery :: {DiscoveredNodes | SelectedNode, NodeType},
253267
DiscoveredNodes :: [node()],
268+
SelectedNode :: node(),
254269
NodeType :: rabbit_types:node_type().
255270
%% @doc Queries the peer discovery backend to discover nodes.
256271
%%
@@ -262,10 +277,11 @@ discover_cluster_nodes() ->
262277

263278
-spec discover_cluster_nodes(Backend) -> Ret when
264279
Backend :: backend(),
265-
Discovery :: {DiscoveredNodes, NodeType},
280+
Ret :: {ok, Discovery} | {error, Reason},
281+
Discovery :: {DiscoveredNodes | SelectedNode, NodeType},
266282
DiscoveredNodes :: [node()],
283+
SelectedNode :: node(),
267284
NodeType :: rabbit_types:node_type(),
268-
Ret :: {ok, Discovery} | {error, Reason},
269285
Reason :: any().
270286
%% @private
271287

@@ -295,7 +311,7 @@ discover_cluster_nodes(Backend) ->
295311

296312
-spec check_discovered_nodes_list_validity(DiscoveredNodes, NodeType) ->
297313
Ret when
298-
DiscoveredNodes :: [node()],
314+
DiscoveredNodes :: [node()] | node(),
299315
NodeType :: rabbit_types:node_type(),
300316
Ret :: ok.
301317
%% @private
@@ -310,6 +326,12 @@ check_discovered_nodes_list_validity(DiscoveredNodes, NodeType)
310326
[] -> ok;
311327
_ -> e({invalid_cluster_node_names, BadNodenames})
312328
end;
329+
check_discovered_nodes_list_validity(SelectedNode, NodeType)
330+
when NodeType =:= disc orelse NodeType =:= disk orelse NodeType =:= ram ->
331+
case is_atom(SelectedNode) of
332+
true -> ok;
333+
false -> e({invalid_cluster_node_names, SelectedNode})
334+
end;
313335
check_discovered_nodes_list_validity(DiscoveredNodes, BadNodeType)
314336
when is_list(DiscoveredNodes) ->
315337
e({invalid_cluster_node_type, BadNodeType}).
@@ -836,7 +858,7 @@ can_use_discovered_nodes(_DiscoveredNodes, []) ->
836858
false.
837859

838860
-spec select_node_to_join(NodesAndProps) -> SelectedNode when
839-
NodesAndProps :: [node_and_props()],
861+
NodesAndProps :: nonempty_list(node_and_props()),
840862
SelectedNode :: node() | false.
841863
%% @doc Selects the node to join among the sorted list of nodes.
842864
%%
@@ -1140,10 +1162,10 @@ unlock(Backend, Data) ->
11401162
{Nodes :: [node()],
11411163
NodeType :: rabbit_types:node_type()} |
11421164
{ok, Nodes :: [node()]} |
1143-
{ok, {Nodes :: [node()],
1165+
{ok, {Nodes :: [node()] | node(),
11441166
NodeType :: rabbit_types:node_type()}} |
11451167
{error, Reason :: string()}) ->
1146-
{ok, {Nodes :: [node()], NodeType :: rabbit_types:node_type()}} |
1168+
{ok, {Nodes :: [node()] | node(), NodeType :: rabbit_types:node_type()}} |
11471169
{error, Reason :: string()}.
11481170

11491171
normalize(Nodes) when is_list(Nodes) ->
@@ -1154,6 +1176,9 @@ normalize({ok, Nodes}) when is_list(Nodes) ->
11541176
{ok, {Nodes, disc}};
11551177
normalize({ok, {Nodes, NodeType}}) when is_list(Nodes) andalso is_atom(NodeType) ->
11561178
{ok, {Nodes, NodeType}};
1179+
normalize({ok, {Node, NodeType}})
1180+
when is_atom(Node) andalso is_atom(NodeType) ->
1181+
{ok, {Node, NodeType}};
11571182
normalize({error, Reason}) ->
11581183
{error, Reason}.
11591184

deps/rabbit_common/src/rabbit_peer_discovery_backend.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141

4242
-callback init() -> ok | {error, Reason :: string()}.
4343

44-
-callback list_nodes() -> {ok, {Nodes :: list(), NodeType :: rabbit_types:node_type()}} |
44+
-callback list_nodes() -> {ok, {Nodes :: [node()] | node(), NodeType :: rabbit_types:node_type()}} |
4545
{error, Reason :: string()}.
4646

4747
-callback supports_registration() -> boolean().

0 commit comments

Comments
 (0)