Skip to content

Commit 9c358dd

Browse files
committed
rabbit_peer_discovery: Move peer discovery driving code from rabbit_mnesia
[Why]
Peer discovery is not Mnesia-specific and will be used once we introduce Khepri.

[How]
The whole peer discovery driving code is moved from `rabbit_mnesia` to `rabbit_peer_discovery`. When `rabbit_mnesia` calls that code, it simply passes a callback for the Mnesia-specific cluster expansion code.
1 parent a595128 commit 9c358dd

File tree

4 files changed

+225
-145
lines changed

4 files changed

+225
-145
lines changed

deps/rabbit/src/rabbit_mnesia.erl

Lines changed: 23 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
-module(rabbit_mnesia).
99

10+
-include_lib("kernel/include/logger.hrl").
11+
12+
-include_lib("rabbit_common/include/logging.hrl").
13+
1014
-export([%% Main interface
1115
init/0,
1216
join_cluster/2,
@@ -67,7 +71,6 @@
6771

6872
-ifdef(TEST).
6973
-compile(export_all).
70-
-export([init_with_lock/3]).
7174
-endif.
7275

7376
%%----------------------------------------------------------------------------
@@ -98,7 +101,8 @@ init() ->
98101
[dir()]),
99102
rabbit_peer_discovery:log_configured_backend(),
100103
rabbit_peer_discovery:maybe_init(),
101-
init_with_lock();
104+
rabbit_peer_discovery:maybe_create_cluster(
105+
fun create_cluster_callback/2);
102106
false ->
103107
NodeType = node_type(),
104108
case is_node_type_permitted(NodeType) of
@@ -121,117 +125,24 @@ init() ->
121125
ok = rabbit_node_monitor:global_sync(),
122126
ok.
123127

124-
init_with_lock() ->
125-
{Retries, Timeout} = rabbit_peer_discovery:locking_retry_timeout(),
126-
init_with_lock(Retries, Timeout, fun run_peer_discovery/0).
127-
128-
init_with_lock(0, _, RunPeerDiscovery) ->
129-
case rabbit_peer_discovery:lock_acquisition_failure_mode() of
130-
ignore ->
131-
rabbit_log:warning("Could not acquire a peer discovery lock, out of retries", []),
132-
RunPeerDiscovery(),
133-
rabbit_peer_discovery:maybe_register();
134-
fail ->
135-
exit(cannot_acquire_startup_lock)
136-
end;
137-
init_with_lock(Retries, Timeout, RunPeerDiscovery) ->
138-
LockResult = rabbit_peer_discovery:lock(),
139-
rabbit_log:debug("rabbit_peer_discovery:lock returned ~tp", [LockResult]),
140-
case LockResult of
141-
not_supported ->
142-
RunPeerDiscovery(),
143-
rabbit_peer_discovery:maybe_register();
144-
{ok, Data} ->
145-
try
146-
RunPeerDiscovery(),
147-
rabbit_peer_discovery:maybe_register()
148-
after
149-
rabbit_peer_discovery:unlock(Data)
150-
end;
151-
{error, _Reason} ->
152-
timer:sleep(Timeout),
153-
init_with_lock(Retries - 1, Timeout, RunPeerDiscovery)
154-
end.
155-
156-
-spec run_peer_discovery() -> ok | {[node()], rabbit_db_cluster:node_type()}.
157-
run_peer_discovery() ->
158-
{RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(),
159-
run_peer_discovery_with_retries(RetriesLeft, DelayInterval).
160-
161-
-spec run_peer_discovery_with_retries(non_neg_integer(), non_neg_integer()) -> ok | {[node()], rabbit_db_cluster:node_type()}.
162-
run_peer_discovery_with_retries(0, _DelayInterval) ->
128+
create_cluster_callback(none, NodeType) ->
129+
DiscNodes = [node()],
130+
NodeType1 = case is_node_type_permitted(NodeType) of
131+
false -> disc;
132+
true -> NodeType
133+
end,
134+
init_db_and_upgrade(DiscNodes, NodeType1, true, _Retry = true),
135+
rabbit_node_monitor:notify_joined_cluster(),
163136
ok;
164-
run_peer_discovery_with_retries(RetriesLeft, DelayInterval) ->
165-
FindBadNodeNames = fun
166-
(Name, BadNames) when is_atom(Name) -> BadNames;
167-
(Name, BadNames) -> [Name | BadNames]
168-
end,
169-
{DiscoveredNodes0, NodeType} =
170-
case rabbit_peer_discovery:discover_cluster_nodes() of
171-
{error, Reason} ->
172-
RetriesLeft1 = RetriesLeft - 1,
173-
rabbit_log:error("Peer discovery returned an error: ~tp. Will retry after a delay of ~b ms, ~b retries left...",
174-
[Reason, DelayInterval, RetriesLeft1]),
175-
timer:sleep(DelayInterval),
176-
run_peer_discovery_with_retries(RetriesLeft1, DelayInterval);
177-
{ok, {Nodes, Type} = Config}
178-
when is_list(Nodes) andalso (Type == disc orelse Type == disk orelse Type == ram) ->
179-
case lists:foldr(FindBadNodeNames, [], Nodes) of
180-
[] -> Config;
181-
BadNames -> e({invalid_cluster_node_names, BadNames})
182-
end;
183-
{ok, {_, BadType}} when BadType /= disc andalso BadType /= ram ->
184-
e({invalid_cluster_node_type, BadType});
185-
{ok, _} ->
186-
e(invalid_cluster_nodes_conf)
187-
end,
188-
DiscoveredNodes = lists:usort(DiscoveredNodes0),
189-
rabbit_log:info("All discovered existing cluster peers: ~ts",
190-
[rabbit_peer_discovery:format_discovered_nodes(DiscoveredNodes)]),
191-
Peers = rabbit_nodes:nodes_excl_me(DiscoveredNodes),
192-
case Peers of
193-
[] ->
194-
rabbit_log:info("Discovered no peer nodes to cluster with. "
195-
"Some discovery backends can filter nodes out based on a readiness criteria. "
196-
"Enabling debug logging might help troubleshoot."),
197-
init_db_and_upgrade([node()], disc, false, _Retry = true);
198-
_ ->
199-
NodeType1 = case is_node_type_permitted(NodeType) of
200-
false -> disc;
201-
true -> NodeType
202-
end,
203-
rabbit_log:info("Peer nodes we can cluster with: ~ts",
204-
[rabbit_peer_discovery:format_discovered_nodes(Peers)]),
205-
join_discovered_peers(Peers, NodeType1)
206-
end.
207-
208-
%% Attempts to join discovered,
209-
%% reachable and compatible (in terms of Mnesia internal protocol version and such)
210-
%% cluster peers in order.
211-
join_discovered_peers(TryNodes, NodeType) ->
212-
{RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(),
213-
join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval).
214-
215-
join_discovered_peers_with_retries(TryNodes, _NodeType, 0, _DelayInterval) ->
216-
rabbit_log:info(
217-
"Could not successfully contact any node of: ~ts (as in Erlang distribution). "
218-
"Starting as a blank standalone node...",
219-
[string:join(lists:map(fun atom_to_list/1, TryNodes), ",")]),
220-
init_db_and_upgrade([node()], disc, false, _Retry = true);
221-
join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval) ->
222-
case find_reachable_peer_to_cluster_with(rabbit_nodes:nodes_excl_me(TryNodes)) of
223-
{ok, Node} ->
224-
rabbit_log:info("Node '~ts' selected for auto-clustering", [Node]),
225-
{ok, {_, DiscNodes, _}} = discover_cluster0(Node),
226-
init_db_and_upgrade(DiscNodes, NodeType, true, _Retry = true),
227-
rabbit_node_monitor:notify_joined_cluster();
228-
none ->
229-
RetriesLeft1 = RetriesLeft - 1,
230-
rabbit_log:info("Trying to join discovered peers failed. Will retry after a delay of ~b ms, ~b retries left...",
231-
[DelayInterval, RetriesLeft1]),
232-
timer:sleep(DelayInterval),
233-
join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft1, DelayInterval)
234-
end.
137+
create_cluster_callback(RemoteNode, NodeType) ->
138+
{ok, {_, DiscNodes, _}} = discover_cluster0(RemoteNode),
139+
NodeType1 = case is_node_type_permitted(NodeType) of
140+
false -> disc;
141+
true -> NodeType
142+
end,
143+
init_db_and_upgrade(DiscNodes, NodeType1, true, _Retry = true),
144+
rabbit_node_monitor:notify_joined_cluster(),
145+
ok.
235146

236147
%% Make the node join a cluster. The node will be reset automatically
237148
%% before we actually cluster it. The nodes provided will be used to
@@ -1149,23 +1060,6 @@ is_virgin_node() ->
11491060
List =:= []
11501061
end.
11511062

1152-
find_reachable_peer_to_cluster_with([]) ->
1153-
none;
1154-
find_reachable_peer_to_cluster_with([Node | Nodes]) ->
1155-
Fail = fun (Fmt, Args) ->
1156-
rabbit_log:warning(
1157-
"Could not auto-cluster with node ~ts: " ++ Fmt, [Node | Args]),
1158-
find_reachable_peer_to_cluster_with(Nodes)
1159-
end,
1160-
case rabbit_db_cluster:check_compatibility(Node) of
1161-
ok ->
1162-
{ok, Node};
1163-
{error, {badrpc, _} = Reason} ->
1164-
Fail("~tp", [Reason]);
1165-
Error ->
1166-
Fail("~tp", [Error])
1167-
end.
1168-
11691063
is_only_clustered_disc_node() ->
11701064
node_type() =:= disc andalso is_clustered() andalso
11711065
cluster_nodes(disc) =:= [node()].
@@ -1177,17 +1071,6 @@ are_we_clustered_with(Node) ->
11771071

11781072
e(Tag) -> throw({error, {Tag, error_description(Tag)}}).
11791073

1180-
error_description({invalid_cluster_node_names, BadNames}) ->
1181-
"In the 'cluster_nodes' configuration key, the following node names "
1182-
"are invalid: " ++ lists:flatten(io_lib:format("~tp", [BadNames]));
1183-
error_description({invalid_cluster_node_type, BadType}) ->
1184-
"In the 'cluster_nodes' configuration key, the node type is invalid "
1185-
"(expected 'disc' or 'ram'): " ++
1186-
lists:flatten(io_lib:format("~tp", [BadType]));
1187-
error_description(invalid_cluster_nodes_conf) ->
1188-
"The 'cluster_nodes' configuration key is invalid, it must be of the "
1189-
"form {[Nodes], Type}, where Nodes is a list of node names and "
1190-
"Type is either 'disc' or 'ram'";
11911074
error_description(clustering_only_disc_node) ->
11921075
"You cannot cluster a node if it is the only disc node in its existing "
11931076
" cluster. If new nodes joined while this node was offline, use "

0 commit comments

Comments
 (0)