Skip to content

Commit 69d2517

Browse files
Merge pull request #1258 from rabbitmq/rabbitmq-server-1257
Locking support in cluster formation
2 parents 681ee86 + c8e3210 commit 69d2517

6 files changed

+194
-14
lines changed

src/rabbit.erl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,7 @@ broker_start() ->
330330
ToBeLoaded = Plugins ++ ?APPS,
331331
start_apps(ToBeLoaded),
332332
maybe_sd_notify(),
333-
ok = log_broker_started(rabbit_plugins:strictly_plugins(rabbit_plugins:active())),
334-
%% See rabbitmq/rabbitmq-server#1202 for details.
335-
rabbit_peer_discovery:maybe_inject_randomized_delay(),
336-
rabbit_peer_discovery:maybe_register(),
337-
ok.
333+
ok = log_broker_started(rabbit_plugins:strictly_plugins(rabbit_plugins:active())).
338334

339335
%% Try to send systemd ready notification if it makes sense in the
340336
%% current environment. standard_error is used intentionally in all

src/rabbit_mnesia.erl

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@
4646
%% Used internally in rpc calls
4747
-export([node_info/0, remove_node_if_mnesia_running/1]).
4848

49+
-ifdef(TEST).
50+
-compile(export_all).
51+
-export([init_with_lock/3]).
52+
-endif.
53+
4954
-include("rabbit.hrl").
5055

5156
%%----------------------------------------------------------------------------
@@ -101,26 +106,57 @@ init() ->
101106
rabbit_log:info("Node database directory at ~s is empty. "
102107
"Assuming we need to join an existing cluster or initialise from scratch...~n",
103108
[dir()]),
104-
rabbit_peer_discovery:log_configured_backend(),
105-
init_from_config();
109+
rabbit_peer_discovery:log_configured_backend(),
110+
init_with_lock();
106111
false ->
107112
NodeType = node_type(),
108113
init_db_and_upgrade(cluster_nodes(all), NodeType,
109-
NodeType =:= ram, _Retry = true)
114+
NodeType =:= ram, _Retry = true),
115+
rabbit_peer_discovery:maybe_register()
110116
end,
111117
%% We intuitively expect the global name server to be synced when
112118
%% Mnesia is up. In fact that's not guaranteed to be the case -
113119
%% let's make it so.
114120
ok = rabbit_node_monitor:global_sync(),
115121
ok.
116122

123+
init_with_lock() ->
124+
{Retries, Timeout} = rabbit_peer_discovery:retry_timeout(),
125+
init_with_lock(Retries, Timeout, fun init_from_config/0).
126+
127+
init_with_lock(0, _, InitFromConfig) ->
128+
case rabbit_peer_discovery:lock_acquisition_failure_mode() of
129+
ignore ->
130+
rabbit_log:warning("Cannot acquire a lock during clustering", []),
131+
InitFromConfig(),
132+
rabbit_peer_discovery:maybe_register();
133+
fail ->
134+
exit(cannot_acquire_startup_lock)
135+
end;
136+
init_with_lock(Retries, Timeout, InitFromConfig) ->
137+
case rabbit_peer_discovery:lock() of
138+
not_supported ->
139+
%% See rabbitmq/rabbitmq-server#1202 for details.
140+
rabbit_peer_discovery:maybe_inject_randomized_delay(),
141+
InitFromConfig(),
142+
rabbit_peer_discovery:maybe_register();
143+
{error, _Reason} ->
144+
timer:sleep(Timeout),
145+
init_with_lock(Retries - 1, Timeout, InitFromConfig);
146+
{ok, Data} ->
147+
try
148+
InitFromConfig(),
149+
rabbit_peer_discovery:maybe_register()
150+
after
151+
rabbit_peer_discovery:unlock(Data)
152+
end
153+
end.
154+
117155
init_from_config() ->
118156
FindBadNodeNames = fun
119157
(Name, BadNames) when is_atom(Name) -> BadNames;
120158
(Name, BadNames) -> [Name | BadNames]
121159
end,
122-
%% See rabbitmq/rabbitmq-server#1202 for details.
123-
rabbit_peer_discovery:maybe_inject_randomized_delay(),
124160
{DiscoveredNodes, NodeType} =
125161
case rabbit_peer_discovery:discover_cluster_nodes() of
126162
{ok, {Nodes, Type} = Config}

src/rabbit_peer_discovery.erl

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323
-export([discover_cluster_nodes/0, backend/0, node_type/0,
2424
normalize/1, format_discovered_nodes/1, log_configured_backend/0,
2525
register/0, unregister/0, maybe_register/0, maybe_unregister/0,
26-
maybe_inject_randomized_delay/0]).
27-
-export([append_node_prefix/1, node_prefix/0]).
26+
maybe_inject_randomized_delay/0, lock/0, unlock/1]).
27+
-export([append_node_prefix/1, node_prefix/0, retry_timeout/0,
28+
lock_acquisition_failure_mode/0]).
2829

2930
-define(DEFAULT_BACKEND, rabbit_peer_discovery_classic_config).
3031
%% what node type is used by default for this node when joining
@@ -60,7 +61,27 @@ node_type() ->
6061
?DEFAULT_NODE_TYPE
6162
end.
6263

64+
-spec retry_timeout() -> {Retries :: integer(), Timeout :: integer()}.
6365

66+
retry_timeout() ->
67+
case application:get_env(rabbit, cluster_formation) of
68+
{ok, Proplist} ->
69+
Retries = proplists:get_value(lock_retry_limit, Proplist, 10),
70+
Timeout = proplists:get_value(lock_retry_timeout, Proplist, 30000),
71+
{Retries, Timeout};
72+
undefined ->
73+
{10, 30000}
74+
end.
75+
76+
-spec lock_acquisition_failure_mode() -> ignore | fail.
77+
78+
lock_acquisition_failure_mode() ->
79+
case application:get_env(rabbit, cluster_formation) of
80+
{ok, Proplist} ->
81+
proplists:get_value(lock_acquisition_failure_mode, Proplist, fail);
82+
undefined ->
83+
fail
84+
end.
6485

6586
-spec log_configured_backend() -> ok.
6687

@@ -183,6 +204,34 @@ unregister() ->
183204
ok
184205
end.
185206

207+
-spec lock() -> ok | {ok, Data :: term()} | not_supported | {error, Reason :: string()}.
208+
209+
lock() ->
210+
Backend = backend(),
211+
rabbit_log:info("Will try to lock with peer discovery backend ~s", [Backend]),
212+
case Backend:lock(node()) of
213+
{error, Reason} = Error ->
214+
rabbit_log:error("Failed to lock with peer discovery backend ~s: ~p",
215+
[Backend, Reason]),
216+
Error;
217+
Any ->
218+
Any
219+
end.
220+
221+
-spec unlock(Data :: term()) -> ok | {error, Reason :: string()}.
222+
223+
unlock(Data) ->
224+
Backend = backend(),
225+
rabbit_log:info("Will try to unlock with peer discovery backend ~s", [Backend]),
226+
case Backend:unlock(Data) of
227+
{error, Reason} = Error ->
228+
rabbit_log:error("Failed to unlock with peer discovery backend ~s: ~p, "
229+
"lock data: ~p",
230+
[Backend, Reason, Data]),
231+
Error;
232+
Any ->
233+
Any
234+
end.
186235

187236
%%
188237
%% Implementation

src/rabbit_peer_discovery_classic_config.erl

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
-include("rabbit.hrl").
2121

2222
-export([list_nodes/0, supports_registration/0, register/0, unregister/0,
23-
post_registration/0]).
23+
post_registration/0, lock/1, unlock/1]).
2424

2525
%%
2626
%% API
@@ -54,3 +54,13 @@ unregister() ->
5454

5555
post_registration() ->
5656
ok.
57+
58+
-spec lock(Node :: atom()) -> not_supported.
59+
60+
lock(_Node) ->
61+
not_supported.
62+
63+
-spec unlock(Data :: term()) -> ok.
64+
65+
unlock(_Data) ->
66+
ok.

src/rabbit_peer_discovery_dns.erl

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
-include("rabbit.hrl").
2121

2222
-export([list_nodes/0, supports_registration/0, register/0, unregister/0,
23-
post_registration/0]).
23+
post_registration/0, lock/1, unlock/1]).
2424
%% for tests
2525
-export([discover_nodes/2, discover_hostnames/2]).
2626

@@ -71,6 +71,15 @@ unregister() ->
7171
post_registration() ->
7272
ok.
7373

74+
-spec lock(Node :: atom()) -> not_supported.
75+
76+
lock(_Node) ->
77+
not_supported.
78+
79+
-spec unlock(Data :: term()) -> ok.
80+
81+
unlock(_Data) ->
82+
ok.
7483

7584
%%
7685
%% Implementation
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
%% The contents of this file are subject to the Mozilla Public License
2+
%% Version 1.1 (the "License"); you may not use this file except in
3+
%% compliance with the License. You may obtain a copy of the License
4+
%% at http://www.mozilla.org/MPL/
5+
%%
6+
%% Software distributed under the License is distributed on an "AS IS"
7+
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
8+
%% the License for the specific language governing rights and
9+
%% limitations under the License.
10+
%%
11+
%% The Original Code is RabbitMQ.
12+
%%
13+
%% The Initial Developer of the Original Code is GoPivotal, Inc.
14+
%% Copyright (c) 2007-2017 Pivotal Software, Inc. All rights reserved.
15+
%%
16+
-module(cluster_formation_locking_SUITE).
17+
18+
-include_lib("common_test/include/ct.hrl").
19+
-include_lib("eunit/include/eunit.hrl").
20+
21+
-compile(export_all).
22+
23+
all() ->
24+
[
25+
{group, non_parallel_tests}
26+
].
27+
28+
groups() ->
29+
[
30+
{non_parallel_tests, [], [
31+
init_with_lock_exits_after_errors,
32+
init_with_lock_ignore_after_errors,
33+
init_with_lock_not_supported,
34+
init_with_lock_supported
35+
]}
36+
].
37+
38+
init_per_testcase(Testcase, Config) when Testcase == init_with_lock_exits_after_errors;
39+
Testcase == init_with_lock_not_supported;
40+
Testcase == init_with_lock_supported ->
41+
application:set_env(rabbit, cluster_formation,
42+
[{peer_discover_backend, peer_discover_classic_config},
43+
{lock_acquisition_failure_mode, fail}]),
44+
ok = meck:new(rabbit_peer_discovery_classic_config, [passthrough]),
45+
Config;
46+
init_per_testcase(init_with_lock_ignore_after_errors, Config) ->
47+
application:set_env(rabbit, cluster_formation,
48+
[{peer_discover_backend, peer_discover_classic_config},
49+
{lock_acquisition_failure_mode, ignore}]),
50+
ok = meck:new(rabbit_peer_discovery_classic_config, [passthrough]),
51+
Config.
52+
53+
end_per_testcase(_, _) ->
54+
meck:unload(),
55+
application:unset_env(rabbit, cluster_formation).
56+
57+
init_with_lock_exits_after_errors(_Config) ->
58+
meck:expect(rabbit_peer_discovery_classic_config, lock, fun(_) -> {error, "test error"} end),
59+
?assertExit(cannot_acquire_startup_lock, rabbit_mnesia:init_with_lock(2, 10, fun() -> ok end)),
60+
?assert(meck:validate(rabbit_peer_discovery_classic_config)),
61+
passed.
62+
63+
init_with_lock_ignore_after_errors(_Config) ->
64+
meck:expect(rabbit_peer_discovery_classic_config, lock, fun(_) -> {error, "test error"} end),
65+
?assertEqual(ok, rabbit_mnesia:init_with_lock(2, 10, fun() -> ok end)),
66+
?assert(meck:validate(rabbit_peer_discovery_classic_config)),
67+
passed.
68+
69+
init_with_lock_not_supported(_Config) ->
70+
meck:expect(rabbit_peer_discovery_classic_config, lock, fun(_) -> not_supported end),
71+
?assertEqual(ok, rabbit_mnesia:init_with_lock(2, 10, fun() -> ok end)),
72+
?assert(meck:validate(rabbit_peer_discovery_classic_config)),
73+
passed.
74+
75+
init_with_lock_supported(_Config) ->
76+
meck:expect(rabbit_peer_discovery_classic_config, lock, fun(_) -> {ok, data} end),
77+
meck:expect(rabbit_peer_discovery_classic_config, unlock, fun(data) -> ok end),
78+
?assertEqual(ok, rabbit_mnesia:init_with_lock(2, 10, fun() -> ok end)),
79+
?assert(meck:validate(rabbit_peer_discovery_classic_config)),
80+
passed.

0 commit comments

Comments
 (0)