Skip to content

Locking support in cluster formation #1258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions src/rabbit.erl
Original file line number Diff line number Diff line change
Expand Up @@ -330,11 +330,7 @@ broker_start() ->
ToBeLoaded = Plugins ++ ?APPS,
start_apps(ToBeLoaded),
maybe_sd_notify(),
ok = log_broker_started(rabbit_plugins:strictly_plugins(rabbit_plugins:active())),
%% See rabbitmq/rabbitmq-server#1202 for details.
rabbit_peer_discovery:maybe_inject_randomized_delay(),
rabbit_peer_discovery:maybe_register(),
ok.
ok = log_broker_started(rabbit_plugins:strictly_plugins(rabbit_plugins:active())).

%% Try to send systemd ready notification if it makes sense in the
%% current environment. standard_error is used intentionally in all
Expand Down
46 changes: 41 additions & 5 deletions src/rabbit_mnesia.erl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
%% Used internally in rpc calls
-export([node_info/0, remove_node_if_mnesia_running/1]).

-ifdef(TEST).
-compile(export_all).
-export([init_with_lock/3]).
-endif.

-include("rabbit.hrl").

%%----------------------------------------------------------------------------
Expand Down Expand Up @@ -101,26 +106,57 @@ init() ->
rabbit_log:info("Node database directory at ~s is empty. "
"Assuming we need to join an existing cluster or initialise from scratch...~n",
[dir()]),
rabbit_peer_discovery:log_configured_backend(),
init_from_config();
rabbit_peer_discovery:log_configured_backend(),
init_with_lock();
false ->
NodeType = node_type(),
init_db_and_upgrade(cluster_nodes(all), NodeType,
NodeType =:= ram, _Retry = true)
NodeType =:= ram, _Retry = true),
rabbit_peer_discovery:maybe_register()
end,
%% We intuitively expect the global name server to be synced when
%% Mnesia is up. In fact that's not guaranteed to be the case -
%% let's make it so.
ok = rabbit_node_monitor:global_sync(),
ok.

%% Entry point for lock-protected initialisation of a blank node.
%% Fetches the lock retry settings from rabbit_peer_discovery:retry_timeout/0
%% and hands over to init_with_lock/3 with init_from_config/0 as the
%% initialisation action.
init_with_lock() ->
    {RetryLimit, RetryInterval} = rabbit_peer_discovery:retry_timeout(),
    RunInit = fun init_from_config/0,
    init_with_lock(RetryLimit, RetryInterval, RunInit).

%% Runs InitFromConfig, followed by peer discovery registration, under the
%% peer discovery backend's lock when the backend provides one.
%%
%%   Retries        - lock acquisition attempts left
%%   Timeout        - pause in milliseconds after a failed attempt
%%   InitFromConfig - zero-arity fun performing the actual initialisation
%%
%% Returns the result of rabbit_peer_discovery:maybe_register/0.
%% Exits with cannot_acquire_startup_lock when the attempts are exhausted
%% and lock_acquisition_failure_mode/0 is `fail`.
init_with_lock(Retries, _, InitFromConfig) when Retries =< 0 ->
    %% Match any non-positive count, not just the literal 0: a negative
    %% lock_retry_limit would otherwise never reach this clause and the
    %% node would retry forever.
    case rabbit_peer_discovery:lock_acquisition_failure_mode() of
        ignore ->
            %% Best effort: proceed without the lock.
            rabbit_log:warning("Cannot acquire a lock during clustering", []),
            InitFromConfig(),
            rabbit_peer_discovery:maybe_register();
        fail ->
            exit(cannot_acquire_startup_lock)
    end;
init_with_lock(Retries, Timeout, InitFromConfig) ->
    case rabbit_peer_discovery:lock() of
        not_supported ->
            %% No locking in this backend: fall back to the randomized
            %% startup delay. See rabbitmq/rabbitmq-server#1202 for details.
            rabbit_peer_discovery:maybe_inject_randomized_delay(),
            InitFromConfig(),
            rabbit_peer_discovery:maybe_register();
        {error, _Reason} ->
            %% rabbit_peer_discovery:lock/0 has already logged the reason.
            timer:sleep(Timeout),
            init_with_lock(Retries - 1, Timeout, InitFromConfig);
        {ok, Data} ->
            %% Release the lock even if initialisation or registration
            %% raises.
            try
                InitFromConfig(),
                rabbit_peer_discovery:maybe_register()
            after
                rabbit_peer_discovery:unlock(Data)
            end
    end.

init_from_config() ->
FindBadNodeNames = fun
(Name, BadNames) when is_atom(Name) -> BadNames;
(Name, BadNames) -> [Name | BadNames]
end,
%% See rabbitmq/rabbitmq-server#1202 for details.
rabbit_peer_discovery:maybe_inject_randomized_delay(),
{DiscoveredNodes, NodeType} =
case rabbit_peer_discovery:discover_cluster_nodes() of
{ok, {Nodes, Type} = Config}
Expand Down
53 changes: 51 additions & 2 deletions src/rabbit_peer_discovery.erl
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
-export([discover_cluster_nodes/0, backend/0, node_type/0,
normalize/1, format_discovered_nodes/1, log_configured_backend/0,
register/0, unregister/0, maybe_register/0, maybe_unregister/0,
maybe_inject_randomized_delay/0]).
-export([append_node_prefix/1, node_prefix/0]).
maybe_inject_randomized_delay/0, lock/0, unlock/1]).
-export([append_node_prefix/1, node_prefix/0, retry_timeout/0,
lock_acquisition_failure_mode/0]).

-define(DEFAULT_BACKEND, rabbit_peer_discovery_classic_config).
%% what node type is used by default for this node when joining
Expand Down Expand Up @@ -60,7 +61,27 @@ node_type() ->
?DEFAULT_NODE_TYPE
end.

%% Lock retry settings read from the `cluster_formation` environment of the
%% `rabbit` application: `lock_retry_limit` (default 10) and
%% `lock_retry_timeout` (default 30000). Both defaults also apply when
%% `cluster_formation` is not set at all.
-spec retry_timeout() -> {Retries :: integer(), Timeout :: integer()}.

retry_timeout() ->
    %% An unset cluster_formation behaves like an empty proplist, so every
    %% lookup falls through to its default.
    Settings = application:get_env(rabbit, cluster_formation, []),
    {proplists:get_value(lock_retry_limit, Settings, 10),
     proplists:get_value(lock_retry_timeout, Settings, 30000)}.

%% What to do when the startup lock cannot be acquired, taken from
%% `lock_acquisition_failure_mode` in the `cluster_formation` environment of
%% the `rabbit` application. Defaults to `fail` when the key, or
%% `cluster_formation` itself, is not set.
-spec lock_acquisition_failure_mode() -> ignore | fail.

lock_acquisition_failure_mode() ->
    Settings = application:get_env(rabbit, cluster_formation, []),
    proplists:get_value(lock_acquisition_failure_mode, Settings, fail).

-spec log_configured_backend() -> ok.

Expand Down Expand Up @@ -183,6 +204,34 @@ unregister() ->
ok
end.

%% Asks the configured peer discovery backend (backend/0) to acquire a lock
%% for this node by calling Backend:lock(node()).
%% An {error, Reason} result is logged and returned unchanged; any other
%% result from the backend is passed through to the caller as is.
-spec lock() -> ok | {ok, Data :: term()} | not_supported | {error, Reason :: string()}.

lock() ->
Backend = backend(),
rabbit_log:info("Will try to lock with peer discovery backend ~s", [Backend]),
case Backend:lock(node()) of
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the backend indicate whether locking is supported, like we do for registration? Should lock/1 and unlock/1 be a part of https://github.com/rabbitmq/rabbitmq-common/blob/rabbitmq-server-1257/src/rabbit_peer_discovery_backend.erl?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a missing PR: https://github.com/rabbitmq/rabbitmq-common/pull/204/files.

The lock/1 return value of unsupported is not very consistent with supports_registration/0 but
it's consistent with rabbitmq-autocluster :/. Arguably this is not a show stopper for #1258 and 3.7.0 release plans.

{error, Reason} = Error ->
%% Log the failure here, then hand the error tuple back untouched.
rabbit_log:error("Failed to lock with peer discovery backend ~s: ~p",
[Backend, Reason]),
Error;
%% Everything else the backend returned is forwarded without inspection.
Any ->
Any
end.

%% Asks the configured peer discovery backend (backend/0) to release a lock.
%% Data is handed to Backend:unlock/1 unchanged. The backend's result is
%% returned as is; an {error, Reason} result is additionally logged together
%% with the lock data.
-spec unlock(Data :: term()) -> ok | {error, Reason :: string()}.

unlock(Data) ->
    Backend = backend(),
    rabbit_log:info("Will try to unlock with peer discovery backend ~s", [Backend]),
    Result = Backend:unlock(Data),
    case Result of
        {error, Reason} ->
            rabbit_log:error("Failed to unlock with peer discovery backend ~s: ~p, "
                             "lock data: ~p",
                             [Backend, Reason, Data]);
        _ ->
            ok
    end,
    Result.

%%
%% Implementation
Expand Down
12 changes: 11 additions & 1 deletion src/rabbit_peer_discovery_classic_config.erl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
-include("rabbit.hrl").

-export([list_nodes/0, supports_registration/0, register/0, unregister/0,
post_registration/0]).
post_registration/0, lock/1, unlock/1]).

%%
%% API
Expand Down Expand Up @@ -54,3 +54,13 @@ unregister() ->

post_registration() ->
ok.

%% This backend has no locking: the node name is ignored and the atom
%% `not_supported` is always returned.
-spec lock(Node :: atom()) -> not_supported.

lock(_NodeName) ->
    not_supported.

%% Counterpart of lock/1 in this module: ignores its argument and always
%% returns `ok`.
-spec unlock(Data :: term()) -> ok.

unlock(_LockData) ->
    ok.
11 changes: 10 additions & 1 deletion src/rabbit_peer_discovery_dns.erl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
-include("rabbit.hrl").

-export([list_nodes/0, supports_registration/0, register/0, unregister/0,
post_registration/0]).
post_registration/0, lock/1, unlock/1]).
%% for tests
-export([discover_nodes/2, discover_hostnames/2]).

Expand Down Expand Up @@ -71,6 +71,15 @@ unregister() ->
post_registration() ->
ok.

%% This backend has no locking: the node name is ignored and the atom
%% `not_supported` is always returned.
-spec lock(Node :: atom()) -> not_supported.

lock(_NodeName) ->
    not_supported.

%% Counterpart of lock/1 in this module: ignores its argument and always
%% returns `ok`.
-spec unlock(Data :: term()) -> ok.

unlock(_LockData) ->
    ok.

%%
%% Implementation
Expand Down
80 changes: 80 additions & 0 deletions test/cluster_formation_locking_SUITE.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License
%% at http://www.mozilla.org/MPL/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and
%% limitations under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developer of the Original Code is GoPivotal, Inc.
%% Copyright (c) 2007-2017 Pivotal Software, Inc. All rights reserved.
%%
-module(cluster_formation_locking_SUITE).

-include_lib("common_test/include/ct.hrl").
-include_lib("eunit/include/eunit.hrl").

-compile(export_all).

%% Common Test callback: the suite consists of a single group.
all() ->
    [{group, non_parallel_tests}].

%% Common Test callback: defines the non_parallel_tests group with no group
%% properties and lists its test cases.
groups() ->
    Testcases = [init_with_lock_exits_after_errors,
                 init_with_lock_ignore_after_errors,
                 init_with_lock_not_supported,
                 init_with_lock_supported],
    [{non_parallel_tests, [], Testcases}].

%% Per-testcase setup: configures cluster formation for the `rabbit`
%% application and mocks the classic config backend with meck (passthrough,
%% so functions without an expectation keep their real implementation).
%%
%% init_with_lock_ignore_after_errors runs with lock_acquisition_failure_mode
%% set to `ignore`; the other test cases run with `fail`.
init_per_testcase(Testcase, Config) when Testcase == init_with_lock_exits_after_errors;
                                         Testcase == init_with_lock_not_supported;
                                         Testcase == init_with_lock_supported ->
    setup_cluster_formation(fail, Config);
init_per_testcase(init_with_lock_ignore_after_errors, Config) ->
    setup_cluster_formation(ignore, Config).

%% Shared setup for every test case in this suite.
%% The backend entry previously read
%% {peer_discover_backend, peer_discover_classic_config}: both the key and the
%% module name were misspelled, so the tests only hit the mocked module
%% because it is also the default backend.
%% NOTE(review): confirm the key against rabbit_peer_discovery:backend/0.
setup_cluster_formation(FailureMode, Config) ->
    application:set_env(rabbit, cluster_formation,
                        [{peer_discovery_backend, rabbit_peer_discovery_classic_config},
                         {lock_acquisition_failure_mode, FailureMode}]),
    ok = meck:new(rabbit_peer_discovery_classic_config, [passthrough]),
    Config.

%% Per-testcase teardown: unloads all meck mocks and removes the
%% cluster_formation setting installed by init_per_testcase/2.
end_per_testcase(_Testcase, _Config) ->
    meck:unload(),
    application:unset_env(rabbit, cluster_formation).

%% With failure mode `fail`, a backend whose lock/1 always errors must make
%% init_with_lock/3 exit with cannot_acquire_startup_lock.
init_with_lock_exits_after_errors(_Config) ->
    Backend = rabbit_peer_discovery_classic_config,
    AlwaysFail = fun(_Node) -> {error, "test error"} end,
    Noop = fun() -> ok end,
    meck:expect(Backend, lock, AlwaysFail),
    ?assertExit(cannot_acquire_startup_lock,
                rabbit_mnesia:init_with_lock(2, 10, Noop)),
    ?assert(meck:validate(Backend)),
    passed.

%% With failure mode `ignore`, a backend whose lock/1 always errors must not
%% stop initialisation: init_with_lock/3 still returns ok.
init_with_lock_ignore_after_errors(_Config) ->
    Backend = rabbit_peer_discovery_classic_config,
    AlwaysFail = fun(_Node) -> {error, "test error"} end,
    Noop = fun() -> ok end,
    meck:expect(Backend, lock, AlwaysFail),
    ?assertEqual(ok, rabbit_mnesia:init_with_lock(2, 10, Noop)),
    ?assert(meck:validate(Backend)),
    passed.

%% A backend that reports `not_supported` from lock/1 must still let
%% init_with_lock/3 complete and return ok.
init_with_lock_not_supported(_Config) ->
    Backend = rabbit_peer_discovery_classic_config,
    Unsupported = fun(_Node) -> not_supported end,
    Noop = fun() -> ok end,
    meck:expect(Backend, lock, Unsupported),
    ?assertEqual(ok, rabbit_mnesia:init_with_lock(2, 10, Noop)),
    ?assert(meck:validate(Backend)),
    passed.

%% When the backend grants the lock ({ok, data}), init_with_lock/3 must
%% return ok and release the lock by calling unlock/1 with the same data.
init_with_lock_supported(_Config) ->
    Backend = rabbit_peer_discovery_classic_config,
    meck:expect(Backend, lock, fun(_) -> {ok, data} end),
    meck:expect(Backend, unlock, fun(data) -> ok end),
    ?assertEqual(ok, rabbit_mnesia:init_with_lock(2, 10, fun() -> ok end)),
    %% meck:validate/1 alone does not show that unlock/1 ran; check the call
    %% explicitly so a regression that leaks the lock fails this test.
    ?assert(meck:called(Backend, unlock, [data])),
    ?assert(meck:validate(Backend)),
    passed.