Skip to content

Commit c059f92

Browse files
committed
Synchronise feature flags before any changes to Mnesia membership
Left as it was, a failure to enable the feature flags leaves the cluster in an inconsistent state where the joined nodes think the joining node is already a member, but the joining node believes it is a standalone node. As a result, later join_cluster commands fail with an inconsistent cluster error.
1 parent 39f3257 commit c059f92

File tree

1 file changed

+17
-15
lines changed

1 file changed

+17
-15
lines changed

deps/rabbit/src/rabbit_mnesia.erl

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,23 @@ dir() -> mnesia:system_info(directory).
570570
init_db(ClusterNodes, NodeType, CheckOtherNodes) ->
571571
NodeIsVirgin = is_virgin_node(),
572572
rabbit_log:debug("Does data directory looks like that of a blank (uninitialised) node? ~tp", [NodeIsVirgin]),
573+
%% We want to synchronize feature flags first before we wait for
574+
%% tables (which is needed to ensure the local view of the tables
575+
%% matches the rest of the cluster). The reason is that some
576+
%% feature flags may add or remove tables. In this case the list
577+
%% of tables returned by `rabbit_table:definitions()' usually
578+
%% depends on the state of feature flags but this state is local.
579+
%%
580+
%% For instance, a feature flag may remove a table (so it's gone
581+
%% from the cluster). If we were to wait for that table locally
582+
%% before synchronizing feature flags, we would wait forever;
583+
%% indeed the feature flag being disabled before sync,
584+
%% `rabbit_table:definitions()' would return the old table.
585+
%%
586+
%% Feature flags need to be synced before any change to Mnesia
587+
%% membership. If enabling feature flags fails, Mnesia could remain
588+
%% in an inconsistent state that prevents later joining the nodes.
589+
ensure_feature_flags_are_in_sync(nodes_excl_me(ClusterNodes), NodeIsVirgin),
573590
Nodes = change_extra_mnesia_nodes(ClusterNodes, CheckOtherNodes),
574591
%% Note that we use `system_info' here and not the cluster status
575592
%% since when we start rabbit for the first time the cluster
@@ -583,29 +600,14 @@ init_db(ClusterNodes, NodeType, CheckOtherNodes) ->
583600
{[], false, disc} ->
584601
%% RAM -> disc, starting from scratch
585602
ok = create_schema(),
586-
ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin),
587603
ok;
588604
{[], true, disc} ->
589605
%% First disc node up
590606
_ = maybe_force_load(),
591-
ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin),
592607
ok;
593608
{[_ | _], _, _} ->
594609
%% Subsequent node in cluster, catch up
595610
_ = maybe_force_load(),
596-
%% We want to synchronize feature flags first before we wait for
597-
%% tables (which is needed to ensure the local view of the tables
598-
%% matches the rest of the cluster). The reason is that some
599-
%% feature flags may add or remove tables. In this case the list
600-
%% of tables returned by `rabbit_table:definitions()' usually
601-
%% depends on the state of feature flags but this state is local.
602-
%%
603-
%% For instance, a feature flag may remove a table (so it's gone
604-
%% from the cluster). If we were to wait for that table locally
605-
%% before synchronizing feature flags, we would wait forever;
606-
%% indeed the feature flag being disabled before sync,
607-
%% `rabbit_table:definitions()' would return the old table.
608-
ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin),
609611
ok = rabbit_table:wait_for_replicated(_Retry = true),
610612
ok = rabbit_table:ensure_local_copies(NodeType)
611613
end,

0 commit comments

Comments
 (0)