Skip to content

Commit a1ab343

Browse files
committed
rabbit_feature_flags: Protect concurrent reloads of the registry
[Why] When a node joins another node, it resets its feature flags registry (the registry is unloaded) and it copies the feature flags states from the remote cluster. Before this patch, there was a small window where a concurrent use of the registry right between these two steps would reload a registry from the default/empty states, which does not reflect the intent. This could happen because another node is running peer discovery and queries the cluster membership of the node joining a cluster. [How] We acquire the registry reload lock around the reset+copy and in `rabbit_ff_registry_wrapper`.
1 parent 4ec0ca9 commit a1ab343

File tree

2 files changed

+37
-8
lines changed

2 files changed

+37
-8
lines changed

deps/rabbit/src/rabbit_db_cluster.erl

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,23 @@ join(RemoteNode, NodeType)
118118
end
119119
end,
120120

121-
ok = rabbit_db:reset(),
122-
rabbit_feature_flags:copy_feature_states_after_reset(RemoteNode),
121+
%% We acquire the feature flags registry reload lock because
122+
%% between the time we reset the registry (as part of
123+
%% `rabbit_db:reset/0' and the states copy from the remote node,
124+
%% there could be a concurrent reload of the registry (for instance
125+
%% because of peer discovery on another node) with the
126+
%% default/empty states.
127+
%%
128+
%% To make this work, the lock is also acquired from
129+
%% `rabbit_ff_registry_wrapper'.
130+
rabbit_ff_registry_factory:acquire_state_change_lock(),
131+
try
132+
ok = rabbit_db:reset(),
133+
rabbit_feature_flags:copy_feature_states_after_reset(
134+
RemoteNode)
135+
after
136+
rabbit_ff_registry_factory:release_state_change_lock()
137+
end,
123138

124139
?LOG_INFO(
125140
"DB: joining cluster using remote nodes:~n~tp", [ClusterNodes],

deps/rabbit/src/rabbit_ff_registry_wrapper.erl

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
get(FeatureName) ->
5353
case rabbit_ff_registry:get(FeatureName) of
5454
init_required ->
55-
_ = rabbit_ff_registry_factory:initialize_registry(),
55+
initialize_registry(),
5656
get(FeatureName);
5757
Ret ->
5858
Ret
@@ -74,7 +74,7 @@ get(FeatureName) ->
7474
list(Which) ->
7575
case rabbit_ff_registry:list(Which) of
7676
init_required ->
77-
_ = rabbit_ff_registry_factory:initialize_registry(),
77+
initialize_registry(),
7878
list(Which);
7979
Ret ->
8080
Ret
@@ -93,7 +93,7 @@ list(Which) ->
9393
states() ->
9494
case rabbit_ff_registry:states() of
9595
init_required ->
96-
_ = rabbit_ff_registry_factory:initialize_registry(),
96+
initialize_registry(),
9797
states();
9898
Ret ->
9999
Ret
@@ -115,7 +115,7 @@ states() ->
115115
is_supported(FeatureName) ->
116116
case rabbit_ff_registry:is_supported(FeatureName) of
117117
init_required ->
118-
_ = rabbit_ff_registry_factory:initialize_registry(),
118+
initialize_registry(),
119119
is_supported(FeatureName);
120120
Ret ->
121121
Ret
@@ -137,7 +137,7 @@ is_supported(FeatureName) ->
137137
is_enabled(FeatureName) ->
138138
case rabbit_ff_registry:is_enabled(FeatureName) of
139139
init_required ->
140-
_ = rabbit_ff_registry_factory:initialize_registry(),
140+
initialize_registry(),
141141
is_enabled(FeatureName);
142142
Ret ->
143143
Ret
@@ -150,8 +150,22 @@ is_enabled(FeatureName) ->
150150
inventory() ->
151151
case rabbit_ff_registry:inventory() of
152152
init_required ->
153-
_ = rabbit_ff_registry_factory:initialize_registry(),
153+
initialize_registry(),
154154
inventory();
155155
Ret ->
156156
Ret
157157
end.
158+
159+
initialize_registry() ->
160+
%% We acquire the feature flags registry reload lock here to make sure we
161+
%% don't reload the registry in the middle of a cluster join. Indeed, the
162+
%% registry is reset and feature flags states are copied from a remote
163+
%% node. Therefore, there is a small window where the registry is not
164+
%% loaded and the states on disk do not reflect the intent.
165+
rabbit_ff_registry_factory:acquire_state_change_lock(),
166+
try
167+
_ = rabbit_ff_registry_factory:initialize_registry(),
168+
ok
169+
after
170+
rabbit_ff_registry_factory:release_state_change_lock()
171+
end.

0 commit comments

Comments
 (0)