@@ -97,48 +97,61 @@ init() ->
97
97
ok .
98
98
99
99
init_with_lock () ->
100
- {Retries , Timeout } = rabbit_peer_discovery :retry_timeout (),
101
- init_with_lock (Retries , Timeout , fun init_from_config /0 ).
100
+ {Retries , Timeout } = rabbit_peer_discovery :locking_retry_timeout (),
101
+ init_with_lock (Retries , Timeout , fun run_peer_discovery /0 ).
102
102
103
- init_with_lock (0 , _ , InitFromConfig ) ->
103
+ init_with_lock (0 , _ , RunPeerDiscovery ) ->
104
104
case rabbit_peer_discovery :lock_acquisition_failure_mode () of
105
105
ignore ->
106
106
rabbit_log :warning (" Cannot acquire a lock during clustering" , []),
107
- InitFromConfig (),
107
+ RunPeerDiscovery (),
108
108
rabbit_peer_discovery :maybe_register ();
109
109
fail ->
110
110
exit (cannot_acquire_startup_lock )
111
111
end ;
112
- init_with_lock (Retries , Timeout , InitFromConfig ) ->
112
+ init_with_lock (Retries , Timeout , RunPeerDiscovery ) ->
113
113
case rabbit_peer_discovery :lock () of
114
114
not_supported ->
115
115
rabbit_log :info (" Peer discovery backend does not support locking, falling back to randomized delay" ),
116
116
% % See rabbitmq/rabbitmq-server#1202 for details.
117
117
rabbit_peer_discovery :maybe_inject_randomized_delay (),
118
- InitFromConfig (),
118
+ RunPeerDiscovery (),
119
119
rabbit_peer_discovery :maybe_register ();
120
120
{error , _Reason } ->
121
121
timer :sleep (Timeout ),
122
- init_with_lock (Retries - 1 , Timeout , InitFromConfig );
122
+ init_with_lock (Retries - 1 , Timeout , RunPeerDiscovery );
123
123
{ok , Data } ->
124
124
try
125
- InitFromConfig (),
125
+ RunPeerDiscovery (),
126
126
rabbit_peer_discovery :maybe_register ()
127
127
after
128
128
rabbit_peer_discovery :unlock (Data )
129
129
end
130
130
end .
131
131
132
- init_from_config () ->
132
+ -spec run_peer_discovery () -> ok | {[node ()], node_type ()}.
133
+ run_peer_discovery () ->
134
+ {RetriesLeft , DelayInterval } = rabbit_peer_discovery :discovery_retries (),
135
+ run_peer_discovery_with_retries (RetriesLeft , DelayInterval ).
136
+
137
+ -spec run_peer_discovery_with_retries (non_neg_integer (), non_neg_integer ()) -> ok | {[node ()], node_type ()}.
138
+ run_peer_discovery_with_retries (0 , _DelayInterval ) ->
139
+ ok ;
140
+ run_peer_discovery_with_retries (RetriesLeft , DelayInterval ) ->
133
141
FindBadNodeNames = fun
134
142
(Name , BadNames ) when is_atom (Name ) -> BadNames ;
135
143
(Name , BadNames ) -> [Name | BadNames ]
136
144
end ,
137
145
{DiscoveredNodes , NodeType } =
138
146
case rabbit_peer_discovery :discover_cluster_nodes () of
147
+ {error , Reason } ->
148
+ RetriesLeft1 = RetriesLeft - 1 ,
149
+ rabbit_log :error (" Peer discovery returned an error: ~p . Will retry after a delay of ~b , ~b retries left..." ,
150
+ [Reason , DelayInterval , RetriesLeft1 ]),
151
+ timer :sleep (DelayInterval ),
152
+ run_peer_discovery_with_retries (RetriesLeft1 , DelayInterval );
139
153
{ok , {Nodes , Type } = Config }
140
- when is_list (Nodes ) andalso
141
- (Type == disc orelse Type == disk orelse Type == ram ) ->
154
+ when is_list (Nodes ) andalso (Type == disc orelse Type == disk orelse Type == ram ) ->
142
155
case lists :foldr (FindBadNodeNames , [], Nodes ) of
143
156
[] -> Config ;
144
157
BadNames -> e ({invalid_cluster_node_names , BadNames })
@@ -167,6 +180,16 @@ init_from_config() ->
167
180
% % reachable and compatible (in terms of Mnesia internal protocol version and such)
168
181
% % cluster peers in order.
169
182
join_discovered_peers (TryNodes , NodeType ) ->
183
+ {RetriesLeft , DelayInterval } = rabbit_peer_discovery :discovery_retries (),
184
+ join_discovered_peers_with_retries (TryNodes , NodeType , RetriesLeft , DelayInterval ).
185
+
186
+ join_discovered_peers_with_retries (TryNodes , _NodeType , 0 , _DelayInterval ) ->
187
+ rabbit_log :warning (
188
+ " Could not successfully contact any node of: ~s (as in Erlang distribution). "
189
+ " Starting as a blank standalone node...~n " ,
190
+ [string :join (lists :map (fun atom_to_list /1 , TryNodes ), " ," )]),
191
+ init_db_and_upgrade ([node ()], disc , false , _Retry = true );
192
+ join_discovered_peers_with_retries (TryNodes , NodeType , RetriesLeft , DelayInterval ) ->
170
193
case find_reachable_peer_to_cluster_with (nodes_excl_me (TryNodes )) of
171
194
{ok , Node } ->
172
195
rabbit_log :info (" Node '~s ' selected for auto-clustering~n " , [Node ]),
@@ -175,11 +198,11 @@ join_discovered_peers(TryNodes, NodeType) ->
175
198
rabbit_connection_tracking :boot (),
176
199
rabbit_node_monitor :notify_joined_cluster ();
177
200
none ->
178
- rabbit_log : warning (
179
- " Could not successfully contact any node of: ~s (as in Erlang distribution). "
180
- " Starting as a blank standalone node... ~n " ,
181
- [ string : join ( lists : map ( fun atom_to_list / 1 , TryNodes ), " , " )] ),
182
- init_db_and_upgrade ([ node ()], disc , false , _Retry = true )
201
+ RetriesLeft1 = RetriesLeft - 1 ,
202
+ rabbit_log : error ( " Trying to join discovered peers failed. Will retry after a delay of ~b , ~b retries left... " ,
203
+ [ DelayInterval , RetriesLeft1 ]) ,
204
+ timer : sleep ( DelayInterval ),
205
+ join_discovered_peers_with_retries ( TryNodes , NodeType , RetriesLeft1 , DelayInterval )
183
206
end .
184
207
185
208
% % Make the node join a cluster. The node will be reset automatically
0 commit comments