Skip to content

Commit 75c4ff7

Browse files
Merge pull request #4481 from rabbitmq/mergify/bp/v3.10.x/pr-4468
Fix migrate leadership timeout (backport #4468)
2 parents ba3432a + 51e2837 commit 75c4ff7

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

deps/rabbit/src/rabbit_maintenance.erl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,15 +270,15 @@ transfer_leadership_of_classic_mirrored_queues(TransferCandidates) ->
270270
[rabbit_misc:rs(Name), readable_candidate_list(ExistingReplicaNodes)]),
271271
case random_primary_replica_transfer_candidate_node(TransferCandidates, ExistingReplicaNodes) of
272272
{ok, Pick} ->
273-
rabbit_log:debug("Will transfer leadership of local ~s to node ~s",
273+
rabbit_log:debug("Will transfer leadership of local ~s. Planned target node: ~s",
274274
[rabbit_misc:rs(Name), Pick]),
275275
case rabbit_mirror_queue_misc:migrate_leadership_to_existing_replica(Q, Pick) of
276-
{migrated, _} ->
276+
{migrated, NewPrimary} ->
277277
rabbit_log:debug("Successfully transferred leadership of queue ~s to node ~s",
278-
[rabbit_misc:rs(Name), Pick]);
278+
[rabbit_misc:rs(Name), NewPrimary]);
279279
Other ->
280-
rabbit_log:warning("Could not transfer leadership of queue ~s to node ~s: ~p",
281-
[rabbit_misc:rs(Name), Pick, Other])
280+
rabbit_log:warning("Could not transfer leadership of queue ~s: ~p",
281+
[rabbit_misc:rs(Name), Other])
282282
end;
283283
undefined ->
284284
rabbit_log:warning("Could not transfer leadership of queue ~s: no suitable candidates?",

deps/rabbit/src/rabbit_mirror_queue_misc.erl

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,8 @@ transfer_leadership(Q, Destination) ->
602602
%% Moves the primary replica (leader) of a classic mirrored queue to another node
603603
%% which already hosts a replica of this queue. In this case we can stop
604604
%% fewer replicas and reduce the load the operation has on the cluster.
605+
%% Note that there is no guarantee that the queue will actually end up on the
606+
%% destination node. The actual destination node is returned.
605607
migrate_leadership_to_existing_replica(Q, Destination) ->
606608
QName = amqqueue:get_name(Q),
607609
{PreTransferPrimaryNode, PreTransferMirrorNodes, _PreTransferInSyncMirrorNodes} = actual_queue_nodes(Q),
@@ -616,7 +618,7 @@ migrate_leadership_to_existing_replica(Q, Destination) ->
616618
NodesToDropMirrorsOn = [PreTransferPrimaryNode],
617619
drop_mirrors(QName, NodesToDropMirrorsOn),
618620

619-
case wait_for_new_master(QName, Destination) of
621+
case wait_for_different_master(QName, PreTransferPrimaryNode) of
620622
not_migrated ->
621623
{not_migrated, undefined};
622624
{{not_migrated, Destination} = Result, _Q1} ->
@@ -655,6 +657,36 @@ wait_for_new_master(QName, Destination, N) ->
655657
end
656658
end.
657659

660+
-spec wait_for_different_master(rabbit_amqqueue:name(), atom()) -> {{migrated, node()}, amqqueue:amqqueue()} | {{not_migrated, node()}, amqqueue:amqqueue()} | not_migrated.
661+
wait_for_different_master(QName, Source) ->
662+
wait_for_different_master(QName, Source, 100).
663+
664+
wait_for_different_master(QName, _, 0) ->
665+
case rabbit_amqqueue:lookup(QName) of
666+
{error, not_found} -> not_migrated;
667+
{ok, Q} -> {{not_migrated, undefined}, Q}
668+
end;
669+
wait_for_different_master(QName, Source, N) ->
670+
case rabbit_amqqueue:lookup(QName) of
671+
{error, not_found} ->
672+
not_migrated;
673+
{ok, Q} ->
674+
case amqqueue:get_pid(Q) of
675+
none ->
676+
timer:sleep(100),
677+
wait_for_different_master(QName, Source, N - 1);
678+
Pid ->
679+
case node(Pid) of
680+
Source ->
681+
timer:sleep(100),
682+
wait_for_different_master(QName, Source, N - 1);
683+
Destination ->
684+
{{migrated, Destination}, Q}
685+
end
686+
end
687+
end.
688+
689+
658690
%% The arrival of a newly synced mirror may cause the master to die if
659691
%% the policy does not want the master but it has been kept alive
660692
%% because there were no synced mirrors.

0 commit comments

Comments
 (0)