@@ -440,7 +440,12 @@ query_node_props(Nodes) when Nodes =/= [] ->
440
440
[Peer ],
441
441
#{domain => ? RMQLOG_DOMAIN_PEER_DISC }),
442
442
try
443
- peer :call (Pid , ? MODULE , do_query_node_props , [Nodes , ThisNode ], 180000 )
443
+ NodesAndProps1 = peer :call (
444
+ Pid ,
445
+ ? MODULE , do_query_node_props ,
446
+ [Nodes , ThisNode ], 180000 ),
447
+ NodesAndProps2 = sort_nodes_and_props (NodesAndProps1 ),
448
+ NodesAndProps2
444
449
after
445
450
peer :stop (Pid )
446
451
end ;
@@ -563,25 +568,31 @@ maybe_add_tls_arguments(VMArgs) ->
563
568
end ,
564
569
VMArgs2 .
565
570
566
do_query_node_props(Nodes, FromNode) when Nodes =/= [] ->
    %% This code runs on the temporary hidden node: lower the primary log
    %% level to `debug' so that every message, whatever its level, is
    %% forwarded to the upstream node.
    _ = logger:set_primary_config(level, debug),

    %% Ask one node for its view of the cluster members. A successful query
    %% is tagged with `ok'; any exception is turned into a `{Class, Reason}'
    %% tuple so that a single failing node does not abort the whole query.
    %%
    %% TODO: Replace with `rabbit_nodes:list_members/0' when the oldest
    %% supported version has it.
    QueryMembers =
        fun(Node) ->
                try erpc_call(Node, rabbit_nodes, all, [], FromNode) of
                    Members -> {ok, Members}
                catch
                    Class:Reason -> {Class, Reason}
                end
        end,
    MembersPerNode = lists:map(QueryMembers, Nodes),
    query_node_props1(Nodes, MembersPerNode, [], FromNode).
575
586
576
587
query_node_props1 (
577
588
[Node | Nodes ], [{ok , Members } | MembersPerNode ], NodesAndProps ,
578
- ThisNode ) ->
589
+ FromNode ) ->
579
590
NodeAndProps = {Node , Members },
580
591
NodesAndProps1 = [NodeAndProps | NodesAndProps ],
581
- query_node_props1 (Nodes , MembersPerNode , NodesAndProps1 , ThisNode );
592
+ query_node_props1 (Nodes , MembersPerNode , NodesAndProps1 , FromNode );
582
593
query_node_props1 (
583
- [Node | Nodes ], [{error , _ } = Error | MembersPerNode ], NodesAndProps ,
584
- ThisNode ) ->
594
+ [Node | Nodes ], [{_ , _ } = Error | MembersPerNode ], NodesAndProps ,
595
+ FromNode ) ->
585
596
% % We consider that an error means the remote node is unreachable or not
586
597
% % ready. Therefore, we exclude it from the list of discovered nodes as we
587
598
% % won't be able to join it anyway.
@@ -590,20 +601,21 @@ query_node_props1(
590
601
" Peer discovery: node '~ts ' excluded from the discovered nodes" ,
591
602
[Node , Error , Node ],
592
603
#{domain => ? RMQLOG_DOMAIN_PEER_DISC }),
593
- query_node_props1 (Nodes , MembersPerNode , NodesAndProps , ThisNode );
594
- query_node_props1 ([], [], NodesAndProps , ThisNode ) ->
604
+ query_node_props1 (Nodes , MembersPerNode , NodesAndProps , FromNode );
605
+ query_node_props1 ([], [], NodesAndProps , FromNode ) ->
595
606
NodesAndProps1 = lists :reverse (NodesAndProps ),
596
- query_node_props2 (NodesAndProps1 , [], ThisNode ).
607
+ query_node_props2 (NodesAndProps1 , [], FromNode ).
597
608
598
- query_node_props2 ([{Node , Members } | Rest ], NodesAndProps , ThisNode ) ->
609
+ query_node_props2 ([{Node , Members } | Rest ], NodesAndProps , FromNode ) ->
599
610
NodesAndProps2 = try
600
- erpc : call (
611
+ erpc_call (
601
612
Node , logger , debug ,
602
613
[" Peer discovery: temporary hidden node '~ts ' "
603
614
" queries properties from node '~ts '" ,
604
- [node (), Node ]]),
605
- StartTime = get_node_start_time (Node , microsecond ),
606
- IsReady = is_node_db_ready (Node , ThisNode ),
615
+ [node (), Node ]], FromNode ),
616
+ StartTime = get_node_start_time (
617
+ Node , microsecond , FromNode ),
618
+ IsReady = is_node_db_ready (Node , FromNode ),
607
619
NodeAndProps = {Node , Members , StartTime , IsReady },
608
620
NodesAndProps1 = [NodeAndProps | NodesAndProps ],
609
621
NodesAndProps1
@@ -623,17 +635,17 @@ query_node_props2([{Node, Members} | Rest], NodesAndProps, ThisNode) ->
623
635
#{domain => ? RMQLOG_DOMAIN_PEER_DISC }),
624
636
NodesAndProps
625
637
end ,
626
- query_node_props2 (Rest , NodesAndProps2 , ThisNode );
627
- query_node_props2 ([], NodesAndProps , _ThisNode ) ->
638
+ query_node_props2 (Rest , NodesAndProps2 , FromNode );
639
+ query_node_props2 ([], NodesAndProps , _FromNode ) ->
628
640
NodesAndProps1 = lists :reverse (NodesAndProps ),
629
- NodesAndProps2 = sort_nodes_and_props (NodesAndProps1 ),
630
641
? assertEqual ([], nodes ()),
631
- ? assert (length (NodesAndProps2 ) =< length (nodes (hidden ))),
632
- NodesAndProps2 .
642
+ ? assert (length (NodesAndProps1 ) =< length (nodes (hidden ))),
643
+ NodesAndProps1 .
633
644
634
- -spec get_node_start_time (Node , Unit ) -> StartTime when
645
+ -spec get_node_start_time (Node , Unit , FromNode ) -> StartTime when
635
646
Node :: node (),
636
647
Unit :: erlang :time_unit (),
648
+ FromNode :: node (),
637
649
StartTime :: non_neg_integer ().
638
650
% % @doc Returns the start time of the given `Node' in `Unit'.
639
651
% %
@@ -653,37 +665,62 @@ query_node_props2([], NodesAndProps, _ThisNode) ->
653
665
% %
654
666
% % @private
655
667
656
get_node_start_time(Node, Unit, FromNode) ->
    %% All three calls are executed on `Node' itself: the start time, the
    %% time offset and the conversion from the `native' unit are all fetched
    %% from that node rather than computed locally.
    NativeStart = erpc_call(
                    Node, erlang, system_info, [start_time], FromNode),
    Offset = erpc_call(Node, erlang, time_offset, [], FromNode),
    erpc_call(
      Node, erlang, convert_time_unit,
      [NativeStart + Offset, native, Unit], FromNode).
664
677
665
- -spec is_node_db_ready (Node , ThisNode ) -> IsReady when
678
+ -spec is_node_db_ready (Node , FromNode ) -> IsReady when
666
679
Node :: node (),
667
- ThisNode :: node (),
680
+ FromNode :: node (),
668
681
IsReady :: boolean () | undefined .
669
682
% % @doc Returns if the node's DB layer is ready or not.
670
683
% %
671
684
% % @private
672
685
673
is_node_db_ready(FromNode, FromNode) ->
    %% The queried node is the one running peer discovery. Peer discovery
    %% runs well before that node marks its DB layer as ready, so report it
    %% as ready here; otherwise, if this node were the one selected, it would
    %% wait forever for itself to become ready.
    true;
is_node_db_ready(Node, FromNode) ->
    try erpc_call(Node, rabbit_db, is_init_finished, [], FromNode) of
        IsReady ->
            IsReady
    catch
        %% `rabbit_db:is_init_finished/0' is not defined on the remote node:
        %% its readiness cannot be determined.
        _:{exception, undef, [{rabbit_db, is_init_finished, _, _} | _]} ->
            undefined
    end.
686
699
700
%% @doc Calls `Mod:Fun(Args)' on `Node' through `erpc', retrying on `erpc'
%% failures with a default retry budget of 10000 milliseconds.
%%
%% @see erpc_call/6
%% @private

erpc_call(Node, Mod, Fun, Args, FromNode) ->
    erpc_call(Node, Mod, Fun, Args, FromNode, 10000).

%% @doc Calls `Mod:Fun(Args)' on `Node' through `erpc', retrying on `erpc'
%% failures.
%%
%% `Timeout' is a retry budget in milliseconds, not a per-call timeout: it
%% is never passed to `erpc:call/4'. Each time the call raises
%% `error:{erpc, _}', the failure is reported at debug level on `FromNode'
%% and the budget is decreased by the one-second pause between attempts.
%% When the budget is exhausted, the last error is re-raised immediately
%% with its original stacktrace.
%%
%% Any exception other than `error:{erpc, _}' is not retried and propagates
%% to the caller as is.
%%
%% @private

erpc_call(Node, Mod, Fun, Args, FromNode, Timeout) when Timeout >= 0 ->
    try
        erpc:call(Node, Mod, Fun, Args)
    catch
        error:{erpc, _} = Reason:Stacktrace ->
            Peer = node(),
            %% Reporting the failure to `FromNode' is best effort only: a
            %% failure to log must never mask the error being handled or
            %% prevent the retry, so any exception is deliberately ignored.
            _ = try
                    erpc:call(
                      FromNode,
                      logger, debug,
                      ["Peer discovery: temporary hidden node '~ts' "
                       "failed to connect to '~ts': ~0p",
                       [Peer, Node, Reason]])
                catch
                    _:_ -> ok
                end,
            Sleep = 1000,
            case Timeout - Sleep of
                NewTimeout when NewTimeout >= 0 ->
                    %% Only pause when another attempt will follow; there
                    %% is no point in delaying the final error.
                    timer:sleep(Sleep),
                    erpc_call(Node, Mod, Fun, Args, FromNode, NewTimeout);
                _ ->
                    erlang:raise(error, Reason, Stacktrace)
            end
    end.
723
+
687
724
-spec sort_nodes_and_props (NodesAndProps ) ->
688
725
SortedNodesAndProps when
689
726
NodesAndProps :: [node_and_props ()],
0 commit comments