@@ -398,7 +398,7 @@ def _should_recycle_connection(self, conn):
398
398
399
399
return False
400
400
401
- def _maybe_connect (self , node_id ):
401
+ def _init_connect (self , node_id ):
402
402
"""Idempotent non-blocking connection attempt to the given node id.
403
403
404
404
Returns True if connection object exists and is connected / connecting
@@ -427,10 +427,8 @@ def _maybe_connect(self, node_id):
427
427
** self .config )
428
428
self ._conns [node_id ] = conn
429
429
430
- elif conn .connected ():
431
- return True
432
-
433
- conn .connect ()
430
+ if conn .disconnected ():
431
+ conn .connect ()
434
432
return not conn .disconnected ()
435
433
436
434
def ready (self , node_id , metadata_priority = True ):
@@ -621,15 +619,18 @@ def poll(self, timeout_ms=None, future=None):
621
619
if self ._closed :
622
620
break
623
621
624
- # Send a metadata request if needed (or initiate new connection)
625
- metadata_timeout_ms = self ._maybe_refresh_metadata ()
626
-
627
622
# Attempt to complete pending connections
628
623
for node_id in list (self ._connecting ):
629
624
# False return means no more connection progress is possible
630
625
# Connected nodes will update _connecting via state_change callback
631
- if not self ._maybe_connect (node_id ):
632
- self ._connecting .remove (node_id )
626
+ if not self ._init_connect (node_id ):
627
+ # It's possible that the connection attempt triggered a state change
628
+ # but if not, make sure to remove it from the _connecting set
629
+ if node_id in self ._connecting :
630
+ self ._connecting .remove (node_id )
631
+
632
+ # Send a metadata request if needed (or initiate new connection)
633
+ metadata_timeout_ms = self ._maybe_refresh_metadata ()
633
634
634
635
# If we got a future that is already done, don't block in _poll
635
636
if future is not None and future .is_done :
@@ -679,6 +680,8 @@ def _poll(self, timeout):
679
680
self ._register_send_sockets ()
680
681
681
682
start_select = time .time ()
683
+ if timeout == float ('inf' ):
684
+ timeout = None
682
685
ready = self ._selector .select (timeout )
683
686
end_select = time .time ()
684
687
if self ._sensors :
@@ -893,6 +896,26 @@ def _maybe_refresh_metadata(self, wakeup=False):
893
896
log .debug ("Give up sending metadata request since no node is available. (reconnect delay %d ms)" , next_connect_ms )
894
897
return next_connect_ms
895
898
899
+ if not self ._can_send_request (node_id ):
900
+ # If there's any connection establishment underway, wait until it completes. This prevents
901
+ # the client from unnecessarily connecting to additional nodes while a previous connection
902
+ # attempt has not been completed.
903
+ if self ._connecting :
904
+ return float ('inf' )
905
+
906
+ elif self ._can_connect (node_id ):
907
+ log .debug ("Initializing connection to node %s for metadata request" , node_id )
908
+ self ._connecting .add (node_id )
909
+ if not self ._init_connect (node_id ):
910
+ if node_id in self ._connecting :
911
+ self ._connecting .remove (node_id )
912
+ # Connection attempt failed immediately, need to retry with a different node
913
+ return self .config ['reconnect_backoff_ms' ]
914
+ else :
915
+ # Existing connection with max in-flight requests. Wait for a request to complete.
916
+ return self .config ['request_timeout_ms' ]
917
+
918
+ # Recheck node_id in case we were able to connect immediately above
896
919
if self ._can_send_request (node_id ):
897
920
topics = list (self ._topics )
898
921
if not topics and self .cluster .is_bootstrap (node_id ):
@@ -917,20 +940,11 @@ def refresh_done(val_or_error):
917
940
future .add_errback (refresh_done )
918
941
return self .config ['request_timeout_ms' ]
919
942
920
- # If there's any connection establishment underway, wait until it completes. This prevents
921
- # the client from unnecessarily connecting to additional nodes while a previous connection
922
- # attempt has not been completed.
943
+ # Should only get here if still connecting
923
944
if self ._connecting :
924
945
return float ('inf' )
925
-
926
- if self .maybe_connect (node_id , wakeup = wakeup ):
927
- log .debug ("Initializing connection to node %s for metadata request" , node_id )
928
- return float ('inf' )
929
-
930
- # connected but can't send more, OR connecting
931
- # In either case we just need to wait for a network event
932
- # to let us know the selected connection might be usable again.
933
- return float ('inf' )
946
+ else :
947
+ return self .config ['reconnect_backoff_ms' ]
934
948
935
949
def get_api_versions (self ):
936
950
"""Return the ApiVersions map, if available.
@@ -973,7 +987,7 @@ def check_version(self, node_id=None, timeout=None, strict=False):
973
987
if try_node is None :
974
988
self ._lock .release ()
975
989
raise Errors .NoBrokersAvailable ()
976
- if not self ._maybe_connect (try_node ):
990
+ if not self ._init_connect (try_node ):
977
991
if try_node == node_id :
978
992
raise Errors .NodeNotReadyError ("Connection failed to %s" % node_id )
979
993
else :
0 commit comments