19
19
from kafka .util import Timer
20
20
21
21
log = logging .getLogger ('kafka.coordinator' )
22
+ heartbeat_log = logging .getLogger ('kafka.coordinator.heartbeat' )
22
23
23
24
24
25
class MemberState (object ):
@@ -449,11 +450,12 @@ def join_group(self, timeout_ms=None):
449
450
timeout_ms = timer .timeout_ms )
450
451
self .rejoining = True
451
452
452
- # fence off the heartbeat thread explicitly so that it cannot
453
- # interfere with the join group. # Note that this must come after
454
- # the call to onJoinPrepare since we must be able to continue
455
- # sending heartbeats if that callback takes some time.
456
- self ._disable_heartbeat_thread ()
453
+ # fence off the heartbeat thread explicitly so that it cannot
454
+ # interfere with the join group. Note that this must come after
455
+ # the call to onJoinPrepare since we must be able to continue
456
+ # sending heartbeats if that callback takes some time.
457
+ log .debug ("Disabling heartbeat thread during join-group" )
458
+ self ._disable_heartbeat_thread ()
457
459
458
460
# ensure that there are no pending requests to the coordinator.
459
461
# This is important in particular to avoid resending a pending
@@ -779,7 +781,7 @@ def _handle_group_coordinator_response(self, future, response):
779
781
future .failure (error )
780
782
else :
781
783
error = error_type ()
782
- log .error ("Group coordinator lookup for group %s failed: %s" ,
784
+ log .error ("Group Coordinator lookup for group %s failed: %s" ,
783
785
self .group_id , error )
784
786
future .failure (error )
785
787
@@ -815,11 +817,11 @@ def _start_heartbeat_thread(self):
815
817
raise Errors .UnsupportedVersionError ('Heartbeat APIs require 0.9+ broker' )
816
818
with self ._lock :
817
819
if self ._heartbeat_thread is None :
818
- log .info ('Starting new heartbeat thread' )
820
+ heartbeat_log .info ('Starting new heartbeat thread' )
819
821
self ._heartbeat_thread = HeartbeatThread (weakref .proxy (self ))
820
822
self ._heartbeat_thread .daemon = True
821
823
self ._heartbeat_thread .start ()
822
- log .debug ("Started heartbeat thread %s" , self ._heartbeat_thread .ident )
824
+ heartbeat_log .debug ("Started heartbeat thread %s" , self ._heartbeat_thread .ident )
823
825
824
826
def _disable_heartbeat_thread (self ):
825
827
with self ._lock :
@@ -829,7 +831,7 @@ def _disable_heartbeat_thread(self):
829
831
def _close_heartbeat_thread (self , timeout_ms = None ):
830
832
with self ._lock :
831
833
if self ._heartbeat_thread is not None :
832
- log .info ('Stopping heartbeat thread' )
834
+ heartbeat_log .info ('Stopping heartbeat thread' )
833
835
try :
834
836
self ._heartbeat_thread .close (timeout_ms = timeout_ms )
835
837
except ReferenceError :
@@ -893,7 +895,7 @@ def _send_heartbeat_request(self):
893
895
request = HeartbeatRequest [version ](self .group_id ,
894
896
self ._generation .generation_id ,
895
897
self ._generation .member_id )
896
- log .debug ("Heartbeat: %s[%s] %s" , request .group , request .generation_id , request .member_id ) # pylint: disable-msg=no-member
898
+ heartbeat_log .debug ("Heartbeat: %s[%s] %s" , request .group , request .generation_id , request .member_id ) # pylint: disable-msg=no-member
897
899
future = Future ()
898
900
_f = self ._client .send (self .coordinator_id , request )
899
901
_f .add_callback (self ._handle_heartbeat_response , future , time .time ())
@@ -906,38 +908,38 @@ def _handle_heartbeat_response(self, future, send_time, response):
906
908
self ._sensors .heartbeat_latency .record ((time .time () - send_time ) * 1000 )
907
909
error_type = Errors .for_code (response .error_code )
908
910
if error_type is Errors .NoError :
909
- log .debug ("Received successful heartbeat response for group %s" ,
911
+ heartbeat_log .debug ("Received successful heartbeat response for group %s" ,
910
912
self .group_id )
911
913
future .success (None )
912
914
elif error_type in (Errors .CoordinatorNotAvailableError ,
913
915
Errors .NotCoordinatorError ):
914
- log .warning ("Heartbeat failed for group %s: coordinator (node %s)"
916
+ heartbeat_log .warning ("Heartbeat failed for group %s: coordinator (node %s)"
915
917
" is either not started or not valid" , self .group_id ,
916
918
self .coordinator ())
917
919
self .coordinator_dead (error_type ())
918
920
future .failure (error_type ())
919
921
elif error_type is Errors .RebalanceInProgressError :
920
- log .warning ("Heartbeat failed for group %s because it is"
922
+ heartbeat_log .warning ("Heartbeat failed for group %s because it is"
921
923
" rebalancing" , self .group_id )
922
924
self .request_rejoin ()
923
925
future .failure (error_type ())
924
926
elif error_type is Errors .IllegalGenerationError :
925
- log .warning ("Heartbeat failed for group %s: generation id is not "
927
+ heartbeat_log .warning ("Heartbeat failed for group %s: generation id is not "
926
928
" current." , self .group_id )
927
929
self .reset_generation ()
928
930
future .failure (error_type ())
929
931
elif error_type is Errors .UnknownMemberIdError :
930
- log .warning ("Heartbeat: local member_id was not recognized;"
932
+ heartbeat_log .warning ("Heartbeat: local member_id was not recognized;"
931
933
" this consumer needs to re-join" )
932
934
self .reset_generation ()
933
935
future .failure (error_type )
934
936
elif error_type is Errors .GroupAuthorizationFailedError :
935
937
error = error_type (self .group_id )
936
- log .error ("Heartbeat failed: authorization error: %s" , error )
938
+ heartbeat_log .error ("Heartbeat failed: authorization error: %s" , error )
937
939
future .failure (error )
938
940
else :
939
941
error = error_type ()
940
- log .error ("Heartbeat failed: Unhandled error: %s" , error )
942
+ heartbeat_log .error ("Heartbeat failed: Unhandled error: %s" , error )
941
943
future .failure (error )
942
944
943
945
@@ -1003,14 +1005,14 @@ def __init__(self, coordinator):
1003
1005
1004
1006
def enable (self ):
1005
1007
with self .coordinator ._lock :
1006
- log .debug ('Enabling heartbeat thread' )
1008
+ heartbeat_log .debug ('Enabling heartbeat thread' )
1007
1009
self .enabled = True
1008
1010
self .coordinator .heartbeat .reset_timeouts ()
1009
1011
self .coordinator ._lock .notify ()
1010
1012
1011
1013
def disable (self ):
1012
1014
with self .coordinator ._lock :
1013
- log .debug ('Disabling heartbeat thread' )
1015
+ heartbeat_log .debug ('Disabling heartbeat thread' )
1014
1016
self .enabled = False
1015
1017
1016
1018
def close (self , timeout_ms = None ):
@@ -1032,24 +1034,24 @@ def close(self, timeout_ms=None):
1032
1034
timeout_ms = self .coordinator .config ['heartbeat_interval_ms' ]
1033
1035
self .join (timeout_ms / 1000 )
1034
1036
if self .is_alive ():
1035
- log .warning ("Heartbeat thread did not fully terminate during close" )
1037
+ heartbeat_log .warning ("Heartbeat thread did not fully terminate during close" )
1036
1038
1037
1039
def run (self ):
1038
1040
try :
1039
- log .debug ('Heartbeat thread started' )
1041
+ heartbeat_log .debug ('Heartbeat thread started' )
1040
1042
while not self .closed :
1041
1043
self ._run_once ()
1042
1044
1043
1045
except ReferenceError :
1044
- log .debug ('Heartbeat thread closed due to coordinator gc' )
1046
+ heartbeat_log .debug ('Heartbeat thread closed due to coordinator gc' )
1045
1047
1046
1048
except RuntimeError as e :
1047
- log .error ("Heartbeat thread for group %s failed due to unexpected error: %s" ,
1049
+ heartbeat_log .error ("Heartbeat thread for group %s failed due to unexpected error: %s" ,
1048
1050
self .coordinator .group_id , e )
1049
1051
self .failed = e
1050
1052
1051
1053
finally :
1052
- log .debug ('Heartbeat thread closed' )
1054
+ heartbeat_log .debug ('Heartbeat thread closed' )
1053
1055
1054
1056
def _run_once (self ):
1055
1057
with self .coordinator ._client ._lock , self .coordinator ._lock :
@@ -1063,16 +1065,16 @@ def _run_once(self):
1063
1065
1064
1066
with self .coordinator ._lock :
1065
1067
if not self .enabled :
1066
- log .debug ('Heartbeat disabled. Waiting' )
1068
+ heartbeat_log .debug ('Heartbeat disabled. Waiting' )
1067
1069
self .coordinator ._lock .wait ()
1068
- log .debug ('Heartbeat re-enabled.' )
1070
+ heartbeat_log .debug ('Heartbeat re-enabled.' )
1069
1071
return
1070
1072
1071
1073
if self .coordinator .state is not MemberState .STABLE :
1072
1074
# the group is not stable (perhaps because we left the
1073
1075
# group or because the coordinator kicked us out), so
1074
1076
# disable heartbeats and wait for the main thread to rejoin.
1075
- log .debug ('Group state is not stable, disabling heartbeats' )
1077
+ heartbeat_log .debug ('Group state is not stable, disabling heartbeats' )
1076
1078
self .disable ()
1077
1079
return
1078
1080
@@ -1088,14 +1090,14 @@ def _run_once(self):
1088
1090
# the session timeout has expired without seeing a
1089
1091
# successful heartbeat, so we should probably make sure
1090
1092
# the coordinator is still healthy.
1091
- log .warning ('Heartbeat session expired, marking coordinator dead' )
1093
+ heartbeat_log .warning ('Heartbeat session expired, marking coordinator dead' )
1092
1094
self .coordinator .coordinator_dead ('Heartbeat session expired' )
1093
1095
1094
1096
elif self .coordinator .heartbeat .poll_timeout_expired ():
1095
1097
# the poll timeout has expired, which means that the
1096
1098
# foreground thread has stalled in between calls to
1097
1099
# poll(), so we explicitly leave the group.
1098
- log .warning ('Heartbeat poll expired, leaving group' )
1100
+ heartbeat_log .warning ('Heartbeat poll expired, leaving group' )
1099
1101
### XXX
1100
1102
# maybe_leave_group acquires client + coordinator lock;
1101
1103
# if we hold coordinator lock before calling, we risk deadlock
@@ -1106,7 +1108,7 @@ def _run_once(self):
1106
1108
elif not self .coordinator .heartbeat .should_heartbeat ():
1107
1109
# poll again after waiting for the retry backoff in case
1108
1110
# the heartbeat failed or the coordinator disconnected
1109
- log .log (0 , 'Not ready to heartbeat, waiting' )
1111
+ heartbeat_log .log (0 , 'Not ready to heartbeat, waiting' )
1110
1112
self .coordinator ._lock .wait (self .coordinator .config ['retry_backoff_ms' ] / 1000 )
1111
1113
1112
1114
else :
0 commit comments