Branch fix: No infinite loops during metadata requests, invalidate metadata more, exception hierarchy #100

Merged
4 commits merged on Jan 14, 2014
168 changes: 88 additions & 80 deletions kafka/client.py
@@ -1,14 +1,15 @@
import copy
import logging

from collections import defaultdict
from functools import partial
from itertools import count
import logging
import time

from kafka.common import (
ErrorMapping, TopicAndPartition, ConnectionError,
FailedPayloadsException
)
from kafka.common import (ErrorMapping, TopicAndPartition,
ConnectionError, FailedPayloadsError,
BrokerResponseError, PartitionUnavailableError,
KafkaUnavailableError, KafkaRequestError)

from kafka.conn import KafkaConnection
from kafka.protocol import KafkaProtocol

@@ -29,8 +30,8 @@ def __init__(self, host, port, client_id=CLIENT_ID, timeout=10):
}
self.brokers = {} # broker_id -> BrokerMetadata
self.topics_to_brokers = {} # topic_id -> broker_id
self.topic_partitions = defaultdict(list) # topic_id -> [0, 1, 2, ...]
self._load_metadata_for_topics()
self.topic_partitions = {} # topic_id -> [0, 1, 2, ...]
self.load_metadata_for_topics() # bootstrap with all metadata

##################
# Private API #
@@ -49,55 +50,13 @@ def _get_conn_for_broker(self, broker):
def _get_leader_for_partition(self, topic, partition):
key = TopicAndPartition(topic, partition)
if key not in self.topics_to_brokers:
self._load_metadata_for_topics(topic)
self.load_metadata_for_topics(topic)

if key not in self.topics_to_brokers:
raise Exception("Partition does not exist: %s" % str(key))
raise KafkaRequestError("Partition does not exist: %s" % str(key))

return self.topics_to_brokers[key]

def _load_metadata_for_topics(self, *topics):
"""
Discover brokers and metadata for a set of topics. This method will
recurse in the event of a retry.
"""
request_id = self._next_id()
request = KafkaProtocol.encode_metadata_request(self.client_id,
request_id, topics)

response = self._send_broker_unaware_request(request_id, request)
if response is None:
raise Exception("All servers failed to process request")

(brokers, topics) = KafkaProtocol.decode_metadata_response(response)

log.debug("Broker metadata: %s", brokers)
log.debug("Topic metadata: %s", topics)

self.brokers = brokers
self.topics_to_brokers = {}

for topic, partitions in topics.items():
# Clear the list once before we add it. This removes stale entries
# and avoids duplicates
self.topic_partitions.pop(topic, None)

if not partitions:
log.info("Partition is unassigned, delay for 1s and retry")
time.sleep(1)
self._load_metadata_for_topics(topic)
break

for partition, meta in partitions.items():
if meta.leader == -1:
log.info("Partition is unassigned, delay for 1s and retry")
time.sleep(1)
self._load_metadata_for_topics(topic)
else:
topic_part = TopicAndPartition(topic, partition)
self.topics_to_brokers[topic_part] = brokers[meta.leader]
self.topic_partitions[topic].append(partition)

def _next_id(self):
"""
Generate a new correlation id
@@ -119,7 +78,7 @@ def _send_broker_unaware_request(self, requestId, request):
"trying next server: %s" % (request, conn, e))
continue

return None
raise KafkaUnavailableError("All servers failed to process request")

def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):
"""
@@ -150,6 +109,8 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):
for payload in payloads:
leader = self._get_leader_for_partition(payload.topic,
payload.partition)
if leader == -1:
raise PartitionUnavailableError("Leader is unassigned for %s-%s" % (payload.topic, payload.partition))
payloads_by_broker[leader].append(payload)
original_keys.append((payload.topic, payload.partition))

@@ -185,21 +146,51 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):

if failed:
failed_payloads += payloads
self.topics_to_brokers = {} # reset metadata
self.reset_all_metadata()
continue

for response in decoder_fn(response):
acc[(response.topic, response.partition)] = response

if failed_payloads:
raise FailedPayloadsException(failed_payloads)
raise FailedPayloadsError(failed_payloads)

# Order the accumulated responses by the original key order
return (acc[k] for k in original_keys) if acc else ()

def _raise_on_response_error(self, resp):
if resp.error == ErrorMapping.NO_ERROR:
return

if resp.error in (ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON,
ErrorMapping.NOT_LEADER_FOR_PARTITION):
self.reset_topic_metadata(resp.topic)

raise BrokerResponseError(
"Request for %s failed with errorcode=%d" %
(TopicAndPartition(resp.topic, resp.partition), resp.error))

#################
# Public API #
#################
def reset_topic_metadata(self, *topics):
for topic in topics:
try:
partitions = self.topic_partitions[topic]
except KeyError:
continue

for partition in partitions:
self.topics_to_brokers.pop(TopicAndPartition(topic, partition), None)

del self.topic_partitions[topic]

def reset_all_metadata(self):
self.topics_to_brokers.clear()
self.topic_partitions.clear()

def has_metadata_for_topic(self, topic):
return topic in self.topic_partitions

def close(self):
for conn in self.conns.values():
@@ -219,6 +210,36 @@ def reinit(self):
for conn in self.conns.values():
conn.reinit()

def load_metadata_for_topics(self, *topics):
"""
Discover brokers and metadata for a set of topics. This function is called
lazily whenever metadata is unavailable.
"""
request_id = self._next_id()
request = KafkaProtocol.encode_metadata_request(self.client_id,
request_id, topics)

response = self._send_broker_unaware_request(request_id, request)

(brokers, topics) = KafkaProtocol.decode_metadata_response(response)

log.debug("Broker metadata: %s", brokers)
log.debug("Topic metadata: %s", topics)

self.brokers = brokers

for topic, partitions in topics.items():
self.reset_topic_metadata(topic)

if not partitions:
continue

self.topic_partitions[topic] = []
for partition, meta in partitions.items():
topic_part = TopicAndPartition(topic, partition)
self.topics_to_brokers[topic_part] = brokers[meta.leader]
self.topic_partitions[topic].append(partition)

def send_produce_request(self, payloads=[], acks=1, timeout=1000,
fail_on_error=True, callback=None):
"""
@@ -256,14 +277,9 @@ def send_produce_request(self, payloads=[], acks=1, timeout=1000,

out = []
for resp in resps:
# Check for errors
if fail_on_error is True and resp.error != ErrorMapping.NO_ERROR:
raise Exception(
"ProduceRequest for %s failed with errorcode=%d" %
(TopicAndPartition(resp.topic, resp.partition),
resp.error))

# Run the callback
if fail_on_error is True:
self._raise_on_response_error(resp)

if callback is not None:
out.append(callback(resp))
else:
@@ -289,14 +305,9 @@ def send_fetch_request(self, payloads=[], fail_on_error=True,

out = []
for resp in resps:
# Check for errors
if fail_on_error is True and resp.error != ErrorMapping.NO_ERROR:
raise Exception(
"FetchRequest for %s failed with errorcode=%d" %
(TopicAndPartition(resp.topic, resp.partition),
resp.error))

# Run the callback
if fail_on_error is True:
self._raise_on_response_error(resp)

if callback is not None:
out.append(callback(resp))
else:
@@ -312,9 +323,8 @@ def send_offset_request(self, payloads=[], fail_on_error=True,

out = []
for resp in resps:
if fail_on_error is True and resp.error != ErrorMapping.NO_ERROR:
raise Exception("OffsetRequest failed with errorcode=%s",
resp.error)
if fail_on_error is True:
self._raise_on_response_error(resp)
if callback is not None:
out.append(callback(resp))
else:
@@ -330,9 +340,8 @@ def send_offset_commit_request(self, group, payloads=[],

out = []
for resp in resps:
if fail_on_error is True and resp.error != ErrorMapping.NO_ERROR:
raise Exception("OffsetCommitRequest failed with "
"errorcode=%s", resp.error)
if fail_on_error is True:
self._raise_on_response_error(resp)

if callback is not None:
out.append(callback(resp))
@@ -350,9 +359,8 @@ def send_offset_fetch_request(self, group, payloads=[],

out = []
for resp in resps:
if fail_on_error is True and resp.error != ErrorMapping.NO_ERROR:
raise Exception("OffsetCommitRequest failed with errorcode=%s",
resp.error)
if fail_on_error is True:
self._raise_on_response_error(resp)
if callback is not None:
out.append(callback(resp))
else:
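For reference, a minimal sketch of how calling code interacts with the reworked client. It assumes the class is KafkaClient and that ProduceRequest and create_message keep their existing shapes; only the methods and exceptions visible in the diff above are relied on, and the snippet is illustrative rather than part of the patch.

```python
from kafka.client import KafkaClient
from kafka.common import (ProduceRequest, KafkaUnavailableError,
                          PartitionUnavailableError, FailedPayloadsError)
from kafka.protocol import create_message

try:
    # Bootstrap now goes through the public load_metadata_for_topics();
    # if no broker answers, KafkaUnavailableError is raised instead of
    # silently returning None and failing later.
    client = KafkaClient("localhost", 9092)
except KafkaUnavailableError as e:
    raise SystemExit("no brokers reachable: %s" % e)

# Metadata for an unseen topic is fetched lazily on first use.
if not client.has_metadata_for_topic("my-topic"):
    client.load_metadata_for_topics("my-topic")

req = ProduceRequest("my-topic", 0, [create_message("hello")])
try:
    client.send_produce_request([req])
except PartitionUnavailableError:
    # Leader still unassigned; the client no longer sleeps and recurses,
    # so the caller decides whether and when to retry.
    pass
except FailedPayloadsError:
    # The client has already invalidated its cached metadata
    # (reset_all_metadata), so a plain retry re-fetches leaders lazily.
    pass
```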
35 changes: 29 additions & 6 deletions kafka/common.py
@@ -69,23 +69,46 @@ class ErrorMapping(object):
# Exceptions #
#################

class FailedPayloadsException(Exception):

class KafkaError(RuntimeError):
pass


class KafkaRequestError(KafkaError):
pass


class KafkaUnavailableError(KafkaError):
pass


class BrokerResponseError(KafkaError):
pass

class ConnectionError(Exception):

class PartitionUnavailableError(KafkaError):
pass


class FailedPayloadsError(KafkaError):
pass

class BufferUnderflowError(Exception):

class ConnectionError(KafkaError):
pass


class BufferUnderflowError(KafkaError):
pass


class ChecksumError(Exception):
class ChecksumError(KafkaError):
pass


class ConsumerFetchSizeTooSmall(Exception):
class ConsumerFetchSizeTooSmall(KafkaError):
pass


class ConsumerNoMoreData(Exception):
class ConsumerNoMoreData(KafkaError):
pass
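
With every exception rooted at KafkaError (itself a RuntimeError), callers can handle any library failure with a single except clause instead of enumerating concrete types. A small sketch of the pattern this enables, assuming only the class names added above:

```python
import logging

from kafka.common import ConnectionError, KafkaError

log = logging.getLogger("kafka.example")

def run_safely(operation):
    """Invoke a kafka-python call, treating any library failure uniformly."""
    try:
        return operation()
    except ConnectionError:
        # Socket-level failure: let the caller decide to reconnect and retry.
        raise
    except KafkaError as e:
        # Catches BrokerResponseError, FailedPayloadsError, ChecksumError,
        # BufferUnderflowError, ... without naming each subclass.
        log.warning("kafka operation failed: %s", e)
        return None
```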
2 changes: 1 addition & 1 deletion kafka/consumer.py
@@ -76,7 +76,7 @@ def __init__(self, client, group, topic, partitions=None, auto_commit=True,
self.client = client
self.topic = topic
self.group = group
self.client._load_metadata_for_topics(topic)
self.client.load_metadata_for_topics(topic)
self.offsets = {}

if not partitions:
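The consumer change is a one-liner, but it puts the consumer on the same error model: both the client bootstrap and the consumer's topic lookup go through the public load_metadata_for_topics(), which raises KafkaUnavailableError when every broker fails to answer. A hedged usage sketch (SimpleConsumer's positional arguments taken from the hunk header above):

```python
from kafka.client import KafkaClient
from kafka.common import KafkaUnavailableError
from kafka.consumer import SimpleConsumer

try:
    client = KafkaClient("localhost", 9092)
    consumer = SimpleConsumer(client, "my-group", "my-topic")
except KafkaUnavailableError as e:
    # Raised by load_metadata_for_topics() when no broker responds,
    # instead of the previous bare Exception.
    print("cluster unreachable: %s" % e)
```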
15 changes: 7 additions & 8 deletions kafka/producer.py
@@ -1,17 +1,16 @@
from __future__ import absolute_import

import logging
import time

from Queue import Empty
from collections import defaultdict
from itertools import cycle
from multiprocessing import Queue, Process
from Queue import Empty
import logging
import sys
import time

from kafka.common import ProduceRequest
from kafka.common import FailedPayloadsException
from kafka.protocol import create_message
from kafka.partitioner import HashedPartitioner
from kafka.protocol import create_message

log = logging.getLogger("kafka")

@@ -188,7 +187,7 @@ def __init__(self, client, topic, async=False,
batch_send_every_n=BATCH_SEND_MSG_COUNT,
batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
self.topic = topic
client._load_metadata_for_topics(topic)
client.load_metadata_for_topics(topic)
self.next_partition = cycle(client.topic_partitions[topic])

super(SimpleProducer, self).__init__(client, async, req_acks,
@@ -225,7 +224,7 @@ def __init__(self, client, topic, partitioner=None, async=False,
batch_send_every_n=BATCH_SEND_MSG_COUNT,
batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
self.topic = topic
client._load_metadata_for_topics(topic)
client.load_metadata_for_topics(topic)

if not partitioner:
partitioner = HashedPartitioner
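The producer change is equally mechanical: the public load_metadata_for_topics() replaces the underscored helper, and partition assignment then cycles over client.topic_partitions[topic], which is now a plain dict populated only by that call. A hedged usage sketch, with SimpleProducer and send_messages assumed from the kafka-python API of this era:

```python
from kafka.client import KafkaClient
from kafka.common import KafkaError
from kafka.producer import SimpleProducer

client = KafkaClient("localhost", 9092)

# Construction calls client.load_metadata_for_topics("my-topic") before
# building the partition cycle, so metadata problems show up here.
producer = SimpleProducer(client, "my-topic")

try:
    producer.send_messages("first message", "second message")
except KafkaError as e:
    # FailedPayloadsError, BrokerResponseError, etc. all share this base.
    print("send failed: %s" % e)
```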