Skip to content

Added logger with basic debugging info #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Changelog
- added `py.typed` file to signal type checkers that this package is typed
- added method to update status message for a run
- added option to set up webhooks for actor builds
- added logger with basic debugging info

### Internal changes

Expand Down
19 changes: 19 additions & 0 deletions docs/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,25 @@ async def main():
asyncio.run(main())
```

### Logging

The library logs some useful debug information to the `apify_client` logger
when sending requests to the Apify API.
To have them printed out to the standard output, you need to add a handler to the logger:

```python
import logging
apify_client_logger = logging.getLogger('apify_client')
apify_client_logger.setLevel(logging.DEBUG)
apify_client_logger.addHandler(logging.StreamHandler())
```

The log records have useful properties added with the `extra` argument,
like `attempt`, `status_code`, `url`, `client_method` and `resource_id`.
To print those out, you'll need to use a custom log formatter.
To learn more about log formatters and how to use them,
please refer to the official Python [documentation on logging](https://docs.python.org/3/howto/logging.html#formatters).

## API Reference

All public classes, methods and their parameters can be inspected in this API reference.
Expand Down
19 changes: 19 additions & 0 deletions docs/res/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,22 @@ async def main():

asyncio.run(main())
```

### Logging

The library logs some useful debug information to the `apify_client` logger
when sending requests to the Apify API.
To have them printed out to the standard output, you need to add a handler to the logger:

```python
import logging
apify_client_logger = logging.getLogger('apify_client')
apify_client_logger.setLevel(logging.DEBUG)
apify_client_logger.addHandler(logging.StreamHandler())
```

The log records have useful properties added with the `extra` argument,
like `attempt`, `status_code`, `url`, `client_method` and `resource_id`.
To print those out, you'll need to use a custom log formatter.
To learn more about log formatters and how to use them,
please refer to the official Python [documentation on logging](https://docs.python.org/3/howto/logging.html#formatters).
Comment on lines +202 to +206
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be beneficial here to show a simple example with custom log formatter? So the users don't need to go dig in the official docs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying to write something, but then I realized there's so many ways how users can have their formatters configured, and I kinda gave up. Usually, if you need logging in your app, you're already using some log formatters, and you won't be learning about them from this library.

Anyway, the logging here is most useful to us while developing the client, not so much to the users (especially the extra log properties), maybe I'll add a mention of the _DebugLogFormatter somewhere to README or something like that.

16 changes: 16 additions & 0 deletions src/apify_client/_http_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import gzip
import json as jsonlib
import logging
import os
import sys
from http import HTTPStatus
Expand All @@ -8,13 +9,16 @@
import httpx

from ._errors import ApifyApiError, InvalidResponseBodyError, _is_retryable_error
from ._logging import logger_name
from ._types import JSONSerializable
from ._utils import _is_content_type_json, _is_content_type_text, _is_content_type_xml, _retry_with_exp_backoff, _retry_with_exp_backoff_async
from ._version import __version__

DEFAULT_BACKOFF_EXPONENTIAL_FACTOR = 2
DEFAULT_BACKOFF_RANDOM_FACTOR = 1

logger = logging.getLogger(logger_name)


class _BaseHTTPClient:
def __init__(
Expand Down Expand Up @@ -129,6 +133,7 @@ def call(
httpx_client = self.httpx_client

def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
logger.debug(f'Sending request to {url}', extra={'attempt': attempt})
try:
request = httpx_client.build_request(
method=method,
Expand All @@ -144,6 +149,7 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:

# If response status is < 300, the request was successful, and we can return the result
if response.status_code < 300:
logger.debug(f'Request to {url} successful', extra={'attempt': attempt, 'status_code': response.status_code})
if not stream:
if parse_response:
_maybe_parsed_body = self._maybe_parse_response(response)
Expand All @@ -154,13 +160,17 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
return response

except Exception as e:
logger.debug(f'Request to {url} threw exception', exc_info=e, extra={'attempt': attempt})
if not _is_retryable_error(e):
logger.debug('Exception is not retryable', exc_info=e, extra={'attempt': attempt})
stop_retrying()
raise e

# We want to retry only requests which are server errors (status >= 500) and could resolve on their own,
# and also retry rate limited requests that throw 429 Too Many Requests errors
logger.debug(f'Request to {url} unsuccessful', extra={'attempt': attempt, 'status_code': response.status_code})
if response.status_code < 500 and response.status_code != HTTPStatus.TOO_MANY_REQUESTS:
logger.debug('Status code is not retryable', extra={'attempt': attempt, 'status_code': response.status_code})
stop_retrying()
raise ApifyApiError(response, attempt)

Expand Down Expand Up @@ -194,6 +204,7 @@ async def call(
httpx_async_client = self.httpx_async_client

async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
logger.debug('Sending request')
try:
request = httpx_async_client.build_request(
method=method,
Expand All @@ -209,6 +220,7 @@ async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response

# If response status is < 300, the request was successful, and we can return the result
if response.status_code < 300:
logger.debug('Request successful', extra={'status_code': response.status_code})
if not stream:
if parse_response:
_maybe_parsed_body = self._maybe_parse_response(response)
Expand All @@ -219,13 +231,17 @@ async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response
return response

except Exception as e:
logger.debug('Request threw exception', exc_info=e)
if not _is_retryable_error(e):
logger.debug('Exception is not retryable', exc_info=e)
stop_retrying()
raise e

# We want to retry only requests which are server errors (status >= 500) and could resolve on their own,
# and also retry rate limited requests that throw 429 Too Many Requests errors
logger.debug('Request unsuccessful', extra={'status_code': response.status_code})
if response.status_code < 500 and response.status_code != HTTPStatus.TOO_MANY_REQUESTS:
logger.debug('Status code is not retryable', extra={'status_code': response.status_code})
stop_retrying()
raise ApifyApiError(response, attempt)

Expand Down
94 changes: 94 additions & 0 deletions src/apify_client/_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import functools
import inspect
import json
import logging
from contextvars import ContextVar
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, cast

# Conditional import only executed when type checking, otherwise we'd get circular dependency issues
if TYPE_CHECKING:
from .clients.base.base_client import _BaseBaseClient

# Name of the logger used throughout the library
logger_name = __name__.split('.')[0]

# Logger used throughout the library
logger = logging.getLogger(logger_name)


# Context variables containing the current resource client running in that context
# Used during logging to inject the resource client context to the log
ctx_client_method = ContextVar[Optional[str]]('client_method', default=None)
ctx_resource_id = ContextVar[Optional[str]]('resource_id', default=None)
ctx_url = ContextVar[Optional[str]]('url', default=None)


# Metaclass for resource clients which wraps all their public methods
# With injection of their details to the log context vars
class _WithLogDetailsClient(type):
def __new__(cls: Type[type], name: str, bases: Tuple, attrs: Dict) -> '_WithLogDetailsClient':
for attr_name, attr_value in attrs.items():
if not attr_name.startswith('_'):
if inspect.isfunction(attr_value):
attrs[attr_name] = _injects_client_details_to_log_context(attr_value)

return cast(_WithLogDetailsClient, type.__new__(cls, name, bases, attrs))


# Wraps an unbound method so that its call will inject the details
# of the resource client (which is the `self` argument of the method)
# to the log context vars
def _injects_client_details_to_log_context(fun: Callable) -> Callable:
if inspect.iscoroutinefunction(fun) or inspect.isasyncgenfunction(fun):
@functools.wraps(fun)
async def async_wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> Any:
ctx_client_method.set(fun.__qualname__)
ctx_resource_id.set(resource_client.resource_id)
ctx_url.set(resource_client.url)

return await fun(resource_client, *args, **kwargs)
return async_wrapper
else:
@functools.wraps(fun)
def wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> Any:
ctx_client_method.set(fun.__qualname__)
ctx_resource_id.set(resource_client.resource_id)
ctx_url.set(resource_client.url)

return fun(resource_client, *args, **kwargs)
return wrapper


# A filter which lets every log record through,
# but adds the current logging context to the record
class _ContextInjectingFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool:
record.client_method = ctx_client_method.get()
record.resource_id = ctx_resource_id.get()
record.url = ctx_url.get()
return True


logger.addFilter(_ContextInjectingFilter())


# Log formatter useful for debugging of the client
# Will print out all the extra fields added to the log record
class _DebugLogFormatter(logging.Formatter):
empty_record = logging.LogRecord('dummy', 0, 'dummy', 0, 'dummy', None, None)

def _get_extra_fields(self, record: logging.LogRecord) -> Dict[str, Any]:
extra_fields: Dict[str, Any] = {}
for key, value in record.__dict__.items():
if key not in self.empty_record.__dict__:
extra_fields[key] = value

return extra_fields

def format(self, record: logging.LogRecord) -> str:
extra = self._get_extra_fields(record)

log_string = super().format(record)
if extra:
log_string = f'{log_string} ({json.dumps(extra)})'
return log_string
4 changes: 3 additions & 1 deletion src/apify_client/clients/base/base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Union

from ..._http_client import _HTTPClient, _HTTPClientAsync
from ..._logging import _WithLogDetailsClient
from ..._utils import _make_async_docs, _to_safe_id

# Conditional import only executed when type checking, otherwise we'd get circular dependency issues
if TYPE_CHECKING:
from ...client import ApifyClient, ApifyClientAsync


class _BaseBaseClient:
class _BaseBaseClient(metaclass=_WithLogDetailsClient):
resource_id: Optional[str]
url: str
params: Dict
http_client: Union[_HTTPClient, _HTTPClientAsync]
Expand Down