Skip to content

Commit 7d30b6c

Browse files
authored
Added logger with basic debugging info (#86)
Logging was missing for a long time from the client, and it made it harder to debug the client. This adds some basic logging to the HTTP calls, using the standard Python logger, enriched by contextual information from the client methods which are doing the HTTP calls by some funky metaclass programming. There is a `_DebugLogFormatter` class which is great for testing, used like this: ```python import logging from apify_client._logging import _DebugLogFormatter client_logger = logging.getLogger('apify_client') client_logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() handler.setFormatter(_DebugLogFormatter()) client_logger.addHandler(handler) ```
1 parent b0fccb6 commit 7d30b6c

File tree

6 files changed

+152
-1
lines changed

6 files changed

+152
-1
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Changelog
1717
- added `py.typed` file to signal type checkers that this package is typed
1818
- added method to update status message for a run
1919
- added option to set up webhooks for actor builds
20+
- added logger with basic debugging info
2021

2122
### Internal changes
2223

docs/docs.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,25 @@ async def main():
186186
asyncio.run(main())
187187
```
188188

189+
### Logging
190+
191+
The library logs some useful debug information to the `apify_client` logger
192+
when sending requests to the Apify API.
193+
To have them printed out to the standard output, you need to add a handler to the logger:
194+
195+
```python
196+
import logging
197+
apify_client_logger = logging.getLogger('apify_client')
198+
apify_client_logger.setLevel(logging.DEBUG)
199+
apify_client_logger.addHandler(logging.StreamHandler())
200+
```
201+
202+
The log records have useful properties added with the `extra` argument,
203+
like `attempt`, `status_code`, `url`, `client_method` and `resource_id`.
204+
To print those out, you'll need to use a custom log formatter.
205+
To learn more about log formatters and how to use them,
206+
please refer to the official Python [documentation on logging](https://docs.python.org/3/howto/logging.html#formatters).
207+
189208
## API Reference
190209

191210
All public classes, methods and their parameters can be inspected in this API reference.

docs/res/intro.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,22 @@ async def main():
185185

186186
asyncio.run(main())
187187
```
188+
189+
### Logging
190+
191+
The library logs some useful debug information to the `apify_client` logger
192+
when sending requests to the Apify API.
193+
To have them printed out to the standard output, you need to add a handler to the logger:
194+
195+
```python
196+
import logging
197+
apify_client_logger = logging.getLogger('apify_client')
198+
apify_client_logger.setLevel(logging.DEBUG)
199+
apify_client_logger.addHandler(logging.StreamHandler())
200+
```
201+
202+
The log records have useful properties added with the `extra` argument,
203+
like `attempt`, `status_code`, `url`, `client_method` and `resource_id`.
204+
To print those out, you'll need to use a custom log formatter.
205+
To learn more about log formatters and how to use them,
206+
please refer to the official Python [documentation on logging](https://docs.python.org/3/howto/logging.html#formatters).

src/apify_client/_http_client.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import gzip
22
import json as jsonlib
3+
import logging
34
import os
45
import sys
56
from http import HTTPStatus
@@ -8,13 +9,16 @@
89
import httpx
910

1011
from ._errors import ApifyApiError, InvalidResponseBodyError, _is_retryable_error
12+
from ._logging import logger_name
1113
from ._types import JSONSerializable
1214
from ._utils import _is_content_type_json, _is_content_type_text, _is_content_type_xml, _retry_with_exp_backoff, _retry_with_exp_backoff_async
1315
from ._version import __version__
1416

1517
DEFAULT_BACKOFF_EXPONENTIAL_FACTOR = 2
1618
DEFAULT_BACKOFF_RANDOM_FACTOR = 1
1719

20+
logger = logging.getLogger(logger_name)
21+
1822

1923
class _BaseHTTPClient:
2024
def __init__(
@@ -129,6 +133,7 @@ def call(
129133
httpx_client = self.httpx_client
130134

131135
def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
136+
logger.debug(f'Sending request to {url}', extra={'attempt': attempt})
132137
try:
133138
request = httpx_client.build_request(
134139
method=method,
@@ -144,6 +149,7 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
144149

145150
# If response status is < 300, the request was successful, and we can return the result
146151
if response.status_code < 300:
152+
logger.debug(f'Request to {url} successful', extra={'attempt': attempt, 'status_code': response.status_code})
147153
if not stream:
148154
if parse_response:
149155
_maybe_parsed_body = self._maybe_parse_response(response)
@@ -154,13 +160,17 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
154160
return response
155161

156162
except Exception as e:
163+
logger.debug(f'Request to {url} threw exception', exc_info=e, extra={'attempt': attempt})
157164
if not _is_retryable_error(e):
165+
logger.debug('Exception is not retryable', exc_info=e, extra={'attempt': attempt})
158166
stop_retrying()
159167
raise e
160168

161169
# We want to retry only requests which are server errors (status >= 500) and could resolve on their own,
162170
# and also retry rate limited requests that throw 429 Too Many Requests errors
171+
logger.debug(f'Request to {url} unsuccessful', extra={'attempt': attempt, 'status_code': response.status_code})
163172
if response.status_code < 500 and response.status_code != HTTPStatus.TOO_MANY_REQUESTS:
173+
logger.debug('Status code is not retryable', extra={'attempt': attempt, 'status_code': response.status_code})
164174
stop_retrying()
165175
raise ApifyApiError(response, attempt)
166176

@@ -194,6 +204,7 @@ async def call(
194204
httpx_async_client = self.httpx_async_client
195205

196206
async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
207+
logger.debug('Sending request')
197208
try:
198209
request = httpx_async_client.build_request(
199210
method=method,
@@ -209,6 +220,7 @@ async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response
209220

210221
# If response status is < 300, the request was successful, and we can return the result
211222
if response.status_code < 300:
223+
logger.debug('Request successful', extra={'status_code': response.status_code})
212224
if not stream:
213225
if parse_response:
214226
_maybe_parsed_body = self._maybe_parse_response(response)
@@ -219,13 +231,17 @@ async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response
219231
return response
220232

221233
except Exception as e:
234+
logger.debug('Request threw exception', exc_info=e)
222235
if not _is_retryable_error(e):
236+
logger.debug('Exception is not retryable', exc_info=e)
223237
stop_retrying()
224238
raise e
225239

226240
# We want to retry only requests which are server errors (status >= 500) and could resolve on their own,
227241
# and also retry rate limited requests that throw 429 Too Many Requests errors
242+
logger.debug('Request unsuccessful', extra={'status_code': response.status_code})
228243
if response.status_code < 500 and response.status_code != HTTPStatus.TOO_MANY_REQUESTS:
244+
logger.debug('Status code is not retryable', extra={'status_code': response.status_code})
229245
stop_retrying()
230246
raise ApifyApiError(response, attempt)
231247

src/apify_client/_logging.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import functools
2+
import inspect
3+
import json
4+
import logging
5+
from contextvars import ContextVar
6+
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, cast
7+
8+
# Conditional import only executed when type checking, otherwise we'd get circular dependency issues
9+
if TYPE_CHECKING:
10+
from .clients.base.base_client import _BaseBaseClient
11+
12+
# Name of the logger used throughout the library
13+
logger_name = __name__.split('.')[0]
14+
15+
# Logger used throughout the library
16+
logger = logging.getLogger(logger_name)
17+
18+
19+
# Context variables containing the current resource client running in that context
20+
# Used during logging to inject the resource client context to the log
21+
ctx_client_method = ContextVar[Optional[str]]('client_method', default=None)
22+
ctx_resource_id = ContextVar[Optional[str]]('resource_id', default=None)
23+
ctx_url = ContextVar[Optional[str]]('url', default=None)
24+
25+
26+
# Metaclass for resource clients which wraps all their public methods
27+
# With injection of their details to the log context vars
28+
class _WithLogDetailsClient(type):
29+
def __new__(cls: Type[type], name: str, bases: Tuple, attrs: Dict) -> '_WithLogDetailsClient':
30+
for attr_name, attr_value in attrs.items():
31+
if not attr_name.startswith('_'):
32+
if inspect.isfunction(attr_value):
33+
attrs[attr_name] = _injects_client_details_to_log_context(attr_value)
34+
35+
return cast(_WithLogDetailsClient, type.__new__(cls, name, bases, attrs))
36+
37+
38+
# Wraps an unbound method so that its call will inject the details
39+
# of the resource client (which is the `self` argument of the method)
40+
# to the log context vars
41+
def _injects_client_details_to_log_context(fun: Callable) -> Callable:
42+
if inspect.iscoroutinefunction(fun) or inspect.isasyncgenfunction(fun):
43+
@functools.wraps(fun)
44+
async def async_wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> Any:
45+
ctx_client_method.set(fun.__qualname__)
46+
ctx_resource_id.set(resource_client.resource_id)
47+
ctx_url.set(resource_client.url)
48+
49+
return await fun(resource_client, *args, **kwargs)
50+
return async_wrapper
51+
else:
52+
@functools.wraps(fun)
53+
def wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> Any:
54+
ctx_client_method.set(fun.__qualname__)
55+
ctx_resource_id.set(resource_client.resource_id)
56+
ctx_url.set(resource_client.url)
57+
58+
return fun(resource_client, *args, **kwargs)
59+
return wrapper
60+
61+
62+
# A filter which lets every log record through,
63+
# but adds the current logging context to the record
64+
class _ContextInjectingFilter(logging.Filter):
65+
def filter(self, record: logging.LogRecord) -> bool:
66+
record.client_method = ctx_client_method.get()
67+
record.resource_id = ctx_resource_id.get()
68+
record.url = ctx_url.get()
69+
return True
70+
71+
72+
logger.addFilter(_ContextInjectingFilter())
73+
74+
75+
# Log formatter useful for debugging of the client
76+
# Will print out all the extra fields added to the log record
77+
class _DebugLogFormatter(logging.Formatter):
78+
empty_record = logging.LogRecord('dummy', 0, 'dummy', 0, 'dummy', None, None)
79+
80+
def _get_extra_fields(self, record: logging.LogRecord) -> Dict[str, Any]:
81+
extra_fields: Dict[str, Any] = {}
82+
for key, value in record.__dict__.items():
83+
if key not in self.empty_record.__dict__:
84+
extra_fields[key] = value
85+
86+
return extra_fields
87+
88+
def format(self, record: logging.LogRecord) -> str:
89+
extra = self._get_extra_fields(record)
90+
91+
log_string = super().format(record)
92+
if extra:
93+
log_string = f'{log_string} ({json.dumps(extra)})'
94+
return log_string

src/apify_client/clients/base/base_client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@
33
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
44

55
from ..._http_client import _HTTPClient, _HTTPClientAsync
6+
from ..._logging import _WithLogDetailsClient
67
from ..._utils import _make_async_docs, _to_safe_id
78

89
# Conditional import only executed when type checking, otherwise we'd get circular dependency issues
910
if TYPE_CHECKING:
1011
from ...client import ApifyClient, ApifyClientAsync
1112

1213

13-
class _BaseBaseClient:
14+
class _BaseBaseClient(metaclass=_WithLogDetailsClient):
15+
resource_id: Optional[str]
1416
url: str
1517
params: Dict
1618
http_client: Union[_HTTPClient, _HTTPClientAsync]

0 commit comments

Comments
 (0)