Skip to content

gh-122163: Add notes for JSON serialization errors #122165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,13 @@ Added support for converting any objects that have the
:meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`.
(Contributed by Serhiy Storchaka in :gh:`82017`.)

json
----

Add notes for JSON serialization errors that make it possible to identify
the source of the error.
(Contributed by Serhiy Storchaka in :gh:`122163`.)

os
--

Expand Down
3 changes: 2 additions & 1 deletion Include/internal/pycore_pyerrors.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ extern PyObject* _Py_Offer_Suggestions(PyObject* exception);
PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b,
Py_ssize_t max_cost);

void _PyErr_FormatNote(const char *format, ...);
// Export for '_json' shared extension
PyAPI_FUNC(void) _PyErr_FormatNote(const char *format, ...);

/* Context manipulation (PEP 3134) */

Expand Down
119 changes: 67 additions & 52 deletions Lib/json/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,37 +293,40 @@ def _iterencode_list(lst, _current_indent_level):
else:
newline_indent = None
separator = _item_separator
first = True
for value in lst:
if first:
first = False
else:
for i, value in enumerate(lst):
if i:
buf = separator
if isinstance(value, str):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is False:
yield buf + 'false'
elif isinstance(value, int):
# Subclasses of int/float may override __repr__, but we still
# want to encode them as integers/floats in JSON. One example
# within the standard library is IntEnum.
yield buf + _intstr(value)
elif isinstance(value, float):
# see comment above for int
yield buf + _floatstr(value)
else:
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
try:
if isinstance(value, str):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is False:
yield buf + 'false'
elif isinstance(value, int):
# Subclasses of int/float may override __repr__, but we still
# want to encode them as integers/floats in JSON. One example
# within the standard library is IntEnum.
yield buf + _intstr(value)
elif isinstance(value, float):
# see comment above for int
yield buf + _floatstr(value)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(lst).__name__} item {i}')
raise
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
Expand Down Expand Up @@ -382,28 +385,34 @@ def _iterencode_dict(dct, _current_indent_level):
yield item_separator
yield _encoder(key)
yield _key_separator
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
# see comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
try:
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
# see comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
yield from chunks
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(dct).__name__} item {key!r}')
raise
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
Expand Down Expand Up @@ -436,8 +445,14 @@ def _iterencode(o, _current_indent_level):
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
yield from _iterencode(o, _current_indent_level)
newobj = _default(o)
try:
yield from _iterencode(newobj, _current_indent_level)
except GeneratorExit:
raise
except BaseException as exc:
exc.add_note(f'when serializing {type(o).__name__} object')
raise
if markers is not None:
del markers[markerid]
return _iterencode
18 changes: 18 additions & 0 deletions Lib/test/test_json/test_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,24 @@ def test_default(self):
self.dumps(type, default=repr),
self.dumps(repr(type)))

def test_bad_default(self):
def default(obj):
if obj is NotImplemented:
raise ValueError
if obj is ...:
return NotImplemented
if obj is type:
return collections
return [...]

with self.assertRaises(ValueError) as cm:
self.dumps(type, default=default)
self.assertEqual(cm.exception.__notes__,
['when serializing ellipsis object',
'when serializing list item 0',
'when serializing module object',
'when serializing type object'])

def test_ordereddict(self):
od = collections.OrderedDict(a=1, b=2, c=3, d=4)
od.move_to_end('b')
Expand Down
21 changes: 20 additions & 1 deletion Lib/test/test_json/test_fail.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,27 @@ def test_non_string_keys_dict(self):
def test_not_serializable(self):
import sys
with self.assertRaisesRegex(TypeError,
'Object of type module is not JSON serializable'):
'Object of type module is not JSON serializable') as cm:
self.dumps(sys)
self.assertFalse(hasattr(cm.exception, '__notes__'))

with self.assertRaises(TypeError) as cm:
self.dumps([1, [2, 3, sys]])
self.assertEqual(cm.exception.__notes__,
['when serializing list item 2',
'when serializing list item 1'])

with self.assertRaises(TypeError) as cm:
self.dumps((1, (2, 3, sys)))
self.assertEqual(cm.exception.__notes__,
['when serializing tuple item 2',
'when serializing tuple item 1'])

with self.assertRaises(TypeError) as cm:
self.dumps({'a': {'b': sys}})
self.assertEqual(cm.exception.__notes__,
["when serializing dict item 'b'",
"when serializing dict item 'a'"])

def test_truncated_input(self):
test_cases = [
Expand Down
18 changes: 10 additions & 8 deletions Lib/test/test_json/test_recursion.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ def test_listrecursion(self):
x.append(x)
try:
self.dumps(x)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing list item 0"])
else:
self.fail("didn't raise ValueError on list recursion")
x = []
y = [x]
x.append(y)
try:
self.dumps(x)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
else:
self.fail("didn't raise ValueError on alternating list recursion")
y = []
Expand All @@ -35,8 +35,8 @@ def test_dictrecursion(self):
x["test"] = x
try:
self.dumps(x)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
else:
self.fail("didn't raise ValueError on dict recursion")
x = {}
Expand All @@ -60,8 +60,10 @@ def default(self, o):
enc.recurse = True
try:
enc.encode(JSONTestObject)
except ValueError:
pass
except ValueError as exc:
self.assertEqual(exc.__notes__,
["when serializing list item 0",
"when serializing type object"])
else:
self.fail("didn't raise ValueError on default recursion")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add notes for JSON serialization errors that make it possible to identify
the source of the error.
13 changes: 9 additions & 4 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "Python.h"
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
#include "pycore_runtime.h" // _PyRuntime
#include "pycore_pyerrors.h" // _PyErr_FormatNote

#include "pycore_global_strings.h" // _Py_ID()
#include <stdbool.h> // bool
Expand Down Expand Up @@ -1461,6 +1462,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,

Py_DECREF(newobj);
if (rv) {
_PyErr_FormatNote("when serializing %T object", obj);
Py_XDECREF(ident);
return -1;
}
Expand All @@ -1477,7 +1479,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,

static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
PyObject *key, PyObject *value,
PyObject *dct, PyObject *key, PyObject *value,
PyObject *newline_indent,
PyObject *item_separator)
{
Expand Down Expand Up @@ -1535,6 +1537,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
return -1;
}
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
_PyErr_FormatNote("when serializing %T item %R", dct, key);
return -1;
}
return 0;
Expand Down Expand Up @@ -1606,7 +1609,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,

key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1);
if (encoder_encode_key_value(s, writer, &first, key, value,
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
Expand All @@ -1616,7 +1619,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
} else {
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
if (encoder_encode_key_value(s, writer, &first, key, value,
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
Expand Down Expand Up @@ -1710,8 +1713,10 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
goto bail;
}
}
if (ident != NULL) {
if (PyDict_DelItem(s->markers, ident))
Expand Down
Loading