Skip to content

Speed up JSON parsing with readinto #3728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 24, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions extmod/modujson.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ typedef struct _ujson_stream_t {
int errcode;
mp_obj_t python_readinto[2 + 1];
mp_obj_array_t bytearray_obj;
size_t start;
size_t end;
byte cur;
} ujson_stream_t;

Expand All @@ -77,28 +79,44 @@ STATIC byte ujson_stream_next(ujson_stream_t *s) {
return s->cur;
}

// We read from an object's `readinto` method in chunks larger than the json
// parser needs to reduce the number of function calls done.

#define CIRCUITPY_JSON_READ_CHUNK_SIZE 64

STATIC mp_uint_t ujson_python_readinto(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode) {
(void) size; // Ignore size because we know it's always 1.
ujson_stream_t* s = obj;
s->bytearray_obj.items = buf;
s->bytearray_obj.len = size;
*errcode = 0;
mp_obj_t ret = mp_call_method_n_kw(1, 0, s->python_readinto);
if (ret == mp_const_none) {
*errcode = MP_EAGAIN;
return MP_STREAM_ERROR;

if (s->start == s->end) {
*errcode = 0;
mp_obj_t ret = mp_call_method_n_kw(1, 0, s->python_readinto);
if (ret == mp_const_none) {
*errcode = MP_EAGAIN;
return MP_STREAM_ERROR;
}
s->start = 0;
s->end = mp_obj_get_int(ret);
}
return mp_obj_get_int(ret);

*((uint8_t *)buf) = ((uint8_t*) s->bytearray_obj.items)[s->start];
s->start++;
return 1;
}

STATIC mp_obj_t _mod_ujson_load(mp_obj_t stream_obj, bool return_first_json) {
const mp_stream_p_t *stream_p = mp_proto_get(MP_QSTR_protocol_stream, stream_obj);
ujson_stream_t s;
uint8_t character_buffer[CIRCUITPY_JSON_READ_CHUNK_SIZE];
if (stream_p == NULL) {
s.start = 0;
s.end = 0;
mp_load_method(stream_obj, MP_QSTR_readinto, s.python_readinto);
s.bytearray_obj.base.type = &mp_type_bytearray;
s.bytearray_obj.typecode = BYTEARRAY_TYPECODE;
s.bytearray_obj.len = CIRCUITPY_JSON_READ_CHUNK_SIZE;
s.bytearray_obj.free = 0;
// len and items are set at read time
s.bytearray_obj.items = character_buffer;
s.python_readinto[2] = MP_OBJ_FROM_PTR(&s.bytearray_obj);
s.stream_obj = &s;
s.read = ujson_python_readinto;
Expand Down