Skip to content

Commit 62df7ab

Browse files
committed
Improve struct compatibility with CPython
1 parent 28cfd8a commit 62df7ab

File tree

5 files changed

+126
-75
lines changed

5 files changed

+126
-75
lines changed

py/binary.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ size_t mp_binary_get_size(char struct_type, char val_type, mp_uint_t *palign) {
4949
switch (struct_type) {
5050
case '<': case '>':
5151
switch (val_type) {
52-
case 'b': case 'B':
52+
case 'b': case 'B': case 'x':
5353
size = 1; break;
5454
case 'h': case 'H':
5555
size = 2; break;
@@ -79,7 +79,7 @@ size_t mp_binary_get_size(char struct_type, char val_type, mp_uint_t *palign) {
7979
// particular (or any) ABI.
8080
switch (val_type) {
8181
case BYTEARRAY_TYPECODE:
82-
case 'b': case 'B':
82+
case 'b': case 'B': case 'x':
8383
align = size = 1; break;
8484
case 'h': case 'H':
8585
align = alignof(short);
@@ -126,6 +126,7 @@ mp_obj_t mp_binary_get_val_array(char typecode, void *p, mp_uint_t index) {
126126
break;
127127
case BYTEARRAY_TYPECODE:
128128
case 'B':
129+
case 'x': // value will be discarded
129130
val = ((unsigned char*)p)[index];
130131
break;
131132
case 'h':
@@ -364,6 +365,8 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, mp_uint_t index, m
364365
case 'B':
365366
((unsigned char*)p)[index] = val;
366367
break;
368+
case 'x':
369+
((unsigned char*)p)[index] = 0;
367370
case 'h':
368371
((short*)p)[index] = val;
369372
break;

py/modstruct.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,10 @@ STATIC size_t calc_size_items(const char *fmt, size_t *total_sz) {
9797
total_cnt += 1;
9898
size += cnt;
9999
} else {
100-
total_cnt += cnt;
100+
// Pad bytes are skipped and don't get included in the item count.
101+
if (*fmt != 'x') {
102+
total_cnt += cnt;
103+
}
101104
mp_uint_t align;
102105
size_t sz = mp_binary_get_size(fmt_type, *fmt, &align);
103106
while (cnt--) {
@@ -166,7 +169,10 @@ STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) {
166169
} else {
167170
while (cnt--) {
168171
item = mp_binary_get_val(fmt_type, *fmt, &p);
169-
res->items[i++] = item;
172+
// Pad bytes ('x') are just skipped.
173+
if (*fmt != 'x') {
174+
res->items[i++] = item;
175+
}
170176
}
171177
}
172178
fmt++;
@@ -204,7 +210,11 @@ STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, c
204210
} else {
205211
// If we run out of args then we just finish; CPython would raise struct.error
206212
while (cnt-- && i < n_args) {
207-
mp_binary_set_val(fmt_type, *fmt, args[i++], &p);
213+
mp_binary_set_val(fmt_type, *fmt, args[i], &p);
214+
// Pad bytes don't have a corresponding argument.
215+
if (*fmt != 'x') {
216+
i++;
217+
}
208218
}
209219
}
210220
fmt++;

shared-bindings/struct/__init__.c

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
//|
5252
//| Supported size/byte order prefixes: *@*, *<*, *>*, *!*.
5353
//|
54-
//| Supported format codes: *b*, *B*, *h*, *H*, *i*, *I*, *l*, *L*, *q*, *Q*,
54+
//| Supported format codes: *b*, *B*, *x*, *h*, *H*, *i*, *I*, *l*, *L*, *q*, *Q*,
5555
//| *s*, *P*, *f*, *d* (the latter 2 depending on the floating-point support).
5656
//|
5757

@@ -74,7 +74,6 @@ MP_DEFINE_CONST_FUN_OBJ_1(struct_calcsize_obj, struct_calcsize);
7474
//|
7575

7676
STATIC mp_obj_t struct_pack(size_t n_args, const mp_obj_t *args) {
77-
// TODO: "The arguments must match the values required by the format exactly."
7877
mp_int_t size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0]));
7978
vstr_t vstr;
8079
vstr_init_len(&vstr, size);
@@ -115,49 +114,67 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_pack_into_obj, 3, MP_OBJ_FUN_ARGS_MAX
115114
//| .. function:: unpack(fmt, data)
116115
//|
117116
//| Unpack from the data according to the format string fmt. The return value
118-
//| is a tuple of the unpacked values.
117+
//| is a tuple of the unpacked values. The buffer size must match the size
118+
//| required by the format.
119119
//|
120120

121-
//| .. function:: unpack_from(fmt, data, offset)
121+
STATIC mp_obj_t struct_unpack(size_t n_args, const mp_obj_t *args) {
122+
mp_buffer_info_t bufinfo;
123+
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
124+
byte *p = bufinfo.buf;
125+
byte *end_p = &p[bufinfo.len];
126+
127+
// true means check the size must be exactly right.
128+
return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[0] , p, end_p, true));
129+
}
130+
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_obj, 2, 3, struct_unpack);
131+
132+
//| .. function:: unpack_from(fmt, data, offset=0)
122133
//|
123134
//| Unpack from the data starting at offset according to the format string fmt.
124135
//| offset may be negative to count from the end of buffer. The return value is
125-
//| a tuple of the unpacked values.
136+
//| a tuple of the unpacked values. The buffer size must be at least as big
137+
//| as the size required by the form.
126138
//|
127139

128-
STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) {
129-
// unpack requires that the buffer be exactly the right size.
130-
// unpack_from requires that the buffer be "big enough".
131-
// Since we implement unpack and unpack_from using the same function
132-
// we relax the "exact" requirement, and only implement "big enough".
140+
STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
141+
enum { ARG_format, ARG_buffer, ARG_offset };
142+
static const mp_arg_t allowed_args[] = {
143+
{ MP_QSTR_format, MP_ARG_REQUIRED | MP_ARG_OBJ },
144+
{ MP_QSTR_buffer, MP_ARG_REQUIRED | MP_ARG_OBJ },
145+
{ MP_QSTR_offset, MP_ARG_INT, {.u_int = 0} },
146+
};
147+
148+
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
149+
mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
150+
133151
mp_buffer_info_t bufinfo;
134-
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
152+
mp_get_buffer_raise(args[ARG_buffer].u_obj, &bufinfo, MP_BUFFER_READ);
135153
byte *p = bufinfo.buf;
136154
byte *end_p = &p[bufinfo.len];
137155

138-
if (n_args > 2) {
139-
mp_int_t offset = mp_obj_get_int(args[2]);
140-
// offset arg provided
156+
mp_int_t offset = args[ARG_offset].u_int;
157+
if (offset < 0) {
158+
// negative offsets are relative to the end of the buffer
159+
offset = bufinfo.len + offset;
141160
if (offset < 0) {
142-
// negative offsets are relative to the end of the buffer
143-
offset = bufinfo.len + offset;
144-
if (offset < 0) {
145-
mp_raise_RuntimeError(translate("buffer too small"));
146-
}
161+
mp_raise_RuntimeError(translate("buffer too small"));
147162
}
148-
p += offset;
149163
}
164+
p += offset;
150165

151-
return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[0] , p, end_p));
166+
// false means the size doesn't have to be exact. struct.unpack_from() only requires
167+
// that be buffer be big enough.
168+
return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[ARG_format].u_obj, p, end_p, false));
152169
}
153-
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_from_obj, 2, 3, struct_unpack_from);
170+
MP_DEFINE_CONST_FUN_OBJ_KW(struct_unpack_from_obj, 0, struct_unpack_from);
154171

155172
STATIC const mp_rom_map_elem_t mp_module_struct_globals_table[] = {
156173
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_struct) },
157174
{ MP_ROM_QSTR(MP_QSTR_calcsize), MP_ROM_PTR(&struct_calcsize_obj) },
158175
{ MP_ROM_QSTR(MP_QSTR_pack), MP_ROM_PTR(&struct_pack_obj) },
159176
{ MP_ROM_QSTR(MP_QSTR_pack_into), MP_ROM_PTR(&struct_pack_into_obj) },
160-
{ MP_ROM_QSTR(MP_QSTR_unpack), MP_ROM_PTR(&struct_unpack_from_obj) },
177+
{ MP_ROM_QSTR(MP_QSTR_unpack), MP_ROM_PTR(&struct_unpack_obj) },
161178
{ MP_ROM_QSTR(MP_QSTR_unpack_from), MP_ROM_PTR(&struct_unpack_from_obj) },
162179
};
163180

shared-bindings/struct/__init__.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,6 @@
2929

3030
void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args);
3131
mp_uint_t shared_modules_struct_calcsize(mp_obj_t fmt_in);
32-
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p);
32+
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p, bool exact_size);
3333

3434
#endif // MICROPY_INCLUDED_SHARED_BINDINGS_RANDOM___INIT___H

shared-module/struct/__init__.c

Lines changed: 67 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -71,45 +71,6 @@ mp_uint_t get_fmt_num(const char **p) {
7171
return val;
7272
}
7373

74-
void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args) {
75-
const char *fmt = mp_obj_str_get_str(fmt_in);
76-
char fmt_type = get_fmt_type(&fmt);
77-
78-
size_t i;
79-
for (i = 0; i < n_args;) {
80-
mp_uint_t sz = 1;
81-
if (*fmt == '\0') {
82-
// more arguments given than used by format string; CPython raises struct.error here
83-
mp_raise_RuntimeError(translate("too many arguments provided with the given format"));
84-
}
85-
struct_validate_format(*fmt);
86-
87-
if (unichar_isdigit(*fmt)) {
88-
sz = get_fmt_num(&fmt);
89-
}
90-
if (p + sz > end_p) {
91-
mp_raise_RuntimeError(translate("buffer too small"));
92-
}
93-
94-
if (*fmt == 's') {
95-
mp_buffer_info_t bufinfo;
96-
mp_get_buffer_raise(args[i++], &bufinfo, MP_BUFFER_READ);
97-
mp_uint_t to_copy = sz;
98-
if (bufinfo.len < to_copy) {
99-
to_copy = bufinfo.len;
100-
}
101-
memcpy(p, bufinfo.buf, to_copy);
102-
memset(p + to_copy, 0, sz - to_copy);
103-
p += sz;
104-
} else {
105-
while (sz--) {
106-
mp_binary_set_val(fmt_type, *fmt, args[i++], &p);
107-
}
108-
}
109-
fmt++;
110-
}
111-
}
112-
11374
mp_uint_t calcsize_items(const char *fmt) {
11475
mp_uint_t cnt = 0;
11576
while (*fmt) {
@@ -120,7 +81,10 @@ mp_uint_t calcsize_items(const char *fmt) {
12081
num = 1;
12182
}
12283
}
123-
cnt += num;
84+
// Pad bytes are skipped and don't get included in the item count.
85+
if (*fmt != 'x') {
86+
cnt += num;
87+
}
12488
fmt++;
12589
}
12690
return cnt;
@@ -155,14 +119,71 @@ mp_uint_t shared_modules_struct_calcsize(mp_obj_t fmt_in) {
155119
return size;
156120
}
157121

122+
void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args) {
123+
const char *fmt = mp_obj_str_get_str(fmt_in);
124+
char fmt_type = get_fmt_type(&fmt);
125+
const mp_uint_t total_sz = shared_modules_struct_calcsize(fmt_in);
126+
127+
if (p + total_sz != end_p) {
128+
mp_raise_msg_varg(&mp_type_RuntimeError, translate("unpack requires a buffer of %d bytes"), total_sz);
129+
}
130+
131+
size_t i;
132+
for (i = 0; i < n_args;) {
133+
mp_uint_t sz = 1;
134+
if (*fmt == '\0') {
135+
// more arguments given than used by format string; CPython raises struct.error here
136+
mp_raise_RuntimeError(translate("too many arguments provided with the given format"));
137+
}
138+
struct_validate_format(*fmt);
139+
140+
if (unichar_isdigit(*fmt)) {
141+
sz = get_fmt_num(&fmt);
142+
}
143+
144+
if (*fmt == 's') {
145+
mp_buffer_info_t bufinfo;
146+
mp_get_buffer_raise(args[i++], &bufinfo, MP_BUFFER_READ);
147+
mp_uint_t to_copy = sz;
148+
if (bufinfo.len < to_copy) {
149+
to_copy = bufinfo.len;
150+
}
151+
memcpy(p, bufinfo.buf, to_copy);
152+
memset(p + to_copy, 0, sz - to_copy);
153+
p += sz;
154+
} else {
155+
while (sz--) {
156+
mp_binary_set_val(fmt_type, *fmt, args[i], &p);
157+
// Pad bytes don't have a corresponding argument.
158+
if (*fmt != 'x') {
159+
i++;
160+
}
161+
}
162+
}
163+
fmt++;
164+
}
165+
}
158166

159-
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p) {
167+
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p, bool exact_size) {
160168

161169
const char *fmt = mp_obj_str_get_str(fmt_in);
162170
char fmt_type = get_fmt_type(&fmt);
163-
mp_uint_t num_items = calcsize_items(fmt);
171+
const mp_uint_t num_items = calcsize_items(fmt);
172+
const mp_uint_t total_sz = shared_modules_struct_calcsize(fmt_in);
164173
mp_obj_tuple_t *res = MP_OBJ_TO_PTR(mp_obj_new_tuple(num_items, NULL));
165174

175+
// If exact_size, make sure the buffer is exactly the right size.
176+
// Otherwise just make sure it's big enough.
177+
if (exact_size) {
178+
if (p + total_sz != end_p) {
179+
mp_raise_RuntimeError(translate("buffer size must match format"));
180+
}
181+
} else {
182+
if (p + total_sz > end_p) {
183+
mp_raise_RuntimeError(translate("buffer too small"));
184+
}
185+
}
186+
166187
for (uint i = 0; i < num_items;) {
167188
mp_uint_t sz = 1;
168189

@@ -171,9 +192,6 @@ mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byt
171192
if (unichar_isdigit(*fmt)) {
172193
sz = get_fmt_num(&fmt);
173194
}
174-
if (p + sz > end_p) {
175-
mp_raise_RuntimeError(translate("buffer too small"));
176-
}
177195
mp_obj_t item;
178196
if (*fmt == 's') {
179197
item = mp_obj_new_bytes(p, sz);
@@ -182,7 +200,10 @@ mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byt
182200
} else {
183201
while (sz--) {
184202
item = mp_binary_get_val(fmt_type, *fmt, &p);
185-
res->items[i++] = item;
203+
// Pad bytes are not stored.
204+
if (*fmt != 'x') {
205+
res->items[i++] = item;
206+
}
186207
}
187208
}
188209
fmt++;

0 commit comments

Comments
 (0)