Skip to content

Commit 7717ab8

Browse files
authored
Merge pull request #6754 from jepler/check-read-utf8
When reading data from a file into a str, check if it's utf-8
2 parents a223102 + 606c75a commit 7717ab8

File tree

2 files changed

+13
-6
lines changed

2 files changed

+13
-6
lines changed

py/stream.c

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#include "py/objstr.h"
3232
#include "py/stream.h"
3333
#include "py/runtime.h"
34+
#include "py/unicode.h"
3435
#include "supervisor/shared/translate/translate.h"
3536

3637
// This file defines generic Python stream read/write methods which
@@ -43,6 +44,13 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in);
4344

4445
#define STREAM_CONTENT_TYPE(stream) (((stream)->is_text) ? &mp_type_str : &mp_type_bytes)
4546

47+
static mp_obj_t mp_obj_new_str_from_vstr_check(const mp_obj_type_t *type, vstr_t *vstr) {
48+
if (type == &mp_type_str && !utf8_check((void *)vstr->buf, vstr->len)) {
49+
mp_raise_msg(&mp_type_UnicodeError, NULL);
50+
}
51+
return mp_obj_new_str_from_vstr(type, vstr);
52+
}
53+
4654
// Returns error condition in *errcode, if non-zero, return value is number of bytes written
4755
// before error condition occurred. If *errcode == 0, returns total bytes written (which will
4856
// be equal to input size).
@@ -201,8 +209,7 @@ STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte fl
201209
}
202210
}
203211
}
204-
205-
return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
212+
return mp_obj_new_str_from_vstr_check(&mp_type_str, &vstr);
206213
}
207214
#endif
208215

@@ -223,7 +230,7 @@ STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte fl
223230
mp_raise_OSError(error);
224231
} else {
225232
vstr.len = out_sz;
226-
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
233+
return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr);
227234
}
228235
}
229236

@@ -364,7 +371,7 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
364371
}
365372

366373
vstr.len = total_size;
367-
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
374+
return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr);
368375
}
369376

370377
// Unbuffered, inefficient implementation of readline() for raw I/O files.
@@ -417,7 +424,7 @@ STATIC mp_obj_t stream_unbuffered_readline(size_t n_args, const mp_obj_t *args)
417424
}
418425
}
419426

420-
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
427+
return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr);
421428
}
422429
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_unbuffered_readline_obj, 1, 2, stream_unbuffered_readline);
423430

tests/extmod/qrio.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
raise SystemExit
66

77
loc = __file__.rsplit("/", 1)[0]
8-
with open(f"{loc}/data/qr.pgm") as f:
8+
with open(f"{loc}/data/qr.pgm", "rb") as f:
99
content = f.read()[-320 * 240 :]
1010

1111
decoder = qrio.QRDecoder(320, 240)

0 commit comments

Comments
 (0)