Skip to content

Commit c49d520

Browse files
committed
py/persistentcode: Remove unicode feature flag from .mpy file.
Prior to this commit, even with unicode disabled, .py and .mpy files could contain unicode characters, e.g. by entering them directly in a string as UTF-8 encoded text. The only thing the compiler disallowed (with unicode disabled) was using \uxxxx and \Uxxxxxxxx notation to specify a character within a string with value >= 0x100; that would give a SyntaxError. With this change, mpy-cross will now accept \u and \U notation to insert a character with value >= 0x100 into a string (because the -mno-unicode option is now gone, there's no way to forbid this). The runtime will happily work with strings containing such characters, just like it already works with strings whose characters were UTF-8 encoded directly. This change simplifies things because there are no longer any feature flags in .mpy files, and any bytecode .mpy will now run on any target. Signed-off-by: Damien George <[email protected]>
1 parent b295b6f commit c49d520

File tree

8 files changed

+35
-56
lines changed

8 files changed

+35
-56
lines changed

mpy-cross/main.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ STATIC int usage(char **argv) {
108108
"\n"
109109
"Target specific options:\n"
110110
"-msmall-int-bits=number : set the maximum bits used to encode a small-int\n"
111-
"-mno-unicode : don't support unicode in compiled strings\n"
112111
"-march=<arch> : set architecture for native emitter; x86, x64, armv6, armv7m, armv7em, armv7emsp, armv7emdp, xtensa, xtensawin\n"
113112
"\n"
114113
"Implementation specific options:\n", argv[0]
@@ -203,7 +202,6 @@ MP_NOINLINE int main_(int argc, char **argv) {
203202

204203
// set default compiler configuration
205204
mp_dynamic_compiler.small_int_bits = 31;
206-
mp_dynamic_compiler.py_builtins_str_unicode = 1;
207205
#if defined(__i386__)
208206
mp_dynamic_compiler.native_arch = MP_NATIVE_ARCH_X86;
209207
mp_dynamic_compiler.nlr_buf_num_regs = MICROPY_NLR_NUM_REGS_X86;
@@ -261,10 +259,6 @@ MP_NOINLINE int main_(int argc, char **argv) {
261259
return usage(argv);
262260
}
263261
// TODO check that small_int_bits is within range of host's capabilities
264-
} else if (strcmp(argv[a], "-mno-unicode") == 0) {
265-
mp_dynamic_compiler.py_builtins_str_unicode = 0;
266-
} else if (strcmp(argv[a], "-municode") == 0) {
267-
mp_dynamic_compiler.py_builtins_str_unicode = 1;
268262
} else if (strncmp(argv[a], "-march=", sizeof("-march=") - 1) == 0) {
269263
const char *arch = argv[a] + sizeof("-march=") - 1;
270264
if (strcmp(arch, "x86") == 0) {

py/lexer.c

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -473,25 +473,23 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
473473
}
474474
}
475475
if (c != MP_LEXER_EOF) {
476-
if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) {
477-
if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
478-
vstr_add_char(&lex->vstr, c);
479-
} else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
480-
vstr_add_byte(&lex->vstr, c);
481-
} else {
482-
// unicode character out of range
483-
// this raises a generic SyntaxError; could provide more info
484-
lex->tok_kind = MP_TOKEN_INVALID;
485-
}
486-
} else {
487-
// without unicode everything is just added as an 8-bit byte
488-
if (c < 0x100) {
489-
vstr_add_byte(&lex->vstr, c);
490-
} else {
491-
// 8-bit character out of range
492-
// this raises a generic SyntaxError; could provide more info
493-
lex->tok_kind = MP_TOKEN_INVALID;
494-
}
476+
#if MICROPY_PY_BUILTINS_STR_UNICODE
477+
if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
478+
// Valid unicode character in a str object.
479+
vstr_add_char(&lex->vstr, c);
480+
} else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
481+
// Valid byte in a bytes object.
482+
vstr_add_byte(&lex->vstr, c);
483+
}
484+
#else
485+
if (c < 0x100) {
486+
// Without unicode everything is just added as an 8-bit byte.
487+
vstr_add_byte(&lex->vstr, c);
488+
}
489+
#endif
490+
else {
491+
// Character out of range; this raises a generic SyntaxError.
492+
lex->tok_kind = MP_TOKEN_INVALID;
495493
}
496494
}
497495
} else {

py/mpconfig.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -429,13 +429,6 @@
429429
#define MICROPY_DYNAMIC_COMPILER (0)
430430
#endif
431431

432-
// Configure dynamic compiler macros
433-
#if MICROPY_DYNAMIC_COMPILER
434-
#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC (mp_dynamic_compiler.py_builtins_str_unicode)
435-
#else
436-
#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC MICROPY_PY_BUILTINS_STR_UNICODE
437-
#endif
438-
439432
// Whether to enable constant folding; eg 1+2 rewritten as 3
440433
#ifndef MICROPY_COMP_CONST_FOLDING
441434
#define MICROPY_COMP_CONST_FOLDING (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES)

py/mpstate.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ enum {
5555
#if MICROPY_DYNAMIC_COMPILER
5656
typedef struct mp_dynamic_compiler_t {
5757
uint8_t small_int_bits; // must be <= host small_int_bits
58-
bool py_builtins_str_unicode;
5958
uint8_t native_arch;
6059
uint8_t nlr_buf_num_regs;
6160
} mp_dynamic_compiler_t;

py/persistentcode.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,11 @@
4242
#define MPY_FEATURE_DECODE_ARCH(feat) ((feat) >> 2)
4343

4444
// The feature flag bits encode the compile-time config options that affect
45-
// the generate bytecode. Note: position 0 is now unused
46-
// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE).
47-
#define MPY_FEATURE_FLAGS ( \
48-
((MICROPY_PY_BUILTINS_STR_UNICODE) << 1) \
49-
)
45+
// the generated bytecode. Note: no longer used.
46+
// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE and MICROPY_PY_BUILTINS_STR_UNICODE).
47+
#define MPY_FEATURE_FLAGS (0)
5048
// This is a version of the flags that can be configured at runtime.
51-
#define MPY_FEATURE_FLAGS_DYNAMIC ( \
52-
((MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) << 1) \
53-
)
49+
#define MPY_FEATURE_FLAGS_DYNAMIC (0)
5450

5551
// Define the host architecture
5652
#if MICROPY_EMIT_X86

tests/micropython/import_mpy_native_gc.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ def open(self, path, mode):
4949
# by the required value of sys.implementation._mpy.
5050
features0_file_contents = {
5151
# -march=x64
52-
0xA06: b'M\x06\n\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff',
52+
0x806: b'M\x06\b\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff',
5353
# -march=armv7m
54-
0x1606: b"M\x06\x16\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff",
54+
0x1406: b"M\x06\x14\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff",
5555
}
5656

5757
# Populate other armv7m-derived archs based on armv7m.
58-
for arch in (0x1A06, 0x1E06, 0x2206):
59-
features0_file_contents[arch] = features0_file_contents[0x1606]
58+
for arch in (0x1806, 0x1C06, 0x2006):
59+
features0_file_contents[arch] = features0_file_contents[0x1406]
6060

6161
if sys.implementation._mpy not in features0_file_contents:
6262
print("SKIP")

tests/micropython/import_mpy_native_x64.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,11 @@ def open(self, path, mode):
5252
# fmt: off
5353
user_files = {
5454
# bad architecture
55-
'/mod0.mpy': b'M\x06\xfe\x00\x10',
55+
'/mod0.mpy': b'M\x06\xfc\x00\x10',
5656

5757
# test loading of viper and asm
5858
'/mod1.mpy': (
59-
b'M\x06\x0a\x1f' # header
59+
b'M\x06\x08\x1f' # header
6060

6161
b'\x02' # n_qstr
6262
b'\x00' # n_obj
@@ -85,7 +85,7 @@ def open(self, path, mode):
8585

8686
# test loading viper with additional scope flags and relocation
8787
'/mod2.mpy': (
88-
b'M\x06\x0a\x1f' # header
88+
b'M\x06\x08\x1f' # header
8989

9090
b'\x02' # n_qstr
9191
b'\x00' # n_obj

tools/mpy_ld.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
MP_SCOPE_FLAG_VIPERRELOC = 0x10
4949
MP_SCOPE_FLAG_VIPERRODATA = 0x20
5050
MP_SCOPE_FLAG_VIPERBSS = 0x40
51-
MICROPY_PY_BUILTINS_STR_UNICODE = 2
5251
MP_SMALL_INT_BITS = 31
5352

5453
# ELF constants
@@ -116,55 +115,55 @@ def __init__(self, name, mpy_feature, qstr_entry_size, word_size, arch_got, asm_
116115
ARCH_DATA = {
117116
"x86": ArchData(
118117
"EM_386",
119-
MP_NATIVE_ARCH_X86 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
118+
MP_NATIVE_ARCH_X86 << 2,
120119
2,
121120
4,
122121
(R_386_PC32, R_386_GOT32, R_386_GOT32X),
123122
asm_jump_x86,
124123
),
125124
"x64": ArchData(
126125
"EM_X86_64",
127-
MP_NATIVE_ARCH_X64 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
126+
MP_NATIVE_ARCH_X64 << 2,
128127
2,
129128
8,
130129
(R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX),
131130
asm_jump_x86,
132131
),
133132
"armv7m": ArchData(
134133
"EM_ARM",
135-
MP_NATIVE_ARCH_ARMV7M << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
134+
MP_NATIVE_ARCH_ARMV7M << 2,
136135
2,
137136
4,
138137
(R_ARM_GOT_BREL,),
139138
asm_jump_arm,
140139
),
141140
"armv7emsp": ArchData(
142141
"EM_ARM",
143-
MP_NATIVE_ARCH_ARMV7EMSP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
142+
MP_NATIVE_ARCH_ARMV7EMSP << 2,
144143
2,
145144
4,
146145
(R_ARM_GOT_BREL,),
147146
asm_jump_arm,
148147
),
149148
"armv7emdp": ArchData(
150149
"EM_ARM",
151-
MP_NATIVE_ARCH_ARMV7EMDP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
150+
MP_NATIVE_ARCH_ARMV7EMDP << 2,
152151
2,
153152
4,
154153
(R_ARM_GOT_BREL,),
155154
asm_jump_arm,
156155
),
157156
"xtensa": ArchData(
158157
"EM_XTENSA",
159-
MP_NATIVE_ARCH_XTENSA << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
158+
MP_NATIVE_ARCH_XTENSA << 2,
160159
2,
161160
4,
162161
(R_XTENSA_32, R_XTENSA_PLT),
163162
asm_jump_xtensa,
164163
),
165164
"xtensawin": ArchData(
166165
"EM_XTENSA",
167-
MP_NATIVE_ARCH_XTENSAWIN << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
166+
MP_NATIVE_ARCH_XTENSAWIN << 2,
168167
4,
169168
4,
170169
(R_XTENSA_32, R_XTENSA_PLT),

0 commit comments

Comments
 (0)