Skip to content

Commit 13b13d1

Browse files
jepler and dpgeorge committed
py/parsenum: Throw an exception for invalid int literals like "01".
This includes making int("01") parse in base 10 like standard Python. When a base of 0 is specified it means auto-detect based on the prefix, and literals beginning with 0 (except when the literal is all 0's) like "01" are then invalid and now throw an exception. The new error message is different from CPython. It says e.g., `SyntaxError: invalid syntax for integer with base 0: '09'` Additional test cases were added to cover the changed & added code. Co-authored-by: Damien George <[email protected]> Signed-off-by: Jeff Epler <[email protected]>
1 parent 7b3f189 commit 13b13d1

File tree

5 files changed

+37
-21
lines changed

5 files changed

+37
-21
lines changed

py/objint.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
5555
return o;
5656
} else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) {
5757
// a textual representation, parse it
58-
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL);
58+
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL);
5959
#if MICROPY_PY_BUILTINS_FLOAT
6060
} else if (mp_obj_is_float(args[0])) {
6161
return mp_obj_new_int_from_float(mp_obj_float_get(args[0]));

py/parsenum.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,13 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
151151
raise_exc(exc, lex);
152152
#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL
153153
mp_obj_t exc = mp_obj_new_exception_msg_varg(&mp_type_ValueError,
154-
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base);
154+
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base == 1 ? 0 : base);
155155
raise_exc(exc, lex);
156156
#else
157157
vstr_t vstr;
158158
mp_print_t print;
159159
vstr_init_print(&vstr, 50, &print);
160-
mp_printf(&print, "invalid syntax for integer with base %d: ", base);
160+
mp_printf(&print, "invalid syntax for integer with base %d: ", base == 1 ? 0 : base);
161161
mp_str_print_quoted(&print, str_val_start, top - str_val_start, true);
162162
mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError,
163163
mp_obj_new_str_from_utf8_vstr(&vstr));

py/parsenumbase.c

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,35 +30,28 @@
3030

3131
// find real radix base, and strip preceding '0x', '0o' and '0b'
3232
// puts base in *base, and returns number of bytes to skip the prefix
33+
// in base-0, puts 1 in *base to indicate a number that starts with 0, to provoke a
34+
// ValueError if it's not all-digits-zero.
3335
size_t mp_parse_num_base(const char *str, size_t len, int *base) {
3436
const byte *p = (const byte *)str;
3537
if (len <= 1) {
3638
goto no_prefix;
3739
}
3840
unichar c = *(p++);
39-
if ((*base == 0 || *base == 16) && c == '0') {
40-
c = *(p++);
41-
if ((c | 32) == 'x') {
41+
if (c == '0') {
42+
c = *(p++) | 32;
43+
int b = *base;
44+
if (c == 'x' && !(b & ~16)) {
4245
*base = 16;
43-
} else if (*base == 0 && (c | 32) == 'o') {
46+
} else if (c == 'o' && !(b & ~8)) {
4447
*base = 8;
45-
} else if (*base == 0 && (c | 32) == 'b') {
48+
} else if (c == 'b' && !(b & ~2)) {
4649
*base = 2;
4750
} else {
48-
if (*base == 0) {
49-
*base = 10;
50-
}
51-
p -= 2;
52-
}
53-
} else if (*base == 8 && c == '0') {
54-
c = *(p++);
55-
if ((c | 32) != 'o') {
56-
p -= 2;
57-
}
58-
} else if (*base == 2 && c == '0') {
59-
c = *(p++);
60-
if ((c | 32) != 'b') {
6151
p -= 2;
52+
if (b == 0) {
53+
*base = 1;
54+
}
6255
}
6356
} else {
6457
p--;

tests/basics/int1.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
print(int('+1'))
1414
print(int('-1'))
1515
print(int('01'))
16+
print(int('00'))
1617
print(int('9'))
1718
print(int('10'))
1819
print(int('+10'))
@@ -31,6 +32,7 @@
3132
print(int('0', 10))
3233
print(int('1', 10))
3334
print(int(' \t 1 \t ', 10))
35+
print(int(' \t 00 \t ', 10))
3436
print(int('11', 10))
3537
print(int('11', 16))
3638
print(int('11', 8))
@@ -52,6 +54,17 @@
5254
print(int('0o12 \t ', 8))
5355
print(int(b"12", 10))
5456
print(int(b"12"))
57+
print(int('000 ', 0))
58+
print(int('000 ', 2))
59+
print(int('000 ', 8))
60+
print(int('000 ', 10))
61+
print(int('000 ', 16))
62+
print(int('000 ', 36))
63+
print(int('010 ', 2))
64+
print(int('010 ', 8))
65+
print(int('010 ', 10))
66+
print(int('010 ', 16))
67+
print(int('010 ', 36))
5568

5669

5770
def test(value, base):
@@ -79,6 +92,8 @@ def test(value, base):
7992
test('0xg', 16)
8093
test('1 1', 16)
8194
test('123', 37)
95+
test('01', 0)
96+
test('01 ', 0)
8297

8398
# check that we don't parse this as a floating point number
8499
print(0x1e+1)

tests/basics/lexer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,11 @@ def a(x):
8383
exec(r"'\U0000000'")
8484
except SyntaxError:
8585
print("SyntaxError")
86+
87+
# Properly formed integer literals
88+
print(eval("00"))
89+
# badly formed integer literals
90+
try:
91+
eval("01")
92+
except SyntaxError:
93+
print("SyntaxError")

0 commit comments

Comments
 (0)