Skip to content

Commit cf7303e

Browse files
bpo-33305: Improve SyntaxError for invalid numerical literals. (GH-6517)
1 parent 2a9b8ba commit cf7303e

File tree

3 files changed

+77
-13
lines changed

3 files changed

+77
-13
lines changed

Lib/test/test_grammar.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@
100100

101101
class TokenTests(unittest.TestCase):
102102

103+
check_syntax_error = check_syntax_error
104+
103105
def test_backslash(self):
104106
# Backslash means line continuation:
105107
x = 1 \
@@ -184,6 +186,28 @@ def test_underscore_literals(self):
184186
# Sanity check: no literal begins with an underscore
185187
self.assertRaises(NameError, eval, "_0")
186188

189+
# Each check() call below passes a malformed numeric literal together with a
# message string to self.check_syntax_error.
# NOTE(review): the helper's definition is not visible here; presumably it
# asserts that compiling the literal raises a SyntaxError matching the
# message -- confirm against the helper.
def test_bad_numerical_literals(self):
190+
check = self.check_syntax_error
191+
check("0b12", "invalid digit '2' in binary literal")  # binary: digit other than 0/1
192+
check("0b1_2", "invalid digit '2' in binary literal")
193+
check("0b2", "invalid digit '2' in binary literal")
194+
check("0b1_", "invalid binary literal")  # binary: trailing underscore
195+
check("0b", "invalid binary literal")  # binary: prefix with no digits
196+
check("0o18", "invalid digit '8' in octal literal")  # octal: digit other than 0-7
197+
check("0o1_8", "invalid digit '8' in octal literal")
198+
check("0o8", "invalid digit '8' in octal literal")
199+
check("0o1_", "invalid octal literal")  # octal: trailing underscore
200+
check("0o", "invalid octal literal")  # octal: prefix with no digits
201+
check("0x1_", "invalid hexadecimal literal")  # hex: trailing underscore
202+
check("0x", "invalid hexadecimal literal")  # hex: prefix with no digits
203+
check("1_", "invalid decimal literal")  # decimal: trailing underscore
204+
check("012",  # leading zero followed by nonzero digits
205+
"leading zeros in decimal integer literals are not permitted; "
206+
"use an 0o prefix for octal integers")
207+
check("1.2_", "invalid decimal literal")  # trailing underscore after the fraction
208+
check("1e2_", "invalid decimal literal")  # trailing underscore after the exponent
209+
check("1e+", "invalid decimal literal")  # exponent sign with no digits
187211
def test_string_literals(self):
188212
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
189213
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improved syntax error messages for invalid numerical literals.

Parser/tokenizer.c

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,28 @@ PyToken_ThreeChars(int c1, int c2, int c3)
12801280
return OP;
12811281
}
12821282

1283+
/* Report a tokenizer syntax error with a printf-style message.
 *
 * When PGEN is not defined: sets a SyntaxError built from `format` and the
 * variadic arguments via PyErr_FormatV(), passes tok->filename, tok->lineno
 * and the offset (tok->cur - tok->line_start) to
 * PyErr_SyntaxLocationObject(), and sets tok->done to E_ERROR.
 * When PGEN is defined: no exception is set and tok->done becomes E_TOKEN.
 *
 * Always returns ERRORTOKEN, so callers can write
 * `return syntaxerror(tok, ...);`.
 */
static int
1284+
syntaxerror(struct tok_state *tok, const char *format, ...)
1285+
{
1286+
#ifndef PGEN
1287+
va_list vargs;
1288+
#ifdef HAVE_STDARG_PROTOTYPES
1289+
va_start(vargs, format);
1290+
#else
1291+
va_start(vargs);  /* form used when HAVE_STDARG_PROTOTYPES is not defined */
1292+
#endif /* HAVE_STDARG_PROTOTYPES */
1293+
PyErr_FormatV(PyExc_SyntaxError, format, vargs);
1294+
va_end(vargs);
1295+
PyErr_SyntaxLocationObject(tok->filename,
1296+
tok->lineno,
1297+
tok->cur - tok->line_start);  /* distance of the current position from the line start */
1298+
tok->done = E_ERROR;
1299+
#else /* PGEN */
1300+
tok->done = E_TOKEN;
1301+
#endif /* PGEN */
1302+
return ERRORTOKEN;  /* same token code in both build configurations */
1303+
}
1304+
12831305
static int
12841306
indenterror(struct tok_state *tok)
12851307
{
@@ -1333,8 +1355,8 @@ tok_decimal_tail(struct tok_state *tok)
13331355
}
13341356
c = tok_nextc(tok);
13351357
if (!isdigit(c)) {
1336-
tok->done = E_TOKEN;
13371358
tok_backup(tok, c);
1359+
syntaxerror(tok, "invalid decimal literal");
13381360
return 0;
13391361
}
13401362
}
@@ -1562,9 +1584,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
15621584
c = tok_nextc(tok);
15631585
}
15641586
if (!isxdigit(c)) {
1565-
tok->done = E_TOKEN;
15661587
tok_backup(tok, c);
1567-
return ERRORTOKEN;
1588+
return syntaxerror(tok, "invalid hexadecimal literal");
15681589
}
15691590
do {
15701591
c = tok_nextc(tok);
@@ -1579,14 +1600,23 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
15791600
c = tok_nextc(tok);
15801601
}
15811602
if (c < '0' || c >= '8') {
1582-
tok->done = E_TOKEN;
15831603
tok_backup(tok, c);
1584-
return ERRORTOKEN;
1604+
if (isdigit(c)) {
1605+
return syntaxerror(tok,
1606+
"invalid digit '%c' in octal literal", c);
1607+
}
1608+
else {
1609+
return syntaxerror(tok, "invalid octal literal");
1610+
}
15851611
}
15861612
do {
15871613
c = tok_nextc(tok);
15881614
} while ('0' <= c && c < '8');
15891615
} while (c == '_');
1616+
if (isdigit(c)) {
1617+
return syntaxerror(tok,
1618+
"invalid digit '%c' in octal literal", c);
1619+
}
15901620
}
15911621
else if (c == 'b' || c == 'B') {
15921622
/* Binary */
@@ -1596,14 +1626,23 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
15961626
c = tok_nextc(tok);
15971627
}
15981628
if (c != '0' && c != '1') {
1599-
tok->done = E_TOKEN;
16001629
tok_backup(tok, c);
1601-
return ERRORTOKEN;
1630+
if (isdigit(c)) {
1631+
return syntaxerror(tok,
1632+
"invalid digit '%c' in binary literal", c);
1633+
}
1634+
else {
1635+
return syntaxerror(tok, "invalid binary literal");
1636+
}
16021637
}
16031638
do {
16041639
c = tok_nextc(tok);
16051640
} while (c == '0' || c == '1');
16061641
} while (c == '_');
1642+
if (isdigit(c)) {
1643+
return syntaxerror(tok,
1644+
"invalid digit '%c' in binary literal", c);
1645+
}
16071646
}
16081647
else {
16091648
int nonzero = 0;
@@ -1613,9 +1652,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
16131652
if (c == '_') {
16141653
c = tok_nextc(tok);
16151654
if (!isdigit(c)) {
1616-
tok->done = E_TOKEN;
16171655
tok_backup(tok, c);
1618-
return ERRORTOKEN;
1656+
return syntaxerror(tok, "invalid decimal literal");
16191657
}
16201658
}
16211659
if (c != '0') {
@@ -1642,9 +1680,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
16421680
}
16431681
else if (nonzero) {
16441682
/* Old-style octal: now disallowed. */
1645-
tok->done = E_TOKEN;
16461683
tok_backup(tok, c);
1647-
return ERRORTOKEN;
1684+
return syntaxerror(tok,
1685+
"leading zeros in decimal integer "
1686+
"literals are not permitted; "
1687+
"use an 0o prefix for octal integers");
16481688
}
16491689
}
16501690
}
@@ -1676,9 +1716,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
16761716
if (c == '+' || c == '-') {
16771717
c = tok_nextc(tok);
16781718
if (!isdigit(c)) {
1679-
tok->done = E_TOKEN;
16801719
tok_backup(tok, c);
1681-
return ERRORTOKEN;
1720+
return syntaxerror(tok, "invalid decimal literal");
16821721
}
16831722
} else if (!isdigit(c)) {
16841723
tok_backup(tok, c);

0 commit comments

Comments
 (0)