Skip to content

Commit f24777c

Browse files
authored
bpo-44317: Improve tokenizer errors with more informative locations (GH-26555)
1 parent 7b21108 commit f24777c

File tree

3 files changed

+57
-20
lines changed

3 files changed

+57
-20
lines changed

Lib/test/test_exceptions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,9 @@ def testSyntaxErrorOffset(self):
226226
# Errors thrown by tokenizer.c
227227
check('(0x+1)', 1, 3)
228228
check('x = 0xI', 1, 6)
229-
check('0010 + 2', 1, 4)
229+
check('0010 + 2', 1, 1)
230230
check('x = 32e-+4', 1, 8)
231-
check('x = 0o9', 1, 6)
231+
check('x = 0o9', 1, 7)
232232
check('\u03b1 = 0xI', 1, 6)
233233
check(b'\xce\xb1 = 0xI', 1, 6)
234234
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve tokenizer errors with more informative locations. Patch by Pablo Galindo.

Parser/tokenizer.c

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,19 +1071,13 @@ tok_backup(struct tok_state *tok, int c)
10711071
}
10721072
}
10731073

1074-
10751074
static int
1076-
syntaxerror(struct tok_state *tok, const char *format, ...)
1075+
_syntaxerror_range(struct tok_state *tok, const char *format,
1076+
int col_offset, int end_col_offset,
1077+
va_list vargs)
10771078
{
10781079
PyObject *errmsg, *errtext, *args;
1079-
va_list vargs;
1080-
#ifdef HAVE_STDARG_PROTOTYPES
1081-
va_start(vargs, format);
1082-
#else
1083-
va_start(vargs);
1084-
#endif
10851080
errmsg = PyUnicode_FromFormatV(format, vargs);
1086-
va_end(vargs);
10871081
if (!errmsg) {
10881082
goto error;
10891083
}
@@ -1093,7 +1087,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
10931087
if (!errtext) {
10941088
goto error;
10951089
}
1096-
int offset = (int)PyUnicode_GET_LENGTH(errtext);
1090+
1091+
if (col_offset == -1) {
1092+
col_offset = (int)PyUnicode_GET_LENGTH(errtext);
1093+
}
1094+
if (end_col_offset == -1) {
1095+
end_col_offset = col_offset;
1096+
}
1097+
10971098
Py_ssize_t line_len = strcspn(tok->line_start, "\n");
10981099
if (line_len != tok->cur - tok->line_start) {
10991100
Py_DECREF(errtext);
@@ -1104,8 +1105,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
11041105
goto error;
11051106
}
11061107

1107-
args = Py_BuildValue("(O(OiiN))", errmsg,
1108-
tok->filename, tok->lineno, offset, errtext);
1108+
args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno,
1109+
col_offset, errtext, tok->lineno, end_col_offset);
11091110
if (args) {
11101111
PyErr_SetObject(PyExc_SyntaxError, args);
11111112
Py_DECREF(args);
@@ -1117,6 +1118,38 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
11171118
return ERRORTOKEN;
11181119
}
11191120

1121+
static int
1122+
syntaxerror(struct tok_state *tok, const char *format, ...)
1123+
{
1124+
va_list vargs;
1125+
#ifdef HAVE_STDARG_PROTOTYPES
1126+
va_start(vargs, format);
1127+
#else
1128+
va_start(vargs);
1129+
#endif
1130+
int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
1131+
va_end(vargs);
1132+
return ret;
1133+
}
1134+
1135+
static int
1136+
syntaxerror_known_range(struct tok_state *tok,
1137+
int col_offset, int end_col_offset,
1138+
const char *format, ...)
1139+
{
1140+
va_list vargs;
1141+
#ifdef HAVE_STDARG_PROTOTYPES
1142+
va_start(vargs, format);
1143+
#else
1144+
va_start(vargs);
1145+
#endif
1146+
int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs);
1147+
va_end(vargs);
1148+
return ret;
1149+
}
1150+
1151+
1152+
11201153
static int
11211154
indenterror(struct tok_state *tok)
11221155
{
@@ -1692,12 +1725,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
16921725
c = tok_nextc(tok);
16931726
}
16941727
if (c < '0' || c >= '8') {
1695-
tok_backup(tok, c);
16961728
if (isdigit(c)) {
16971729
return syntaxerror(tok,
16981730
"invalid digit '%c' in octal literal", c);
16991731
}
17001732
else {
1733+
tok_backup(tok, c);
17011734
return syntaxerror(tok, "invalid octal literal");
17021735
}
17031736
}
@@ -1721,12 +1754,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
17211754
c = tok_nextc(tok);
17221755
}
17231756
if (c != '0' && c != '1') {
1724-
tok_backup(tok, c);
17251757
if (isdigit(c)) {
17261758
return syntaxerror(tok,
17271759
"invalid digit '%c' in binary literal", c);
17281760
}
17291761
else {
1762+
tok_backup(tok, c);
17301763
return syntaxerror(tok, "invalid binary literal");
17311764
}
17321765
}
@@ -1759,6 +1792,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
17591792
}
17601793
c = tok_nextc(tok);
17611794
}
1795+
char* zeros_end = tok->cur;
17621796
if (isdigit(c)) {
17631797
nonzero = 1;
17641798
c = tok_decimal_tail(tok);
@@ -1779,10 +1813,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
17791813
else if (nonzero) {
17801814
/* Old-style octal: now disallowed. */
17811815
tok_backup(tok, c);
1782-
return syntaxerror(tok,
1783-
"leading zeros in decimal integer "
1784-
"literals are not permitted; "
1785-
"use an 0o prefix for octal integers");
1816+
return syntaxerror_known_range(
1817+
tok, (int)(tok->start + 1 - tok->line_start),
1818+
(int)(zeros_end - tok->line_start),
1819+
"leading zeros in decimal integer "
1820+
"literals are not permitted; "
1821+
"use an 0o prefix for octal integers");
17861822
}
17871823
if (!verify_end_of_number(tok, c, "decimal")) {
17881824
return ERRORTOKEN;

0 commit comments

Comments
 (0)