Skip to content

Commit bdfd6e3

Browse files
committed
Add new APIs to the parser to support end positions
1 parent 501aa62 commit bdfd6e3

File tree

2 files changed

+46
-11
lines changed

2 files changed

+46
-11
lines changed

Parser/pegen.c

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ raise_unclosed_parentheses_error(Parser *p) {
274274
int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
275275
int error_col = p->tok->parencolstack[p->tok->level-1];
276276
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
277-
error_lineno, error_col,
277+
error_lineno, error_col, error_lineno, -1,
278278
"'%c' was never closed",
279279
p->tok->parenstack[p->tok->level-1]);
280280
}
@@ -366,7 +366,7 @@ tokenizer_error(Parser *p)
366366
msg = "unknown parsing error";
367367
}
368368

369-
RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, msg);
369+
RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg);
370370
return -1;
371371
}
372372

@@ -375,17 +375,21 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
375375
{
376376
Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
377377
Py_ssize_t col_offset;
378+
Py_ssize_t end_col_offset = -1;
378379
if (t->col_offset == -1) {
379380
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
380381
intptr_t, int);
381382
} else {
382383
col_offset = t->col_offset + 1;
383384
}
384385

386+
if (t->end_col_offset != -1) {
387+
end_col_offset = t->end_col_offset + 1;
388+
}
389+
385390
va_list va;
386391
va_start(va, errmsg);
387-
_PyPegen_raise_error_known_location(p, errtype, t->lineno,
388-
col_offset, errmsg, va);
392+
_PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
389393
va_end(va);
390394

391395
return NULL;
@@ -416,6 +420,7 @@ get_error_line(Parser *p, Py_ssize_t lineno)
416420
void *
417421
_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
418422
Py_ssize_t lineno, Py_ssize_t col_offset,
423+
Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
419424
const char *errmsg, va_list va)
420425
{
421426
PyObject *value = NULL;
@@ -424,6 +429,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
424429
PyObject *tmp = NULL;
425430
p->error_indicator = 1;
426431

432+
if (end_lineno == CURRENT_POS) {
433+
end_lineno = p->tok->lineno;
434+
}
435+
if (end_col_offset == CURRENT_POS) {
436+
end_col_offset = p->tok->cur - p->tok->line_start;
437+
}
438+
427439
if (p->start_rule == Py_fstring_input) {
428440
const char *fstring_msg = "f-string: ";
429441
Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
@@ -475,14 +487,19 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
475487

476488
if (p->start_rule == Py_fstring_input) {
477489
col_offset -= p->starting_col_offset;
490+
end_col_offset -= p->starting_col_offset;
478491
}
492+
479493
Py_ssize_t col_number = col_offset;
494+
Py_ssize_t end_col_number = end_col_offset;
480495

481496
if (p->tok->encoding != NULL) {
482497
col_number = byte_offset_to_character_offset(error_line, col_offset);
498+
end_col_number = end_col_number > 0 ?
499+
byte_offset_to_character_offset(error_line, end_col_offset) :
500+
end_col_number;
483501
}
484-
485-
tmp = Py_BuildValue("(OiiN)", p->tok->filename, lineno, col_number, error_line);
502+
tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
486503
if (!tmp) {
487504
goto error;
488505
}
@@ -1494,6 +1511,13 @@ _PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
14941511
return flattened_seq;
14951512
}
14961513

1514+
void *
1515+
_PyPegen_seq_last_item(asdl_seq *seq)
1516+
{
1517+
Py_ssize_t len = asdl_seq_LEN(seq);
1518+
return asdl_seq_GET_UNTYPED(seq, len - 1);
1519+
}
1520+
14971521
/* Creates a new name of the form <first_name>.<second_name> */
14981522
expr_ty
14991523
_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
@@ -2398,7 +2422,7 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args)
23982422
return NULL;
23992423
}
24002424

2401-
return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
2425+
return RAISE_SYNTAX_ERROR_STARTING_FROM(
24022426
(expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
24032427
"Generator expression must be parenthesized"
24042428
);

Parser/pegen.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,30 +136,41 @@ const char *_PyPegen_get_expr_name(expr_ty);
136136
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
137137
void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
138138
Py_ssize_t lineno, Py_ssize_t col_offset,
139+
Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
139140
const char *errmsg, va_list va);
140141
void *_PyPegen_dummy_name(Parser *p, ...);
141142

143+
144+
void * _PyPegen_seq_last_item(asdl_seq *seq);
145+
146+
/* Sentinel for an end position: when _PyPegen_raise_error_known_location()
 * receives it as end_lineno it substitutes p->tok->lineno, and as
 * end_col_offset it substitutes p->tok->cur - p->tok->line_start. */
#define CURRENT_POS (-5)
147+
142148
Py_LOCAL_INLINE(void *)
143149
RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
144150
Py_ssize_t lineno, Py_ssize_t col_offset,
151+
Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
145152
const char *errmsg, ...)
146153
{
147154
va_list va;
148155
va_start(va, errmsg);
149-
_PyPegen_raise_error_known_location(p, errtype, lineno, col_offset + 1,
150-
errmsg, va);
156+
Py_ssize_t _col_offset = (col_offset == CURRENT_POS ? CURRENT_POS : col_offset + 1);
157+
Py_ssize_t _end_col_offset = (end_col_offset == CURRENT_POS ? CURRENT_POS : end_col_offset + 1);
158+
_PyPegen_raise_error_known_location(p, errtype, lineno, _col_offset, end_lineno, _end_col_offset, errmsg, va);
151159
va_end(va);
152160
return NULL;
153161
}
154162

155-
156163
#define UNUSED(expr) do { (void)(expr); } while (0)
157164
/* Expands to five comma-separated arguments: the start line and column of
 * `head`, the end line and column of `tail`, and p->arena (a `p` must be in
 * scope). Every use of a macro parameter is parenthesized so that arguments
 * such as `*ptr` or a conditional expression bind as intended. */
#define EXTRA_EXPR(head, tail) (head)->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
158165
#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
159166
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
160167
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
168+
/* Raise a SyntaxError whose location starts at `a` (lineno, col_offset) and
 * ends at `b` (end_lineno, end_col_offset). Expects a `p` in scope. */
#define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
169+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (b)->end_lineno, (b)->end_col_offset, msg, ##__VA_ARGS__)
161170
#define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
162-
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, msg, ##__VA_ARGS__)
171+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
172+
/* Raise a SyntaxError whose location starts at `a` (lineno, col_offset);
 * CURRENT_POS is passed for both the end line and the end column.
 * Expects a `p` in scope. */
#define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
173+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)
163174

164175
Py_LOCAL_INLINE(void *)
165176
CHECK_CALL(Parser *p, void *result)

0 commit comments

Comments
 (0)