Commit d00a449

Simplify _PyPegen_fill_token in pegen.c (GH-25295)

Parent: 58bafe4


Parser/pegen.c: 64 additions, 58 deletions
@@ -625,6 +625,64 @@ growable_comment_array_deallocate(growable_comment_array *arr) {
     PyMem_Free(arr->items);
 }
 
+static int
+initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
+    assert(token != NULL);
+
+    token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
+    token->bytes = PyBytes_FromStringAndSize(start, end - start);
+    if (token->bytes == NULL) {
+        return -1;
+    }
+
+    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
+        Py_DECREF(token->bytes);
+        return -1;
+    }
+
+    const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
+    int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
+    int end_lineno = p->tok->lineno;
+
+    int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
+    int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
+
+    token->lineno = p->starting_lineno + lineno;
+    token->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
+    token->end_lineno = p->starting_lineno + end_lineno;
+    token->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
+
+    p->fill += 1;
+
+    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
+        return raise_decode_error(p);
+    }
+
+    return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
+}
+
+static int
+_resize_tokens_array(Parser *p) {
+    int newsize = p->size * 2;
+    Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
+    if (new_tokens == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    p->tokens = new_tokens;
+
+    for (int i = p->size; i < newsize; i++) {
+        p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
+        if (p->tokens[i] == NULL) {
+            p->size = i; // Needed, in order to cleanup correctly after parser fails
+            PyErr_NoMemory();
+            return -1;
+        }
+    }
+    p->size = newsize;
+    return 0;
+}
+
 int
 _PyPegen_fill_token(Parser *p)
 {
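The new _resize_tokens_array above keeps the capacity-doubling strategy of the old inline code, but swaps the PyMem_Malloc + memset pair for a single PyMem_Calloc call, which hands back already-zeroed memory. A minimal standalone sketch of the same grow-and-zero pattern, using plain libc allocators and a hypothetical Item type so it compiles outside CPython:

#include <stdlib.h>

typedef struct { int filled; } Item;   /* hypothetical stand-in for Token */

/* Double the capacity of an array of Item pointers, zero-initializing
 * the new slots. Returns 0 on success, -1 on allocation failure. On
 * failure, *size still describes exactly the slots that are valid, so
 * the caller can free everything it owns. */
static int
grow_array(Item ***items, int *size)
{
    int newsize = *size * 2;
    Item **new_items = realloc(*items, newsize * sizeof(Item *));
    if (new_items == NULL) {
        return -1;
    }
    *items = new_items;

    for (int i = *size; i < newsize; i++) {
        /* calloc replaces the malloc + memset pair: the slot arrives zeroed */
        new_items[i] = calloc(1, sizeof(Item));
        if (new_items[i] == NULL) {
            *size = i;   /* record how far we got, for correct cleanup */
            return -1;
        }
    }
    *size = newsize;
    return 0;
}

Recording the partial size before bailing out is the detail the patch's own comment flags: without it, the parser's teardown would either leak the new slots or free uninitialized pointers.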
@@ -650,7 +708,8 @@ _PyPegen_fill_token(Parser *p)
         type = PyTokenizer_Get(p->tok, &start, &end);
     }
 
-    if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
+    // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
+    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
         type = NEWLINE; /* Add an extra newline */
         p->parsing_started = 0;
 
@@ -663,66 +722,13 @@ _PyPegen_fill_token(Parser *p)
         p->parsing_started = 1;
     }
 
-    if (p->fill == p->size) {
-        int newsize = p->size * 2;
-        Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
-        if (new_tokens == NULL) {
-            PyErr_NoMemory();
-            return -1;
-        }
-        p->tokens = new_tokens;
-
-        for (int i = p->size; i < newsize; i++) {
-            p->tokens[i] = PyMem_Malloc(sizeof(Token));
-            if (p->tokens[i] == NULL) {
-                p->size = i; // Needed, in order to cleanup correctly after parser fails
-                PyErr_NoMemory();
-                return -1;
-            }
-            memset(p->tokens[i], '\0', sizeof(Token));
-        }
-        p->size = newsize;
-    }
-
-    Token *t = p->tokens[p->fill];
-    t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
-    t->bytes = PyBytes_FromStringAndSize(start, end - start);
-    if (t->bytes == NULL) {
-        return -1;
-    }
-    if (_PyArena_AddPyObject(p->arena, t->bytes) < 0) {
-        Py_DECREF(t->bytes);
+    // Check if we are at the limit of the token array capacity and resize if needed
+    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
         return -1;
     }
 
-    int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
-    const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
-    int end_lineno = p->tok->lineno;
-    int col_offset = -1;
-    int end_col_offset = -1;
-    if (start != NULL && start >= line_start) {
-        col_offset = (int)(start - line_start);
-    }
-    if (end != NULL && end >= p->tok->line_start) {
-        end_col_offset = (int)(end - p->tok->line_start);
-    }
-
-    t->lineno = p->starting_lineno + lineno;
-    t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
-    t->end_lineno = p->starting_lineno + end_lineno;
-    t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
-
-    p->fill += 1;
-
-    if (type == ERRORTOKEN) {
-        if (p->tok->done == E_DECODE) {
-            return raise_decode_error(p);
-        }
-        return tokenizer_error(p);
-
-    }
-
-    return 0;
+    Token *t = p->tokens[p->fill];
+    return initialize_token(p, t, start, end, type);
 }
 
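Read together, the hunks reduce the tail of _PyPegen_fill_token to a capacity check plus a single call into the new helper. Reassembled from the context and added lines above (not copied from the repository), the function now ends like this:

    // Check if we are at the limit of the token array capacity and resize if needed
    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
        return -1;
    }

    Token *t = p->tokens[p->fill];
    return initialize_token(p, t, start, end, type);
}

Every early-exit path that previously lived inline (bytes allocation failure, decode errors, tokenizer errors) now returns through initialize_token, so the caller keeps exactly one failure branch of its own.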