@@ -625,6 +625,64 @@ growable_comment_array_deallocate(growable_comment_array *arr) {
     PyMem_Free(arr->items);
 }
 
+static int
+initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
+    assert(token != NULL);
+
+    token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
+    token->bytes = PyBytes_FromStringAndSize(start, end - start);
+    if (token->bytes == NULL) {
+        return -1;
+    }
+
+    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
+        Py_DECREF(token->bytes);
+        return -1;
+    }
+
+    const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
+    int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
+    int end_lineno = p->tok->lineno;
+
+    int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
+    int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
+
+    token->lineno = p->starting_lineno + lineno;
+    token->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
+    token->end_lineno = p->starting_lineno + end_lineno;
+    token->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
+
+    p->fill += 1;
+
+    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
+        return raise_decode_error(p);
+    }
+
+    return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
+}
+
+static int
+_resize_tokens_array(Parser *p) {
+    int newsize = p->size * 2;
+    Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
+    if (new_tokens == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    p->tokens = new_tokens;
+
+    for (int i = p->size; i < newsize; i++) {
+        p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
+        if (p->tokens[i] == NULL) {
+            p->size = i; // Needed so that we can clean up correctly after the parser fails
+            PyErr_NoMemory();
+            return -1;
+        }
+    }
+    p->size = newsize;
+    return 0;
+}
+
 int
 _PyPegen_fill_token(Parser *p)
 {
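
The new _resize_tokens_array helper above factors out a common growth pattern: double the pointer array with a realloc, zero-initialize each new slot (PyMem_Calloc now replaces the previous PyMem_Malloc plus memset pair), and on a partial failure record how far the growth got so that teardown frees exactly the slots that exist. A minimal standalone sketch of the same pattern, using plain libc allocators and hypothetical toy_* names rather than the CPython internals:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins; not the CPython Parser/Token structs. */
typedef struct { int type; } toy_token;
typedef struct {
    toy_token **tokens;
    int fill;   /* tokens written so far */
    int size;   /* current capacity */
} toy_parser;

static int
toy_resize_tokens_array(toy_parser *p) {
    int newsize = p->size * 2;
    toy_token **new_tokens = realloc(p->tokens, newsize * sizeof(toy_token *));
    if (new_tokens == NULL) {
        return -1;
    }
    p->tokens = new_tokens;

    for (int i = p->size; i < newsize; i++) {
        p->tokens[i] = calloc(1, sizeof(toy_token));
        if (p->tokens[i] == NULL) {
            p->size = i;  /* slots [0, i) are still valid and must be freed */
            return -1;
        }
    }
    p->size = newsize;
    return 0;
}

int main(void) {
    toy_parser p = { .tokens = calloc(1, sizeof(toy_token *)), .fill = 0, .size = 1 };
    if (p.tokens == NULL) {
        return 1;
    }
    p.tokens[0] = calloc(1, sizeof(toy_token));
    while (p.size < 8) {
        if (toy_resize_tokens_array(&p) != 0) {
            break;
        }
    }
    printf("capacity grew to %d\n", p.size);  /* prints: capacity grew to 8 */
    for (int i = 0; i < p.size; i++) {
        free(p.tokens[i]);
    }
    free(p.tokens);
    return 0;
}

The p->size = i assignment on the failure path is the load-bearing detail: without it, the cleanup loop would walk pointers that were never allocated.
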
@@ -650,7 +708,8 @@ _PyPegen_fill_token(Parser *p)
         type = PyTokenizer_Get(p->tok, &start, &end);
     }
 
-    if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
+    // If we have reached the end and we are in single input mode, we need to insert a newline and reset the parsing
+    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
         type = NEWLINE; /* Add an extra newline */
         p->parsing_started = 0;
 
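
The condition reorder above is cosmetic (the start-rule check now comes first), but the behavior it guards is worth a standalone illustration: in single input (REPL) mode the grammar expects a trailing NEWLINE, so the first ENDMARKER is rewritten into one and parsing_started is cleared so that the next ENDMARKER terminates the parse for real. A toy model under those assumptions, with hypothetical toy_* names; the real code also resets pending indentation state, which is omitted here:

#include <stdio.h>

enum toy_type { TOY_NEWLINE, TOY_ENDMARKER, TOY_OTHER };

typedef struct {
    int single_input;     /* stand-in for p->start_rule == Py_single_input */
    int parsing_started;
} toy_parser;

/* First ENDMARKER in single-input mode becomes a NEWLINE; the cleared
 * flag lets the following ENDMARKER pass through and end the parse. */
static enum toy_type
toy_fixup(toy_parser *p, enum toy_type type) {
    if (p->single_input && type == TOY_ENDMARKER && p->parsing_started) {
        p->parsing_started = 0;
        return TOY_NEWLINE;
    }
    p->parsing_started = 1;
    return type;
}

int main(void) {
    toy_parser p = { .single_input = 1, .parsing_started = 1 };
    const char *names[] = { "NEWLINE", "ENDMARKER", "OTHER" };
    printf("%s\n", names[toy_fixup(&p, TOY_ENDMARKER)]);  /* NEWLINE */
    printf("%s\n", names[toy_fixup(&p, TOY_ENDMARKER)]);  /* ENDMARKER */
    return 0;
}
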
@@ -663,66 +722,13 @@ _PyPegen_fill_token(Parser *p)
         p->parsing_started = 1;
     }
 
-    if (p->fill == p->size) {
-        int newsize = p->size * 2;
-        Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
-        if (new_tokens == NULL) {
-            PyErr_NoMemory();
-            return -1;
-        }
-        p->tokens = new_tokens;
-
-        for (int i = p->size; i < newsize; i++) {
-            p->tokens[i] = PyMem_Malloc(sizeof(Token));
-            if (p->tokens[i] == NULL) {
-                p->size = i; // Needed, in order to cleanup correctly after parser fails
-                PyErr_NoMemory();
-                return -1;
-            }
-            memset(p->tokens[i], '\0', sizeof(Token));
-        }
-        p->size = newsize;
-    }
-
-    Token *t = p->tokens[p->fill];
-    t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
-    t->bytes = PyBytes_FromStringAndSize(start, end - start);
-    if (t->bytes == NULL) {
-        return -1;
-    }
-    if (_PyArena_AddPyObject(p->arena, t->bytes) < 0) {
-        Py_DECREF(t->bytes);
+    // Check if we are at the limit of the token array capacity and resize if needed
+    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
         return -1;
     }
 
-    int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
-    const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
-    int end_lineno = p->tok->lineno;
-    int col_offset = -1;
-    int end_col_offset = -1;
-    if (start != NULL && start >= line_start) {
-        col_offset = (int)(start - line_start);
-    }
-    if (end != NULL && end >= p->tok->line_start) {
-        end_col_offset = (int)(end - p->tok->line_start);
-    }
-
-    t->lineno = p->starting_lineno + lineno;
-    t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
-    t->end_lineno = p->starting_lineno + end_lineno;
-    t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
-
-    p->fill += 1;
-
-    if (type == ERRORTOKEN) {
-        if (p->tok->done == E_DECODE) {
-            return raise_decode_error(p);
-        }
-        return tokenizer_error(p);
-
-    }
-
-    return 0;
+    Token *t = p->tokens[p->fill];
+    return initialize_token(p, t, start, end, type);
 }
 
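
The column arithmetic moves into initialize_token essentially unchanged, though the old if statements are condensed into conditional expressions: offsets are plain pointer differences from the start of the current line, with -1 as a sentinel when the token does not start on that line (the start >= line_start guard, which matters for tokens such as multi-line strings). A self-contained illustration of that arithmetic over a toy buffer, not real tokenizer state:

#include <stdio.h>

int main(void) {
    const char *line_start = "x = foo(bar)\n";
    const char *start = line_start + 4;   /* points at "foo" */
    const char *end = start + 3;          /* one past "foo" */

    /* Same shape as the expressions in initialize_token. */
    int col_offset = (start != NULL && start >= line_start)
                         ? (int)(start - line_start) : -1;
    int end_col_offset = (end != NULL && end >= line_start)
                             ? (int)(end - line_start) : -1;

    printf("col_offset=%d end_col_offset=%d\n", col_offset, end_col_offset);
    /* prints: col_offset=4 end_col_offset=7 */
    return 0;
}
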