@@ -683,13 +683,38 @@ backslashreplace(_PyBytesWriter *writer, char *str,
683
683
return str ;
684
684
}
685
685
686
+ static Py_ssize_t
687
+ xmlcharrefreplace_get_incr (Py_UCS4 ch )
688
+ {
689
+ Py_ssize_t incr ;
690
+
691
+ if (ch < 10 )
692
+ incr = 2 + 1 + 1 ;
693
+ else if (ch < 100 )
694
+ incr = 2 + 2 + 1 ;
695
+ else if (ch < 1000 )
696
+ incr = 2 + 3 + 1 ;
697
+ else if (ch < 10000 )
698
+ incr = 2 + 4 + 1 ;
699
+ else if (ch < 100000 )
700
+ incr = 2 + 5 + 1 ;
701
+ else if (ch < 1000000 )
702
+ incr = 2 + 6 + 1 ;
703
+ else {
704
+ assert (ch <= MAX_UNICODE );
705
+ incr = 2 + 7 + 1 ;
706
+ }
707
+
708
+ return incr ;
709
+ }
710
+
686
711
/* Implementation of the "xmlcharrefreplace" error handler for 8-bit encodings:
687
712
ASCII, Latin1, UTF-8, etc. */
688
713
static char *
689
714
xmlcharrefreplace (_PyBytesWriter * writer , char * str ,
690
715
PyObject * unicode , Py_ssize_t collstart , Py_ssize_t collend )
691
716
{
692
- Py_ssize_t size , i ;
717
+ Py_ssize_t size , incr , i ;
693
718
Py_UCS4 ch ;
694
719
int kind ;
695
720
const void * data ;
@@ -700,25 +725,9 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
700
725
size = 0 ;
701
726
/* determine replacement size */
702
727
for (i = collstart ; i < collend ; ++ i ) {
703
- Py_ssize_t incr ;
704
-
705
728
ch = PyUnicode_READ (kind , data , i );
706
- if (ch < 10 )
707
- incr = 2 + 1 + 1 ;
708
- else if (ch < 100 )
709
- incr = 2 + 2 + 1 ;
710
- else if (ch < 1000 )
711
- incr = 2 + 3 + 1 ;
712
- else if (ch < 10000 )
713
- incr = 2 + 4 + 1 ;
714
- else if (ch < 100000 )
715
- incr = 2 + 5 + 1 ;
716
- else if (ch < 1000000 )
717
- incr = 2 + 6 + 1 ;
718
- else {
719
- assert (ch <= MAX_UNICODE );
720
- incr = 2 + 7 + 1 ;
721
- }
729
+ incr = xmlcharrefreplace_get_incr (ch );
730
+
722
731
if (size > PY_SSIZE_T_MAX - incr ) {
723
732
PyErr_SetString (PyExc_OverflowError ,
724
733
"encoded result is too long for a Python string" );
@@ -733,8 +742,9 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
733
742
734
743
/* generate replacement */
735
744
for (i = collstart ; i < collend ; ++ i ) {
736
- size = PyOS_snprintf (str , 10 + 1 , // see `incr` for the size
737
- "&#%d;" , PyUnicode_READ (kind , data , i ));
745
+ ch = PyUnicode_READ (kind , data , i );
746
+ incr = xmlcharrefreplace_get_incr (ch );
747
+ size = PyOS_snprintf (str , incr + 1 , "&#%d;" , ch );
738
748
if (size < 0 ) {
739
749
return NULL ;
740
750
}
0 commit comments