Skip to content

Commit cc74727

Browse files
committed
Fix unicodeobject.c
1 parent 6c60488 commit cc74727

File tree

1 file changed

+31
-21
lines changed

1 file changed

+31
-21
lines changed

Objects/unicodeobject.c

Lines changed: 31 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -683,13 +683,38 @@ backslashreplace(_PyBytesWriter *writer, char *str,
683683
return str;
684684
}
685685

686+
static Py_ssize_t
687+
xmlcharrefreplace_get_incr(Py_UCS4 ch)
688+
{
689+
Py_ssize_t incr;
690+
691+
if (ch < 10)
692+
incr = 2+1+1;
693+
else if (ch < 100)
694+
incr = 2+2+1;
695+
else if (ch < 1000)
696+
incr = 2+3+1;
697+
else if (ch < 10000)
698+
incr = 2+4+1;
699+
else if (ch < 100000)
700+
incr = 2+5+1;
701+
else if (ch < 1000000)
702+
incr = 2+6+1;
703+
else {
704+
assert(ch <= MAX_UNICODE);
705+
incr = 2+7+1;
706+
}
707+
708+
return incr;
709+
}
710+
686711
/* Implementation of the "xmlcharrefreplace" error handler for 8-bit encodings:
687712
ASCII, Latin1, UTF-8, etc. */
688713
static char*
689714
xmlcharrefreplace(_PyBytesWriter *writer, char *str,
690715
PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
691716
{
692-
Py_ssize_t size, i;
717+
Py_ssize_t size, incr, i;
693718
Py_UCS4 ch;
694719
int kind;
695720
const void *data;
@@ -700,25 +725,9 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
700725
size = 0;
701726
/* determine replacement size */
702727
for (i = collstart; i < collend; ++i) {
703-
Py_ssize_t incr;
704-
705728
ch = PyUnicode_READ(kind, data, i);
706-
if (ch < 10)
707-
incr = 2+1+1;
708-
else if (ch < 100)
709-
incr = 2+2+1;
710-
else if (ch < 1000)
711-
incr = 2+3+1;
712-
else if (ch < 10000)
713-
incr = 2+4+1;
714-
else if (ch < 100000)
715-
incr = 2+5+1;
716-
else if (ch < 1000000)
717-
incr = 2+6+1;
718-
else {
719-
assert(ch <= MAX_UNICODE);
720-
incr = 2+7+1;
721-
}
729+
incr = xmlcharrefreplace_get_incr(ch);
730+
722731
if (size > PY_SSIZE_T_MAX - incr) {
723732
PyErr_SetString(PyExc_OverflowError,
724733
"encoded result is too long for a Python string");
@@ -733,8 +742,9 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
733742

734743
/* generate replacement */
735744
for (i = collstart; i < collend; ++i) {
736-
size = PyOS_snprintf(str, 10 + 1, // see `incr` for the size
737-
"&#%d;", PyUnicode_READ(kind, data, i));
745+
ch = PyUnicode_READ(kind, data, i);
746+
incr = xmlcharrefreplace_get_incr(ch);
747+
size = PyOS_snprintf(str, incr + 1, "&#%d;", ch);
738748
if (size < 0) {
739749
return NULL;
740750
}

0 commit comments

Comments
 (0)