Skip to content

Commit e18d815

Browse files
authored
bpo-45321: Add missing error codes to module xml.parsers.expat.errors (GH-30188)
The idea is to ensure that module `xml.parsers.expat.errors` contains all known error codes and messages, even when CPython is compiled or run with an outdated version of libexpat. https://bugs.python.org/issue45321
1 parent 35628e4 commit e18d815

File tree

3 files changed

+126
-49
lines changed

3 files changed

+126
-49
lines changed

Doc/library/pyexpat.rst

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,40 @@ The ``errors`` module has the following attributes:
867867
.. data:: XML_ERROR_SUSPEND_PE
868868

869869

870+
.. data:: XML_ERROR_RESERVED_PREFIX_XML
871+
872+
An attempt was made to
873+
undeclare reserved namespace prefix ``xml``
874+
or to bind it to another namespace URI.
875+
876+
877+
.. data:: XML_ERROR_RESERVED_PREFIX_XMLNS
878+
879+
An attempt was made to declare or undeclare reserved namespace prefix ``xmlns``.
880+
881+
882+
.. data:: XML_ERROR_RESERVED_NAMESPACE_URI
883+
884+
An attempt was made to bind the URI of one of the reserved namespace
885+
prefixes ``xml`` and ``xmlns`` to another namespace prefix.
886+
887+
888+
.. data:: XML_ERROR_INVALID_ARGUMENT
889+
890+
This should not be reported to Python applications.
891+
892+
893+
.. data:: XML_ERROR_NO_BUFFER
894+
895+
This should not be reported to Python applications.
896+
897+
898+
.. data:: XML_ERROR_AMPLIFICATION_LIMIT_BREACH
899+
900+
The limit on input amplification factor (from DTD and entities)
901+
has been breached.
902+
903+
870904
.. rubric:: Footnotes
871905

872906
.. [1] The encoding string included in XML output should conform to the
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added missing error codes to module ``xml.parsers.expat.errors``.

Modules/pyexpat.c

Lines changed: 91 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1650,16 +1650,95 @@ add_submodule(PyObject *mod, const char *fullname)
16501650
return submodule;
16511651
}
16521652

1653+
struct ErrorInfo {
1654+
const char * name; /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
1655+
const char * description; /* Error description as returned by XML_ErrorString(<int>) */
1656+
};
1657+
1658+
static
1659+
struct ErrorInfo error_info_of[] = {
1660+
{NULL, NULL}, /* XML_ERROR_NONE (value 0) is not exposed */
1661+
1662+
{"XML_ERROR_NO_MEMORY", "out of memory"},
1663+
{"XML_ERROR_SYNTAX", "syntax error"},
1664+
{"XML_ERROR_NO_ELEMENTS", "no element found"},
1665+
{"XML_ERROR_INVALID_TOKEN", "not well-formed (invalid token)"},
1666+
{"XML_ERROR_UNCLOSED_TOKEN", "unclosed token"},
1667+
{"XML_ERROR_PARTIAL_CHAR", "partial character"},
1668+
{"XML_ERROR_TAG_MISMATCH", "mismatched tag"},
1669+
{"XML_ERROR_DUPLICATE_ATTRIBUTE", "duplicate attribute"},
1670+
{"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", "junk after document element"},
1671+
{"XML_ERROR_PARAM_ENTITY_REF", "illegal parameter entity reference"},
1672+
{"XML_ERROR_UNDEFINED_ENTITY", "undefined entity"},
1673+
{"XML_ERROR_RECURSIVE_ENTITY_REF", "recursive entity reference"},
1674+
{"XML_ERROR_ASYNC_ENTITY", "asynchronous entity"},
1675+
{"XML_ERROR_BAD_CHAR_REF", "reference to invalid character number"},
1676+
{"XML_ERROR_BINARY_ENTITY_REF", "reference to binary entity"},
1677+
{"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", "reference to external entity in attribute"},
1678+
{"XML_ERROR_MISPLACED_XML_PI", "XML or text declaration not at start of entity"},
1679+
{"XML_ERROR_UNKNOWN_ENCODING", "unknown encoding"},
1680+
{"XML_ERROR_INCORRECT_ENCODING", "encoding specified in XML declaration is incorrect"},
1681+
{"XML_ERROR_UNCLOSED_CDATA_SECTION", "unclosed CDATA section"},
1682+
{"XML_ERROR_EXTERNAL_ENTITY_HANDLING", "error in processing external entity reference"},
1683+
{"XML_ERROR_NOT_STANDALONE", "document is not standalone"},
1684+
{"XML_ERROR_UNEXPECTED_STATE", "unexpected parser state - please send a bug report"},
1685+
{"XML_ERROR_ENTITY_DECLARED_IN_PE", "entity declared in parameter entity"},
1686+
{"XML_ERROR_FEATURE_REQUIRES_XML_DTD", "requested feature requires XML_DTD support in Expat"},
1687+
{"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING", "cannot change setting once parsing has begun"},
1688+
1689+
/* Added in Expat 1.95.7. */
1690+
{"XML_ERROR_UNBOUND_PREFIX", "unbound prefix"},
1691+
1692+
/* Added in Expat 1.95.8. */
1693+
{"XML_ERROR_UNDECLARING_PREFIX", "must not undeclare prefix"},
1694+
{"XML_ERROR_INCOMPLETE_PE", "incomplete markup in parameter entity"},
1695+
{"XML_ERROR_XML_DECL", "XML declaration not well-formed"},
1696+
{"XML_ERROR_TEXT_DECL", "text declaration not well-formed"},
1697+
{"XML_ERROR_PUBLICID", "illegal character(s) in public id"},
1698+
{"XML_ERROR_SUSPENDED", "parser suspended"},
1699+
{"XML_ERROR_NOT_SUSPENDED", "parser not suspended"},
1700+
{"XML_ERROR_ABORTED", "parsing aborted"},
1701+
{"XML_ERROR_FINISHED", "parsing finished"},
1702+
{"XML_ERROR_SUSPEND_PE", "cannot suspend in external parameter entity"},
1703+
1704+
/* Added in 2.0.0. */
1705+
{"XML_ERROR_RESERVED_PREFIX_XML", "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
1706+
{"XML_ERROR_RESERVED_PREFIX_XMLNS", "reserved prefix (xmlns) must not be declared or undeclared"},
1707+
{"XML_ERROR_RESERVED_NAMESPACE_URI", "prefix must not be bound to one of the reserved namespace names"},
1708+
1709+
/* Added in 2.2.1. */
1710+
{"XML_ERROR_INVALID_ARGUMENT", "invalid argument"},
1711+
1712+
/* Added in 2.3.0. */
1713+
{"XML_ERROR_NO_BUFFER", "a successful prior call to function XML_GetBuffer is required"},
1714+
1715+
/* Added in 2.4.0. */
1716+
{"XML_ERROR_AMPLIFICATION_LIMIT_BREACH", "limit on input amplification factor (from DTD and entities) breached"}
1717+
};
1718+
16531719
static int
16541720
add_error(PyObject *errors_module, PyObject *codes_dict,
1655-
PyObject *rev_codes_dict, const char *name, int value)
1721+
PyObject *rev_codes_dict, size_t error_index)
16561722
{
1657-
const char *error_string = XML_ErrorString(value);
1723+
const char * const name = error_info_of[error_index].name;
1724+
const int error_code = (int)error_index;
1725+
1726+
/* NOTE: This keeps the source of truth regarding error
1727+
* messages with libexpat and (by definition) in bulletproof sync
1728+
* with the other uses of the XML_ErrorString function
1729+
* elsewhere within this file. pyexpat's copy of the messages
1730+
* only acts as a fallback in case of outdated runtime libexpat,
1731+
* where it returns NULL. */
1732+
const char *error_string = XML_ErrorString(error_code);
1733+
if (error_string == NULL) {
1734+
error_string = error_info_of[error_index].description;
1735+
}
1736+
16581737
if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
16591738
return -1;
16601739
}
16611740

1662-
PyObject *num = PyLong_FromLong(value);
1741+
PyObject *num = PyLong_FromLong(error_code);
16631742
if (num == NULL) {
16641743
return -1;
16651744
}
@@ -1699,53 +1778,16 @@ add_errors_module(PyObject *mod)
16991778
goto error;
17001779
}
17011780

1702-
#define ADD_CONST(name) do { \
1703-
if (add_error(errors_module, codes_dict, rev_codes_dict, \
1704-
#name, name) < 0) { \
1705-
goto error; \
1706-
} \
1707-
} while(0)
1781+
size_t error_index = 0;
1782+
for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
1783+
if (error_info_of[error_index].name == NULL) {
1784+
continue;
1785+
}
17081786

1709-
ADD_CONST(XML_ERROR_NO_MEMORY);
1710-
ADD_CONST(XML_ERROR_SYNTAX);
1711-
ADD_CONST(XML_ERROR_NO_ELEMENTS);
1712-
ADD_CONST(XML_ERROR_INVALID_TOKEN);
1713-
ADD_CONST(XML_ERROR_UNCLOSED_TOKEN);
1714-
ADD_CONST(XML_ERROR_PARTIAL_CHAR);
1715-
ADD_CONST(XML_ERROR_TAG_MISMATCH);
1716-
ADD_CONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1717-
ADD_CONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1718-
ADD_CONST(XML_ERROR_PARAM_ENTITY_REF);
1719-
ADD_CONST(XML_ERROR_UNDEFINED_ENTITY);
1720-
ADD_CONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1721-
ADD_CONST(XML_ERROR_ASYNC_ENTITY);
1722-
ADD_CONST(XML_ERROR_BAD_CHAR_REF);
1723-
ADD_CONST(XML_ERROR_BINARY_ENTITY_REF);
1724-
ADD_CONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1725-
ADD_CONST(XML_ERROR_MISPLACED_XML_PI);
1726-
ADD_CONST(XML_ERROR_UNKNOWN_ENCODING);
1727-
ADD_CONST(XML_ERROR_INCORRECT_ENCODING);
1728-
ADD_CONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1729-
ADD_CONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1730-
ADD_CONST(XML_ERROR_NOT_STANDALONE);
1731-
ADD_CONST(XML_ERROR_UNEXPECTED_STATE);
1732-
ADD_CONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1733-
ADD_CONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1734-
ADD_CONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1735-
/* Added in Expat 1.95.7. */
1736-
ADD_CONST(XML_ERROR_UNBOUND_PREFIX);
1737-
/* Added in Expat 1.95.8. */
1738-
ADD_CONST(XML_ERROR_UNDECLARING_PREFIX);
1739-
ADD_CONST(XML_ERROR_INCOMPLETE_PE);
1740-
ADD_CONST(XML_ERROR_XML_DECL);
1741-
ADD_CONST(XML_ERROR_TEXT_DECL);
1742-
ADD_CONST(XML_ERROR_PUBLICID);
1743-
ADD_CONST(XML_ERROR_SUSPENDED);
1744-
ADD_CONST(XML_ERROR_NOT_SUSPENDED);
1745-
ADD_CONST(XML_ERROR_ABORTED);
1746-
ADD_CONST(XML_ERROR_FINISHED);
1747-
ADD_CONST(XML_ERROR_SUSPEND_PE);
1748-
#undef ADD_CONST
1787+
if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
1788+
goto error;
1789+
}
1790+
}
17491791

17501792
if (PyModule_AddStringConstant(errors_module, "__doc__",
17511793
"Constants used to describe "

0 commit comments

Comments
 (0)