Skip to content

[3.8] bpo-46794: Bump up the libexpat version into 2.4.6 (GH-31487) #31520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Bump up the libexpat version to 2.4.6.
2 changes: 1 addition & 1 deletion Modules/expat/expat.h
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 4
#define XML_MICRO_VERSION 4
#define XML_MICRO_VERSION 6

#ifdef __cplusplus
}
Expand Down
161 changes: 116 additions & 45 deletions Modules/expat/xmlparse.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* 2e2c8ce5f11a473d65ec313ab20ceee6afefb355f5405afc06e7204e2e41c8c0 (2.4.4+)
/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
Expand All @@ -11,7 +11,7 @@
Copyright (c) 2000-2006 Fred L. Drake, Jr. <[email protected]>
Copyright (c) 2001-2002 Greg Stein <[email protected]>
Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
Copyright (c) 2005-2009 Steven Solie <[email protected]>
Copyright (c) 2005-2009 Steven Solie <[email protected]>
Copyright (c) 2016 Eric Rahm <[email protected]>
Copyright (c) 2016-2022 Sebastian Pipping <[email protected]>
Copyright (c) 2016 Gaurav <[email protected]>
Expand Down Expand Up @@ -718,8 +718,7 @@ XML_ParserCreate(const XML_Char *encodingName) {

/* Create a parser that uses nsSep as its namespace separator.

   encodingName is forwarded unchanged.  nsSep is placed into a
   two-element XML_Char array whose second element is 0, and that array is
   handed to XML_ParserCreate_MM together with a NULL second argument.
   Returns whatever XML_ParserCreate_MM returns. */
XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
  /* Initialise both elements in one declaration: a single definition of
     tmp, and the terminating element is never left indeterminate. */
  XML_Char tmp[2] = {nsSep, 0};
  return XML_ParserCreate_MM(encodingName, NULL, tmp);
}

Expand Down Expand Up @@ -1344,8 +1343,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
would be otherwise.
*/
if (parser->m_ns) {
XML_Char tmp[2];
*tmp = parser->m_namespaceSeparator;
XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
} else {
parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Expand Down Expand Up @@ -2563,6 +2561,7 @@ storeRawNames(XML_Parser parser) {
while (tag) {
int bufSize;
int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
size_t rawNameLen;
char *rawNameBuf = tag->buf + nameLen;
/* Stop if already stored. Since m_tagStack is a stack, we can stop
at the first entry that has already been copied; everything
Expand All @@ -2574,7 +2573,11 @@ storeRawNames(XML_Parser parser) {
/* For re-use purposes we need to ensure that the
size of tag->buf is a multiple of sizeof(XML_Char).
*/
bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
/* Detect and prevent integer overflow. */
if (rawNameLen > (size_t)INT_MAX - nameLen)
return XML_FALSE;
bufSize = nameLen + (int)rawNameLen;
if (bufSize > tag->bufEnd - tag->buf) {
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
if (temp == NULL)
Expand Down Expand Up @@ -3756,6 +3759,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
if (! mustBeXML && isXMLNS
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;

// NOTE: While Expat does not validate namespace URIs against RFC 3986,
// we have to at least make sure that the XML processor on top of
// Expat (that is splitting tag names by namespace separator into
// 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
// by an attacker putting additional namespace separator characters
// into namespace declarations. That would be ambiguous and not to
// be expected.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
return XML_ERROR_SYNTAX;
}
}
isXML = isXML && len == xmlLen;
isXMLNS = isXMLNS && len == xmlnsLen;
Expand Down Expand Up @@ -7317,44 +7331,15 @@ nextScaffoldPart(XML_Parser parser) {
return next;
}

/* Convert the scaffold subtree rooted at index src_node (in
   parser->m_dtd->scaffold) into XML_Content nodes.

   parser   - supplies the DTD whose scaffold array is read.
   src_node - scaffold index of the subtree root.
   dest     - node that receives the converted root.
   contpos  - in/out: next free XML_Content slot for child nodes; advanced
              past every slot this call hands out.
   strpos   - in/out: next free XML_Char position for name strings; advanced
              past every zero-terminated name this call copies.

   The conversion is done with a loop instead of recursion so that deeply
   nested content models cannot exhaust the call stack.  Child slots are
   reserved at *contpos and temporarily hold the scaffold index of the node
   they will become (stored in "numchildren"); the loop then visits those
   slots in order and fills in the real data.  As a consequence the children
   of each node stay adjacent, while sub-trees are laid out level by level
   rather than depth-first; the tree reachable through the "children"
   pointers is the same.

   NOTE(review): assumes the slots starting at *contpos are unused and that
   dest itself lies outside that region - confirm against the caller. */
static void
build_node(XML_Parser parser, int src_node, XML_Content *dest,
           XML_Content **contpos, XML_Char **strpos) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *pending = *contpos; /* next reserved slot still to convert */
  XML_Content *node = dest;        /* node being converted right now */
  int src_index = src_node;        /* scaffold index belonging to node */

  for (;;) {
    node->type = dtd->scaffold[src_index].type;
    node->quant = dtd->scaffold[src_index].quant;

    if (node->type == XML_CTYPE_NAME) {
      /* Leaf: copy the name, including its terminator, to *strpos. */
      const XML_Char *src = dtd->scaffold[src_index].name;
      node->name = *strpos;
      for (;;) {
        *(*strpos)++ = *src;
        if (! *src)
          break;
        src++;
      }
      node->numchildren = 0;
      node->children = NULL;
    } else {
      unsigned int i;
      int cn;
      node->name = NULL;
      node->numchildren = dtd->scaffold[src_index].childcnt;
      node->children = *contpos;

      /* Reserve one slot per child and note which scaffold entry it
         stands for; the slot is converted when "pending" reaches it. */
      for (i = 0, cn = dtd->scaffold[src_index].firstchild;
           i < node->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
        ((*contpos)++)->numchildren = (unsigned int)cn;
      }
    }

    /* Every reserved slot has been converted: the subtree is complete. */
    if (pending == *contpos)
      break;

    node = pending++;
    src_index = (int)node->numchildren;
  }
}

static XML_Content *
build_model(XML_Parser parser) {
/* Function build_model transforms the existing parser->m_dtd->scaffold
* array of CONTENT_SCAFFOLD tree nodes into a new array of
* XML_Content tree nodes followed by a gapless list of zero-terminated
* strings. */
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
XML_Content *ret;
XML_Content *cpos;
XML_Char *str;
XML_Char *str; /* the current string writing location */

/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
Expand All @@ -7380,10 +7365,96 @@ build_model(XML_Parser parser) {
if (! ret)
return NULL;

str = (XML_Char *)(&ret[dtd->scaffCount]);
cpos = &ret[1];
/* What follows is an iterative implementation of what was previously done
* recursively in a dedicated function called "build_node". The old recursive
* build_node could be forced into stack exhaustion by input as small as a
* few megabytes, which was a security issue. Hence, the function call
* stack is now avoided by resolving the recursion into a loop.
*
* The iterative approach works as follows:
*
* - We have two writing pointers, both walking up the result array; one does
* the work, the other creates "jobs" for its colleague to do, and leads
* the way:
*
* - The faster one, pointer jobDest, always leads and writes down "what
* job to do" for the other pointer to pick up once it reaches that place
* in the array: leader "jobDest" stores the source node array index
* (relative to array dtd->scaffold) in field "numchildren".
*
* - The slower one, pointer dest, looks at the value stored in the
* "numchildren" field (which actually holds a source node array index
* at that time) and puts the real data from dtd->scaffold in.
*
* - Before the loop starts, jobDest writes source array index 0
* (where the root node is located) so that dest will have something to do
* when it starts operation.
*
* - Whenever nodes with children are encountered, jobDest appends
* them as new jobs, in order. As a result, tree node siblings are
* adjacent in the resulting array, for example:
*
* [0] root, has two children
* [1] first child of 0, has three children
* [3] first child of 1, does not have children
* [4] second child of 1, does not have children
* [5] third child of 1, does not have children
* [2] second child of 0, does not have children
*
* Or (the same data) presented in flat array view:
*
* [0] root, has two children
*
* [1] first child of 0, has three children
* [2] second child of 0, does not have children
*
* [3] first child of 1, does not have children
* [4] second child of 1, does not have children
* [5] third child of 1, does not have children
*
* - The algorithm repeats until all target array indices have been processed.
*/
XML_Content *dest = ret; /* tree node writing location, moves upwards */
XML_Content *const destLimit = &ret[dtd->scaffCount];
XML_Content *jobDest = ret; /* next free writing location in target array */
str = (XML_Char *)&ret[dtd->scaffCount];

/* Add the starting job, the root node (index 0) of the source tree */
(jobDest++)->numchildren = 0;

for (; dest < destLimit; dest++) {
/* Retrieve source tree array index from job storage */
const int src_node = (int)dest->numchildren;

/* Convert item */
dest->type = dtd->scaffold[src_node].type;
dest->quant = dtd->scaffold[src_node].quant;
if (dest->type == XML_CTYPE_NAME) {
const XML_Char *src;
dest->name = str;
src = dtd->scaffold[src_node].name;
for (;;) {
*str++ = *src;
if (! *src)
break;
src++;
}
dest->numchildren = 0;
dest->children = NULL;
} else {
unsigned int i;
int cn;
dest->name = NULL;
dest->numchildren = dtd->scaffold[src_node].childcnt;
dest->children = jobDest;

/* Append scaffold indices of children to array */
for (i = 0, cn = dtd->scaffold[src_node].firstchild;
i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
(jobDest++)->numchildren = (unsigned int)cn;
}
}

build_node(parser, 0, ret, &cpos, &str);
return ret;
}

Expand Down Expand Up @@ -7412,7 +7483,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,

static XML_Char *
copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
int charsRequired = 0;
size_t charsRequired = 0;
XML_Char *result;

/* First determine how long the string is */
Expand Down
2 changes: 1 addition & 1 deletion Modules/expat/xmlrole.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Copyright (c) 2002 Greg Stein <[email protected]>
Copyright (c) 2002-2006 Karl Waclawek <[email protected]>
Copyright (c) 2002-2003 Fred L. Drake, Jr. <[email protected]>
Copyright (c) 2005-2009 Steven Solie <[email protected]>
Copyright (c) 2005-2009 Steven Solie <[email protected]>
Copyright (c) 2016-2021 Sebastian Pipping <[email protected]>
Copyright (c) 2017 Rhodri James <[email protected]>
Copyright (c) 2019 David Loffredo <[email protected]>
Expand Down
9 changes: 2 additions & 7 deletions Modules/expat/xmltok.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
Copyright (c) 2001-2003 Fred L. Drake, Jr. <[email protected]>
Copyright (c) 2002 Greg Stein <[email protected]>
Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
Copyright (c) 2005-2009 Steven Solie <[email protected]>
Copyright (c) 2016-2021 Sebastian Pipping <[email protected]>
Copyright (c) 2005-2009 Steven Solie <[email protected]>
Copyright (c) 2016-2022 Sebastian Pipping <[email protected]>
Copyright (c) 2016 Pascal Cuoq <[email protected]>
Copyright (c) 2016 Don Lewis <[email protected]>
Copyright (c) 2017 Rhodri James <[email protected]>
Expand Down Expand Up @@ -98,11 +98,6 @@
+ ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
& (1u << (((byte)[2]) & 0x1F)))

/* Dispatch on n: expands to UTF8_GET_NAMING2 when n is 2, to
   UTF8_GET_NAMING3 when n is 3, and to 0 for every other value of n.
   p is cast to const unsigned char * before being forwarded; pages is
   passed through unchanged.
   n appears twice in the expansion, so it may be evaluated more than once;
   pass an expression without side effects. */
#define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \
? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
: ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))

/* Detection of invalid UTF-8 sequences is based on Table 3.1B
of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
with the additional restriction of not allowing the Unicode
Expand Down
20 changes: 12 additions & 8 deletions Modules/expat/xmltok_impl.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
Copyright (c) 2000 Clark Cooper <[email protected]>
Copyright (c) 2002 Fred L. Drake, Jr. <[email protected]>
Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
Copyright (c) 2016-2021 Sebastian Pipping <[email protected]>
Copyright (c) 2016-2022 Sebastian Pipping <[email protected]>
Copyright (c) 2017 Rhodri James <[email protected]>
Copyright (c) 2018 Benjamin Peterson <[email protected]>
Copyright (c) 2018 Anton Maklakov <[email protected]>
Expand Down Expand Up @@ -69,7 +69,7 @@
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (! IS_NAME_CHAR(enc, ptr, n)) { \
if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
Expand Down Expand Up @@ -98,7 +98,7 @@
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
Expand Down Expand Up @@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NAME; \
Expand Down Expand Up @@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
ptr += n; /* NOTE: The encoding has already been validated. */ \
break;
LEAD_CASE(2)
LEAD_CASE(3)
Expand Down Expand Up @@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
ptr += n; /* NOTE: The encoding has already been validated. */ \
break;
LEAD_CASE(2)
LEAD_CASE(3)
Expand Down Expand Up @@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
state = inName; \
}
# define LEAD_CASE(n) \
case BT_LEAD##n: \
case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \
START_NAME ptr += (n - MINBPC(enc)); \
break;
LEAD_CASE(2)
Expand Down Expand Up @@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
ptr += n; /* NOTE: The encoding has already been validated. */ \
break;
LEAD_CASE(2)
LEAD_CASE(3)
Expand Down Expand Up @@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
ptr += n; /* NOTE: The encoding has already been validated. */ \
pos->columnNumber++; \
break;
LEAD_CASE(2)
Expand Down