Skip to content

Commit f656bc1

Browse files
bpo-46932: Update bundled libexpat to 2.4.7 (GH-31736) (GH-31741)
(cherry picked from commit 176835c) Co-authored-by: Steve Dower <[email protected]>
1 parent 4a3c610 commit f656bc1

File tree

3 files changed

+156
-14
lines changed

3 files changed

+156
-14
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Update bundled libexpat to 2.4.7

Modules/expat/expat.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Copyright (c) 2016 Cristian Rodríguez <[email protected]>
1616
Copyright (c) 2016 Thomas Beutlich <[email protected]>
1717
Copyright (c) 2017 Rhodri James <[email protected]>
18+
Copyright (c) 2022 Thijs Schreijer <[email protected]>
1819
Licensed under the MIT license:
1920
2021
Permission is hereby granted, free of charge, to any person obtaining
@@ -174,8 +175,10 @@ struct XML_cp {
174175
};
175176

176177
/* This is called for an element declaration. See above for
177-
description of the model argument. It's the caller's responsibility
178-
to free model when finished with it.
178+
description of the model argument. It's the user code's responsibility
179+
to free model when finished with it. See XML_FreeContentModel.
180+
There is no need to free the model from the handler; it can be kept
181+
around and freed at a later stage.
179182
*/
180183
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
181184
const XML_Char *name,
@@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding);
237240
and the local part will be concatenated without any separator.
238241
It is a programming error to use the separator '\0' with namespace
239242
triplets (see XML_SetReturnNSTriplet).
243+
If a namespace separator is chosen that can be part of a URI or
244+
part of an XML name, splitting an expanded name back into its
245+
1, 2 or 3 original parts on application level in the element handler
246+
may end up vulnerable, so these are advised against; sane choices for
247+
a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
248+
249+
Note that Expat does not validate namespace URIs (beyond encoding)
250+
against RFC 3986 today (and is not required to do so with regard to
251+
the XML 1.0 namespaces specification) but it may start doing that
252+
in future releases. Before that, an application using Expat must
253+
be ready to receive namespace URIs containing non-URI characters.
240254
*/
241255
XMLPARSEAPI(XML_Parser)
242256
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
@@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
317331
const XML_Char *pubid,
318332
int has_internal_subset);
319333

320-
/* This is called for the start of the DOCTYPE declaration when the
334+
/* This is called for the end of the DOCTYPE declaration when the
321335
closing > is encountered, but after processing any external
322336
subset.
323337
*/
@@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
10411055
*/
10421056
#define XML_MAJOR_VERSION 2
10431057
#define XML_MINOR_VERSION 4
1044-
#define XML_MICRO_VERSION 6
1058+
#define XML_MICRO_VERSION 7
10451059

10461060
#ifdef __cplusplus
10471061
}

Modules/expat/xmlparse.c

Lines changed: 137 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
1+
/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+)
22
__ __ _
33
___\ \/ /_ __ __ _| |_
44
/ _ \\ /| '_ \ / _` | __|
@@ -34,6 +34,7 @@
3434
Copyright (c) 2019 Vadim Zeitlin <[email protected]>
3535
Copyright (c) 2021 Dong-hee Na <[email protected]>
3636
Copyright (c) 2022 Samanta Navarro <[email protected]>
37+
Copyright (c) 2022 Jeffrey Walton <[email protected]>
3738
Licensed under the MIT license:
3839
3940
Permission is hereby granted, free of charge, to any person obtaining
@@ -133,7 +134,7 @@
133134
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
134135
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
135136
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
136-
* Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
137+
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
137138
* Windows >=Vista (rand_s): _WIN32. \
138139
\
139140
If insist on not using any of these, bypass this error by defining \
@@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
722723
return XML_ParserCreate_MM(encodingName, NULL, tmp);
723724
}
724725

726+
// "xml=http://www.w3.org/XML/1998/namespace"
725727
static const XML_Char implicitContext[]
726728
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
727729
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
@@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
37043706
return XML_ERROR_NONE;
37053707
}
37063708

3709+
/* Reports whether candidate is one of the ASCII characters that appear in
   the RFC 3986 URI grammar.  Returns XML_TRUE for the characters listed in
   the switch below (letters, digits, '%', and the "unreserved",
   "gen-delims" and "sub-delims" punctuation) and XML_FALSE for every
   other value.  Only a single character is inspected; no check is made
   that a whole string forms a well-formed URI.
*/
static XML_Bool
3710+
is_rfc3986_uri_char(XML_Char candidate) {
3711+
// For the RFC 3986 ABNF grammar see
3712+
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3713+
3714+
// Every accepted character is a case label that falls through to the
// single "return XML_TRUE" at the end of the list.
switch (candidate) {
3715+
// From rule "ALPHA" (uppercase half)
3716+
case 'A':
3717+
case 'B':
3718+
case 'C':
3719+
case 'D':
3720+
case 'E':
3721+
case 'F':
3722+
case 'G':
3723+
case 'H':
3724+
case 'I':
3725+
case 'J':
3726+
case 'K':
3727+
case 'L':
3728+
case 'M':
3729+
case 'N':
3730+
case 'O':
3731+
case 'P':
3732+
case 'Q':
3733+
case 'R':
3734+
case 'S':
3735+
case 'T':
3736+
case 'U':
3737+
case 'V':
3738+
case 'W':
3739+
case 'X':
3740+
case 'Y':
3741+
case 'Z':
3742+
3743+
// From rule "ALPHA" (lowercase half)
3744+
case 'a':
3745+
case 'b':
3746+
case 'c':
3747+
case 'd':
3748+
case 'e':
3749+
case 'f':
3750+
case 'g':
3751+
case 'h':
3752+
case 'i':
3753+
case 'j':
3754+
case 'k':
3755+
case 'l':
3756+
case 'm':
3757+
case 'n':
3758+
case 'o':
3759+
case 'p':
3760+
case 'q':
3761+
case 'r':
3762+
case 's':
3763+
case 't':
3764+
case 'u':
3765+
case 'v':
3766+
case 'w':
3767+
case 'x':
3768+
case 'y':
3769+
case 'z':
3770+
3771+
// From rule "DIGIT"
3772+
case '0':
3773+
case '1':
3774+
case '2':
3775+
case '3':
3776+
case '4':
3777+
case '5':
3778+
case '6':
3779+
case '7':
3780+
case '8':
3781+
case '9':
3782+
3783+
// From rule "pct-encoded"
// (only '%' needs listing here; the two hex digits that follow it in a
// percent-encoded octet are already accepted by the letter and digit cases)
3784+
case '%':
3785+
3786+
// From rule "unreserved"
3787+
case '-':
3788+
case '.':
3789+
case '_':
3790+
case '~':
3791+
3792+
// From rule "gen-delims"
3793+
case ':':
3794+
case '/':
3795+
case '?':
3796+
case '#':
3797+
case '[':
3798+
case ']':
3799+
case '@':
3800+
3801+
// From rule "sub-delims"
3802+
case '!':
3803+
case '$':
3804+
case '&':
3805+
case '\'':
3806+
case '(':
3807+
case ')':
3808+
case '*':
3809+
case '+':
3810+
case ',':
3811+
case ';':
3812+
case '=':
3813+
return XML_TRUE;
3814+
3815+
// Anything not listed above is treated as a non-URI character.
default:
3816+
return XML_FALSE;
3817+
}
3818+
}
3819+
37073820
/* addBinding() overwrites the value of prefix->binding without checking.
37083821
Therefore one must keep track of the old value outside of addBinding().
37093822
*/
37103823
static enum XML_Error
37113824
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
37123825
const XML_Char *uri, BINDING **bindingsPtr) {
3826+
// "http://www.w3.org/XML/1998/namespace"
37133827
static const XML_Char xmlNamespace[]
37143828
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
37153829
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
@@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
37203834
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
37213835
ASCII_e, '\0'};
37223836
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3837+
// "http://www.w3.org/2000/xmlns/"
37233838
static const XML_Char xmlnsNamespace[]
37243839
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
37253840
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
@@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
37603875
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
37613876
isXMLNS = XML_FALSE;
37623877

3763-
// NOTE: While Expat does not validate namespace URIs against RFC 3986,
3764-
// we have to at least make sure that the XML processor on top of
3765-
// Expat (that is splitting tag names by namespace separator into
3766-
// 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3767-
// by an attacker putting additional namespace separator characters
3768-
// into namespace declarations. That would be ambiguous and not to
3769-
// be expected.
3770-
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
3878+
// NOTE: While Expat does not validate namespace URIs against RFC 3986
3879+
// today (and is not REQUIRED to do so with regard to the XML 1.0
3880+
// namespaces specification) we have to at least make sure that
3881+
// the application on top of Expat (that is likely splitting expanded
3882+
// element names ("qualified names") of form
3883+
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3884+
// in its element handler code) cannot be confused by an attacker
3885+
// putting additional namespace separator characters into namespace
3886+
// declarations. That would be ambiguous and not to be expected.
3887+
//
3888+
// While the HTML API docs of function XML_ParserCreateNS have been
3889+
// advising against use of a namespace separator character that can
3890+
// appear in a URI for >20 years now, some widespread applications
3891+
// are using URI characters (':' (colon) in particular) for a
3892+
// namespace separator, in practice. To keep these applications
3893+
// functional, we only reject namespace URIs containing the
3894+
// application-chosen namespace separator if the chosen separator
3895+
// is a non-URI character with regard to RFC 3986.
3896+
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3897+
&& ! is_rfc3986_uri_char(uri[len])) {
37713898
return XML_ERROR_SYNTAX;
37723899
}
37733900
}

0 commit comments

Comments
 (0)