Skip to content

Commit c6a7bdb

Browse files
authored
bpo-20928: support base-URL and recursive includes in etree.ElementInclude (#5723)
* bpo-20928: bring ElementTree's XInclude support on par with the implementation in lxml by adding support for recursive includes and a base-URL. * bpo-20928: Support xincluding the same file multiple times, just not recursively. * bpo-20928: Add 'max_depth' parameter to xinclude that limits the maximum recursion depth to 6 by default. * Add news entry for updated ElementInclude support
1 parent ded8888 commit c6a7bdb

File tree

3 files changed

+132
-7
lines changed

3 files changed

+132
-7
lines changed

Lib/test/test_xml_etree.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,6 +1668,17 @@ def test_unknown_event(self):
16681668
</document>
16691669
""".format(html.escape(SIMPLE_XMLFILE, True))
16701670

1671+
XINCLUDE["include_c1_repeated.xml"] = """\
1672+
<?xml version='1.0'?>
1673+
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1674+
<p>The following is the source code of Recursive1.xml:</p>
1675+
<xi:include href="C1.xml"/>
1676+
<xi:include href="C1.xml"/>
1677+
<xi:include href="C1.xml"/>
1678+
<xi:include href="C1.xml"/>
1679+
</document>
1680+
"""
1681+
16711682
#
16721683
# badly formatted xi:include tags
16731684

@@ -1688,6 +1699,31 @@ def test_unknown_event(self):
16881699
</div>
16891700
"""
16901701

1702+
XINCLUDE["Recursive1.xml"] = """\
1703+
<?xml version='1.0'?>
1704+
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1705+
<p>The following is the source code of Recursive2.xml:</p>
1706+
<xi:include href="Recursive2.xml"/>
1707+
</document>
1708+
"""
1709+
1710+
XINCLUDE["Recursive2.xml"] = """\
1711+
<?xml version='1.0'?>
1712+
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1713+
<p>The following is the source code of Recursive3.xml:</p>
1714+
<xi:include href="Recursive3.xml"/>
1715+
</document>
1716+
"""
1717+
1718+
XINCLUDE["Recursive3.xml"] = """\
1719+
<?xml version='1.0'?>
1720+
<document xmlns:xi="http://www.w3.org/2001/XInclude">
1721+
<p>The following is the source code of Recursive1.xml:</p>
1722+
<xi:include href="Recursive1.xml"/>
1723+
</document>
1724+
"""
1725+
1726+
16911727
class XIncludeTest(unittest.TestCase):
16921728

16931729
def xinclude_loader(self, href, parse="xml", encoding=None):
@@ -1789,6 +1825,13 @@ def test_xinclude(self):
17891825
' </ns0:include>\n'
17901826
'</div>') # C5
17911827

1828+
def test_xinclude_repeated(self):
1829+
from xml.etree import ElementInclude
1830+
1831+
document = self.xinclude_loader("include_c1_repeated.xml")
1832+
ElementInclude.include(document, self.xinclude_loader)
1833+
self.assertEqual(1+4*2, len(document.findall(".//p")))
1834+
17921835
def test_xinclude_failures(self):
17931836
from xml.etree import ElementInclude
17941837

@@ -1821,6 +1864,45 @@ def test_xinclude_failures(self):
18211864
"xi:fallback tag must be child of xi:include "
18221865
"('{http://www.w3.org/2001/XInclude}fallback')")
18231866

1867+
# Test infinitely recursive includes.
1868+
document = self.xinclude_loader("Recursive1.xml")
1869+
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1870+
ElementInclude.include(document, self.xinclude_loader)
1871+
self.assertEqual(str(cm.exception),
1872+
"recursive include of Recursive2.xml")
1873+
1874+
# Test 'max_depth' limitation.
1875+
document = self.xinclude_loader("Recursive1.xml")
1876+
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1877+
ElementInclude.include(document, self.xinclude_loader, max_depth=None)
1878+
self.assertEqual(str(cm.exception),
1879+
"recursive include of Recursive2.xml")
1880+
1881+
document = self.xinclude_loader("Recursive1.xml")
1882+
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1883+
ElementInclude.include(document, self.xinclude_loader, max_depth=0)
1884+
self.assertEqual(str(cm.exception),
1885+
"maximum xinclude depth reached when including file Recursive2.xml")
1886+
1887+
document = self.xinclude_loader("Recursive1.xml")
1888+
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1889+
ElementInclude.include(document, self.xinclude_loader, max_depth=1)
1890+
self.assertEqual(str(cm.exception),
1891+
"maximum xinclude depth reached when including file Recursive3.xml")
1892+
1893+
document = self.xinclude_loader("Recursive1.xml")
1894+
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1895+
ElementInclude.include(document, self.xinclude_loader, max_depth=2)
1896+
self.assertEqual(str(cm.exception),
1897+
"maximum xinclude depth reached when including file Recursive1.xml")
1898+
1899+
document = self.xinclude_loader("Recursive1.xml")
1900+
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1901+
ElementInclude.include(document, self.xinclude_loader, max_depth=3)
1902+
self.assertEqual(str(cm.exception),
1903+
"recursive include of Recursive2.xml")
1904+
1905+
18241906
# --------------------------------------------------------------------
18251907
# reported bugs
18261908

Lib/xml/etree/ElementInclude.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,28 @@
5050

5151
import copy
5252
from . import ElementTree
53+
from urllib.parse import urljoin
5354

5455
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
5556

5657
XINCLUDE_INCLUDE = XINCLUDE + "include"
5758
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
5859

60+
# For security reasons, the inclusion depth is limited to this read-only value by default.
61+
DEFAULT_MAX_INCLUSION_DEPTH = 6
62+
63+
5964
##
6065
# Fatal include error.
6166

6267
class FatalIncludeError(SyntaxError):
6368
pass
6469

70+
71+
class LimitedRecursiveIncludeError(FatalIncludeError):
72+
pass
73+
74+
6575
##
6676
# Default loader. This loader reads an included resource from disk.
6777
#
@@ -92,28 +102,58 @@ def default_loader(href, parse, encoding=None):
92102
# @param loader Optional resource loader. If omitted, it defaults
93103
# to {@link default_loader}. If given, it should be a callable
94104
# that implements the same interface as <b>default_loader</b>.
105+
# @param base_url The base URL of the original file, to resolve
106+
# relative include file references.
107+
# @param max_depth The maximum number of recursive inclusions.
108+
# Limited to reduce the risk of malicious content explosion.
109+
# Pass None to disable the limitation (negative values raise ValueError).
110+
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
95111
# @throws FatalIncludeError If the function fails to include a given
96112
# resource, or if the tree contains malformed XInclude elements.
97-
# @throws OSError If the function fails to load a given resource.
113+
# @throws OSError If the function fails to load a given resource.
114+
# @returns None; XInclude nodes are replaced in place within the given tree.
98115

99-
def include(elem, loader=None):
116+
def include(elem, loader=None, base_url=None,
117+
max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
118+
if max_depth is None:
119+
max_depth = -1
120+
elif max_depth < 0:
121+
raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
122+
123+
if hasattr(elem, 'getroot'):
124+
elem = elem.getroot()
100125
if loader is None:
101126
loader = default_loader
127+
128+
_include(elem, loader, base_url, max_depth, set())
129+
130+
131+
def _include(elem, loader, base_url, max_depth, _parent_hrefs):
102132
# look for xinclude elements
103133
i = 0
104134
while i < len(elem):
105135
e = elem[i]
106136
if e.tag == XINCLUDE_INCLUDE:
107137
# process xinclude directive
108138
href = e.get("href")
139+
if base_url:
140+
href = urljoin(base_url, href)
109141
parse = e.get("parse", "xml")
110142
if parse == "xml":
143+
if href in _parent_hrefs:
144+
raise FatalIncludeError("recursive include of %s" % href)
145+
if max_depth == 0:
146+
raise LimitedRecursiveIncludeError(
147+
"maximum xinclude depth reached when including file %s" % href)
148+
_parent_hrefs.add(href)
111149
node = loader(href, parse)
112150
if node is None:
113151
raise FatalIncludeError(
114152
"cannot load %r as %r" % (href, parse)
115153
)
116-
node = copy.copy(node)
154+
node = copy.copy(node) # FIXME: this makes little sense with recursive includes
155+
_include(node, loader, href, max_depth - 1, _parent_hrefs)
156+
_parent_hrefs.remove(href)
117157
if e.tail:
118158
node.tail = (node.tail or "") + e.tail
119159
elem[i] = node
@@ -123,11 +163,13 @@ def include(elem, loader=None):
123163
raise FatalIncludeError(
124164
"cannot load %r as %r" % (href, parse)
125165
)
166+
if e.tail:
167+
text += e.tail
126168
if i:
127169
node = elem[i-1]
128-
node.tail = (node.tail or "") + text + (e.tail or "")
170+
node.tail = (node.tail or "") + text
129171
else:
130-
elem.text = (elem.text or "") + text + (e.tail or "")
172+
elem.text = (elem.text or "") + text
131173
del elem[i]
132174
continue
133175
else:
@@ -139,5 +181,5 @@ def include(elem, loader=None):
139181
"xi:fallback tag must be child of xi:include (%r)" % e.tag
140182
)
141183
else:
142-
include(e, loader)
143-
i = i + 1
184+
_include(e, loader, base_url, max_depth, _parent_hrefs)
185+
i += 1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ElementTree supports recursive XInclude processing. Patch by Stefan Behnel.

0 commit comments

Comments
 (0)