Skip to content

Commit 7f0ed48

Browse files
committed
Implement C14N 2.0 as a new canonicalize() function in ElementTree.
Missing features: - prefix renaming in XPath expressions (tag and attribute text is supported) - preservation of original prefixes given redundant namespace declarations
1 parent 3b33264 commit 7f0ed48

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+744
-22
lines changed

Lib/test/test_xml_etree.py

Lines changed: 158 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111
import io
1212
import locale
1313
import operator
14+
import os
1415
import pickle
1516
import sys
16-
import textwrap
1717
import types
1818
import unittest
1919
import warnings
2020
import weakref
2121

22+
from functools import partial
2223
from itertools import product, islice
2324
from test import support
2425
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
@@ -716,27 +717,6 @@ def end_ns(self, prefix):
716717
('end-ns', ''),
717718
])
718719

719-
def test_custom_builder_only_end_ns(self):
720-
class Builder(list):
721-
def end_ns(self, prefix):
722-
self.append(("end-ns", prefix))
723-
724-
builder = Builder()
725-
parser = ET.XMLParser(target=builder)
726-
parser.feed(textwrap.dedent("""\
727-
<?pi data?>
728-
<!-- comment -->
729-
<root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
730-
<a:element key='value'>text</a:element>
731-
<p:element>text</p:element>tail
732-
<empty-element/>
733-
</root>
734-
"""))
735-
self.assertEqual(builder, [
736-
('end-ns', 'a'),
737-
('end-ns', 'p'),
738-
('end-ns', ''),
739-
])
740720

741721
# Element.getchildren() and ElementTree.getiterator() are deprecated.
742722
@checkwarnings(("This method will be removed in future versions. "
@@ -3444,6 +3424,160 @@ def test_correct_import_pyET(self):
34443424
self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
34453425
self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
34463426

3427+
3428+
# --------------------------------------------------------------------
3429+
3430+
def c14n_roundtrip(xml, **options):
3431+
f = io.StringIO()
3432+
pyET.canonicalize(f.write, xml, **options)
3433+
return f.getvalue()
3434+
3435+
3436+
class C14NTest(unittest.TestCase):
3437+
maxDiff = None
3438+
3439+
#
3440+
# simple roundtrip tests (from c14n.py)
3441+
3442+
def test_simple_roundtrip(self):
3443+
# Basics
3444+
self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
3445+
self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME
3446+
'<doc xmlns="uri"></doc>')
3447+
self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
3448+
'<prefix:doc xmlns:prefix="uri"></prefix:doc>')
3449+
self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
3450+
'<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
3451+
self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
3452+
'<elem></elem>')
3453+
3454+
# C14N spec
3455+
self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
3456+
'<doc>Hello, world!</doc>')
3457+
self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
3458+
'<value>2</value>')
3459+
self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
3460+
'<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
3461+
self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
3462+
'<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
3463+
self.assertEqual(c14n_roundtrip("<norm attr=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>"),
3464+
'<norm attr=" \' &#xD;&#xA;&#x9; \' "></norm>')
3465+
self.assertEqual(c14n_roundtrip("<normNames attr=' A &#x20;&#13;&#xa;&#9; B '/>"),
3466+
'<normNames attr=" A &#xD;&#xA;&#x9; B "></normNames>')
3467+
self.assertEqual(c14n_roundtrip("<normId id=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>"),
3468+
'<normId id=" \' &#xD;&#xA;&#x9; \' "></normId>')
3469+
3470+
# fragments from PJ's tests
3471+
#self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
3472+
#'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')
3473+
3474+
#
3475+
# basic method=c14n tests from the c14n 2.0 specification. uses
3476+
# test files under xmltestdata/c14n-20.
3477+
3478+
# note that this uses generated C14N versions of the standard ET.write
3479+
# output, not roundtripped C14N (see above).
3480+
3481+
def test_xml_c14n2(self):
3482+
datadir = findfile("c14n-20", subdir="xmltestdata")
3483+
full_path = partial(os.path.join, datadir)
3484+
3485+
files = [filename[:-4] for filename in sorted(os.listdir(datadir))
3486+
if filename.endswith('.xml')]
3487+
input_files = [
3488+
filename for filename in files
3489+
if filename.startswith('in')
3490+
]
3491+
configs = {
3492+
filename: {
3493+
# <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
3494+
option.tag.split('}')[-1]: ((option.text or '').strip(), option)
3495+
for option in ET.parse(full_path(filename) + ".xml").getroot()
3496+
}
3497+
for filename in files
3498+
if filename.startswith('c14n')
3499+
}
3500+
3501+
tests = {
3502+
input_file: [
3503+
(filename, configs[filename.rsplit('_', 1)[-1]])
3504+
for filename in files
3505+
if filename.startswith(f'out_{input_file}_')
3506+
and filename.rsplit('_', 1)[-1] in configs
3507+
]
3508+
for input_file in input_files
3509+
}
3510+
3511+
# Make sure we found all test cases.
3512+
self.assertEqual(30, len([
3513+
output_file for output_files in tests.values()
3514+
for output_file in output_files]))
3515+
3516+
def get_option(config, option_name, default=None):
3517+
return config.get(option_name, (default, ()))[0]
3518+
3519+
for input_file, output_files in tests.items():
3520+
for output_file, config in output_files:
3521+
keep_comments = get_option(
3522+
config, 'IgnoreComments') == 'true' # no, it's right :)
3523+
strip_text = get_option(
3524+
config, 'TrimTextNodes') == 'true'
3525+
rewrite_prefixes = get_option(
3526+
config, 'PrefixRewrite') == 'sequential'
3527+
if 'QNameAware' in config:
3528+
qattrs = [
3529+
f"{{{el.get('NS')}}}{el.get('Name')}"
3530+
for el in config['QNameAware'][1].findall(
3531+
'{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
3532+
]
3533+
qtags = [
3534+
f"{{{el.get('NS')}}}{el.get('Name')}"
3535+
for el in config['QNameAware'][1].findall(
3536+
'{http://www.w3.org/2010/xml-c14n2}Element')
3537+
]
3538+
else:
3539+
qtags = qattrs = None
3540+
3541+
# Build subtest description from config.
3542+
config_descr = ','.join(
3543+
f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
3544+
for name, (value, children) in sorted(config.items())
3545+
)
3546+
3547+
with self.subTest(f"{output_file}({config_descr})"):
3548+
if input_file == 'inNsRedecl' and not rewrite_prefixes:
3549+
self.skipTest(
3550+
f"Redeclared namespace handling is not supported in {output_file}")
3551+
if input_file == 'inNsSuperfluous' and not rewrite_prefixes:
3552+
self.skipTest(
3553+
f"Redeclared namespace handling is not supported in {output_file}")
3554+
if 'QNameAware' in config and config['QNameAware'][1].find(
3555+
'{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
3556+
self.skipTest(
3557+
f"QName rewriting in XPath text is not supported in {output_file}")
3558+
3559+
out = io.StringIO()
3560+
with open(full_path(input_file + ".xml"), 'r', encoding='utf8') as f:
3561+
if input_file == 'inC14N5':
3562+
# Hack: avoid setting up external entity resolution in the parser.
3563+
with open(full_path('world.txt'), 'r', encoding='utf8') as entity_file:
3564+
f = io.StringIO(f.read().replace('&ent2;', entity_file.read()))
3565+
3566+
ET.canonicalize(
3567+
out.write, file=f,
3568+
comments=keep_comments,
3569+
strip_text=strip_text,
3570+
rewrite_prefixes=rewrite_prefixes,
3571+
qname_aware_tags=qtags, qname_aware_attrs=qattrs)
3572+
text = out.getvalue()
3573+
with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f:
3574+
expected = f.read()
3575+
if input_file == 'inC14N3':
3576+
# FIXME: cET resolves default attributes but ET does not!
3577+
expected = expected.replace(' attr="default"', '')
3578+
text = text.replace(' attr="default"', '')
3579+
self.assertEqual(expected, text)
3580+
34473581
# --------------------------------------------------------------------
34483582

34493583

@@ -3476,6 +3610,8 @@ def test_main(module=None):
34763610
XMLParserTest,
34773611
XMLPullParserTest,
34783612
BugsTest,
3613+
KeywordArgsTest,
3614+
C14NTest,
34793615
]
34803616

34813617
# These tests will only run for the pure-Python version that doesn't import
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:IgnoreComments>true</c14n2:IgnoreComments>
3+
</dsig:CanonicalizationMethod>
4+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
</dsig:CanonicalizationMethod>
3+
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
3+
</dsig:CanonicalizationMethod>
4+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
3+
<c14n2:QNameAware>
4+
<c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
5+
</c14n2:QNameAware>
6+
</dsig:CanonicalizationMethod>
7+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
3+
<c14n2:QNameAware>
4+
<c14n2:Element Name="bar" NS="http://a"/>
5+
<c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
6+
</c14n2:QNameAware>
7+
</dsig:CanonicalizationMethod>
8+
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:QNameAware>
3+
<c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
4+
</c14n2:QNameAware>
5+
</dsig:CanonicalizationMethod>
6+
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:QNameAware>
3+
<c14n2:Element Name="bar" NS="http://a"/>
4+
</c14n2:QNameAware>
5+
</dsig:CanonicalizationMethod>
6+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:QNameAware>
3+
<c14n2:Element Name="bar" NS="http://a"/>
4+
<c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
5+
</c14n2:QNameAware>
6+
</dsig:CanonicalizationMethod>
7+
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
2+
<c14n2:TrimTextNodes>true</c14n2:TrimTextNodes>
3+
</dsig:CanonicalizationMethod>
4+

Lib/test/xmltestdata/c14n-20/doc.dtd

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<!ELEMENT doc (#PCDATA)>
4+
5+
6+

Lib/test/xmltestdata/c14n-20/doc.xsl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<?xml version="1.0"?>
2+
<xsl:stylesheet version="1.0"
3+
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
4+
>
5+
</xsl:stylesheet>
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?xml version="1.0"?>
2+
3+
<?xml-stylesheet href="doc.xsl"
4+
type="text/xsl" ?>
5+
6+
<!DOCTYPE doc SYSTEM "doc.dtd">
7+
8+
<doc>Hello, world!<!-- Comment 1 --></doc>
9+
10+
<?pi-without-data ?>
11+
12+
<!-- Comment 2 -->
13+
14+
<!-- Comment 3 -->
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<doc>
2+
<clean> </clean>
3+
<dirty> A B </dirty>
4+
<mixed>
5+
A
6+
<clean> </clean>
7+
B
8+
<dirty> A B </dirty>
9+
C
10+
</mixed>
11+
</doc>
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
2+
<doc>
3+
<e1 />
4+
<e2 ></e2>
5+
<e3 name = "elem3" id="elem3" />
6+
<e4 name="elem4" id="elem4" ></e4>
7+
<e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
8+
xmlns:b="http://www.ietf.org"
9+
xmlns:a="http://www.w3.org"
10+
xmlns="http://example.org"/>
11+
<e6 xmlns="" xmlns:a="http://www.w3.org">
12+
<e7 xmlns="http://www.ietf.org">
13+
<e8 xmlns="" xmlns:a="http://www.w3.org">
14+
<e9 xmlns="" xmlns:a="http://www.ietf.org"/>
15+
</e8>
16+
</e7>
17+
</e6>
18+
</doc>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<!DOCTYPE doc [
2+
<!ATTLIST normId id ID #IMPLIED>
3+
<!ATTLIST normNames attr NMTOKENS #IMPLIED>
4+
]>
5+
<doc>
6+
<text>First line&#x0d;&#10;Second line</text>
7+
<value>&#x32;</value>
8+
<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
9+
<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>
10+
<norm attr=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>
11+
<normNames attr=' A &#x20;&#13;&#xa;&#9; B '/>
12+
<normId id=' &apos;&#x20;&#13;&#xa;&#9; &apos; '/>
13+
</doc>
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<!DOCTYPE doc [
2+
<!ATTLIST doc attrExtEnt CDATA #IMPLIED>
3+
<!ENTITY ent1 "Hello">
4+
<!ENTITY ent2 SYSTEM "world.txt">
5+
<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
6+
<!NOTATION gif SYSTEM "viewgif.exe">
7+
]>
8+
<doc attrExtEnt="entExt">
9+
&ent1;, &ent2;!
10+
</doc>
11+
12+
<!-- Let world.txt contain "world" (excluding the quotes) -->
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?xml version="1.0" encoding="ISO-8859-1"?>
2+
<doc>&#169;</doc>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:child="http://c" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
2+
<a:bar>xsd:string</a:bar>
3+
<dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
4+
</a:foo>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<foo xmlns:a="http://a" xmlns:b="http://b">
2+
<b:bar b:att1="val" att2="val"/>
3+
</foo>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:c="http://c">
2+
<b:bar/>
3+
<b:bar/>
4+
<b:bar/>
5+
<a:bar b:att1="val"/>
6+
</a:foo>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<foo xmlns:a="http://z3" xmlns:b="http://z2" a:att1="val1" b:att2="val2">
2+
<bar xmlns="http://z0" xmlns:a="http://z2" a:att1="val1" b:att2="val2" xmlns:b="http://z3" />
3+
</foo>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<a:foo xmlns:a="http://z3" xmlns:b="http://z2" b:att1="val1" c:att3="val3" b:att2="val2" xmlns:c="http://z1" xmlns:d="http://z0">
2+
<c:bar/>
3+
<c:bar d:att3="val3"/>
4+
</a:foo>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<foo xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2" xmlns="http://z0">
2+
<c:bar xmlns:a="http://z0" xmlns:c="http://z0" c:att3="val3"/>
3+
<d:bar xmlns:d="http://z0"/>
4+
</foo>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<foo xmlns="http://z0" xml:id="23">
2+
<bar xsi:type="xsd:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">data</bar>
3+
</foo>

0 commit comments

Comments
 (0)