|
12 | 12 | import itertools
|
13 | 13 | import locale
|
14 | 14 | import operator
|
| 15 | +import os |
15 | 16 | import pickle
|
16 | 17 | import sys
|
17 | 18 | import textwrap
|
|
20 | 21 | import warnings
|
21 | 22 | import weakref
|
22 | 23 |
|
| 24 | +from functools import partial |
23 | 25 | from itertools import product, islice
|
24 | 26 | from test import support
|
25 | 27 | from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
|
@@ -3527,6 +3529,231 @@ def test_correct_import_pyET(self):
|
3527 | 3529 | self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
|
3528 | 3530 | self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
|
3529 | 3531 |
|
| 3532 | + |
| 3533 | +# -------------------------------------------------------------------- |
| 3534 | + |
def c14n_roundtrip(xml, **options):
    """Return the canonical (C14N) serialization of the XML text *xml*.

    All keyword *options* are forwarded unchanged to ``pyET.canonicalize()``.
    """
    canonical_text = pyET.canonicalize(xml, **options)
    return canonical_text
| 3537 | + |
| 3538 | + |
class C14NTest(unittest.TestCase):
    """Tests for XML canonicalization through ``canonicalize()``.

    The first two tests feed inline documents through ``c14n_roundtrip()``
    and compare against literal expected output.  The third one is driven by
    the sample files stored under ``xmltestdata/c14n-20``.
    """
    # Show complete diffs: the canonical documents are long strings.
    maxDiff = None

    #
    # simple roundtrip tests (from c14n.py)

    def test_simple_roundtrip(self):
        # Basics
        self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"),  # FIXME
                         '<doc xmlns="uri"></doc>')
        self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
                         '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
                         '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
        self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
                         '<elem></elem>')

        # C14N spec
        #
        # The inputs below deliberately use entity and character references;
        # the parser resolves them and the canonical form re-escapes only
        # what C14N requires.  The references must stay spelled out in these
        # literals: with them decoded, the inputs are no longer well-formed
        # XML and the expected strings no longer match the serializer output.
        self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
                         '<doc>Hello, world!</doc>')
        self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
                         '<value>2</value>')
        # Text content escapes "&", "<" and ">" but leaves quotes alone.
        self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
                         '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
        # Attribute values escape "&", "<" and '"' but leave ">" alone.
        self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
                         '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
        # CR, LF and TAB in attribute values come back as character references.
        self.assertEqual(c14n_roundtrip("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
                         '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>')
        self.assertEqual(c14n_roundtrip("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>"),
                         '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>')
        self.assertEqual(c14n_roundtrip("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
                         '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>')

        # fragments from PJ's tests
        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')

    def test_c14n_exclusion(self):
        # The 4-space nesting of this document is significant: the two
        # assertions that do not pass strip_text=True expect exactly this
        # whitespace to survive around the excluded elements.
        xml = textwrap.dedent("""\
        <root xmlns:x="http://example.com/x">
            <a x:attr="attrx">
                <b>abtext</b>
            </a>
            <b>btext</b>
            <c>
                <x:d>dtext</x:d>
            </c>
        </root>
        """)
        # No exclusions: namespace declarations move to where they are used.
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
                           exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        # Excluding a tag drops its whole subtree (<a> takes its <b> along).
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
            '<root>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        # Without strip_text the whitespace around excluded elements remains.
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['a', 'b']),
            '<root>\n'
            '    \n'
            '    \n'
            '    <c>\n'
            '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
            '    </c>\n'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>\n'
            '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
            '        \n'
            '    </a>\n'
            '    \n'
            '    <c>\n'
            '        \n'
            '    </c>\n'
            '</root>')

    #
    # basic method=c14n tests from the c14n 2.0 specification.  uses
    # test files under xmltestdata/c14n-20.

    # note that this uses generated C14N versions of the standard ET.write
    # output, not roundtripped C14N (see above).

    def test_xml_c14n2(self):
        # File naming scheme in the data directory (as used below):
        #   in*.xml                  input documents
        #   c14n*.xml                option sets, one child element per option
        #   out_<input>_<config>.xml expected canonical output
        c14n2_ns = '{http://www.w3.org/2010/xml-c14n2}'
        datadir = findfile("c14n-20", subdir="xmltestdata")
        full_path = partial(os.path.join, datadir)

        # Base names (without the ".xml" suffix) of all XML files, sorted.
        files = [filename[:-4] for filename in sorted(os.listdir(datadir))
                 if filename.endswith('.xml')]
        input_files = [
            filename for filename in files
            if filename.startswith('in')
        ]
        # config file name -> {option local name: (stripped text, element)}
        configs = {
            filename: {
                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
                for option in ET.parse(full_path(filename) + ".xml").getroot()
            }
            for filename in files
            if filename.startswith('c14n')
        }

        # input file name -> [(output file name, config), ...]
        tests = {
            input_file: [
                (filename, configs[filename.rsplit('_', 1)[-1]])
                for filename in files
                if filename.startswith(f'out_{input_file}_')
                and filename.rsplit('_', 1)[-1] in configs
            ]
            for input_file in input_files
        }

        # Make sure we found all test cases.
        self.assertEqual(30, sum(map(len, tests.values())))

        def get_option(config, option_name, default=None):
            # Text value of an option, or *default* if the option is absent.
            return config.get(option_name, (default, ()))[0]

        def qualified_names(parent, child_name):
            # "{NS}Name" for every <c14n2:child_name NS=... Name=...> child.
            return [
                f"{{{el.get('NS')}}}{el.get('Name')}"
                for el in parent.findall(c14n2_ns + child_name)
            ]

        for input_file, output_files in tests.items():
            for output_file, config in output_files:
                # NOTE: comparing IgnoreComments against 'true' reads as if
                # it were inverted; the original author explicitly marked it
                # as correct ("no, it's right :)") for the bundled test data,
                # so the logic is kept as is.
                keep_comments = get_option(
                    config, 'IgnoreComments') == 'true'  # no, it's right :)
                strip_text = get_option(
                    config, 'TrimTextNodes') == 'true'
                rewrite_prefixes = get_option(
                    config, 'PrefixRewrite') == 'sequential'

                # The <QNameAware> element, if this config has one.  Compare
                # against None explicitly: a childless Element is falsy.
                qname_aware = (
                    config['QNameAware'][1] if 'QNameAware' in config else None)
                if qname_aware is not None:
                    qattrs = qualified_names(qname_aware, 'QualifiedAttr')
                    qtags = qualified_names(qname_aware, 'Element')
                else:
                    qtags = qattrs = None

                # Build subtest description from config.
                config_descr = ','.join(
                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
                    for name, (value, children) in sorted(config.items())
                )

                with self.subTest(f"{output_file}({config_descr})"):
                    # Both inputs are skipped with the same message unless
                    # prefixes get rewritten.
                    if (input_file in ('inNsRedecl', 'inNsSuperfluous')
                            and not rewrite_prefixes):
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if (qname_aware is not None
                            and qname_aware.find(c14n2_ns + 'XPathElement') is not None):
                        self.skipTest(
                            f"QName rewriting in XPath text is not supported in {output_file}")

                    input_path = full_path(input_file + ".xml")
                    if input_file == 'inC14N5':
                        # Hack: avoid setting up external entity resolution in the parser.
                        # The entity reference is replaced by the entity file's
                        # content and the result is parsed from memory.
                        with open(full_path('world.txt'), 'rb') as entity_file, \
                                open(input_path, 'rb') as xml_file:
                            source = io.BytesIO(
                                xml_file.read().replace(b'&ent2;', entity_file.read()))
                    else:
                        source = input_path

                    text = ET.canonicalize(
                        from_file=source,
                        with_comments=keep_comments,
                        strip_text=strip_text,
                        rewrite_prefixes=rewrite_prefixes,
                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                    with open(full_path(output_file + ".xml"), 'r',
                              encoding='utf8') as expected_file:
                        expected = expected_file.read()
                    if input_file == 'inC14N3':
                        # FIXME: cET resolves default attributes but ET does not!
                        expected = expected.replace(' attr="default"', '')
                        text = text.replace(' attr="default"', '')
                    self.assertEqual(expected, text)
| 3756 | + |
3530 | 3757 | # --------------------------------------------------------------------
|
3531 | 3758 |
|
3532 | 3759 |
|
@@ -3559,6 +3786,8 @@ def test_main(module=None):
|
3559 | 3786 | XMLParserTest,
|
3560 | 3787 | XMLPullParserTest,
|
3561 | 3788 | BugsTest,
|
| 3789 | + KeywordArgsTest, |
| 3790 | + C14NTest, |
3562 | 3791 | ]
|
3563 | 3792 |
|
3564 | 3793 | # These tests will only run for the pure-Python version that doesn't import
|
|
0 commit comments