Skip to content

Commit dde3eeb

Browse files
authored
bpo-36676: Namespace prefix aware parsing support for the ET.XMLParser target (GH-12885)
* bpo-36676: Implement namespace prefix aware parsing support for the XMLParser target in ElementTree.
1 parent 43851a2 commit dde3eeb

File tree

5 files changed

+258
-30
lines changed

5 files changed

+258
-30
lines changed

Doc/library/xml.etree.elementtree.rst

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,7 +1086,7 @@ TreeBuilder Objects
10861086

10871087

10881088
In addition, a custom :class:`TreeBuilder` object can provide the
1089-
following method:
1089+
following methods:
10901090

10911091
.. method:: doctype(name, pubid, system)
10921092

@@ -1096,6 +1096,23 @@ TreeBuilder Objects
10961096

10971097
.. versionadded:: 3.2
10981098

1099+
.. method:: start_ns(prefix, uri)
1100+
1101+
Is called whenever the parser encounters a new namespace declaration,
1102+
before the ``start()`` callback for the opening element that defines it.
1103+
*prefix* is ``''`` for the default namespace and the declared
1104+
namespace prefix name otherwise. *uri* is the namespace URI.
1105+
1106+
.. versionadded:: 3.8
1107+
1108+
.. method:: end_ns(prefix)
1109+
1110+
Is called after the ``end()`` callback of an element that declared
1111+
a namespace prefix mapping, with the name of the *prefix* that went
1112+
out of scope.  As with ``start_ns()``, *prefix* is ``''`` for the default namespace.
1113+
1114+
.. versionadded:: 3.8
1115+
10991116

11001117
.. _elementtree-xmlparser-objects:
11011118

@@ -1131,7 +1148,8 @@ XMLParser Objects
11311148

11321149
:meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method
11331150
for each opening tag, its ``end(tag)`` method for each closing tag, and data
1134-
is processed by method ``data(data)``. :meth:`XMLParser.close` calls
1151+
is processed by method ``data(data)``. For further supported callback
1152+
methods, see the :class:`TreeBuilder` class. :meth:`XMLParser.close` calls
11351153
*target*\'s method ``close()``. :class:`XMLParser` can be used not only for
11361154
building a tree structure. This is an example of counting the maximum depth
11371155
of an XML file::

Lib/test/test_xml_etree.py

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@
1414
import operator
1515
import pickle
1616
import sys
17+
import textwrap
1718
import types
1819
import unittest
1920
import warnings
2021
import weakref
2122

22-
from itertools import product
23+
from itertools import product, islice
2324
from test import support
2425
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
2526

@@ -694,12 +695,17 @@ def pi(self, target, data):
694695
self.append(("pi", target, data))
695696
def comment(self, data):
696697
self.append(("comment", data))
698+
def start_ns(self, prefix, uri):
699+
self.append(("start-ns", prefix, uri))
700+
def end_ns(self, prefix):
701+
self.append(("end-ns", prefix))
697702
builder = Builder()
698703
parser = ET.XMLParser(target=builder)
699704
parser.feed(data)
700705
self.assertEqual(builder, [
701706
('pi', 'pi', 'data'),
702707
('comment', ' comment '),
708+
('start-ns', '', 'namespace'),
703709
('start', '{namespace}root'),
704710
('start', '{namespace}element'),
705711
('end', '{namespace}element'),
@@ -708,8 +714,30 @@ def comment(self, data):
708714
('start', '{namespace}empty-element'),
709715
('end', '{namespace}empty-element'),
710716
('end', '{namespace}root'),
717+
('end-ns', ''),
711718
])
712719

720+
def test_custom_builder_only_end_ns(self):
721+
class Builder(list):
722+
def end_ns(self, prefix):
723+
self.append(("end-ns", prefix))
724+
725+
builder = Builder()
726+
parser = ET.XMLParser(target=builder)
727+
parser.feed(textwrap.dedent("""\
728+
<?pi data?>
729+
<!-- comment -->
730+
<root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
731+
<a:element key='value'>text</a:element>
732+
<p:element>text</p:element>tail
733+
<empty-element/>
734+
</root>
735+
"""))
736+
self.assertEqual(builder, [
737+
('end-ns', 'a'),
738+
('end-ns', 'p'),
739+
('end-ns', ''),
740+
])
713741

714742
# Element.getchildren() and ElementTree.getiterator() are deprecated.
715743
@checkwarnings(("This method will be removed in future versions. "
@@ -1194,14 +1222,19 @@ def _feed(self, parser, data, chunk_size=None):
11941222
for i in range(0, len(data), chunk_size):
11951223
parser.feed(data[i:i+chunk_size])
11961224

1197-
def assert_events(self, parser, expected):
1225+
def assert_events(self, parser, expected, max_events=None):
11981226
self.assertEqual(
11991227
[(event, (elem.tag, elem.text))
1200-
for event, elem in parser.read_events()],
1228+
for event, elem in islice(parser.read_events(), max_events)],
12011229
expected)
12021230

1203-
def assert_event_tags(self, parser, expected):
1204-
events = parser.read_events()
1231+
def assert_event_tuples(self, parser, expected, max_events=None):
1232+
self.assertEqual(
1233+
list(islice(parser.read_events(), max_events)),
1234+
expected)
1235+
1236+
def assert_event_tags(self, parser, expected, max_events=None):
1237+
events = islice(parser.read_events(), max_events)
12051238
self.assertEqual([(action, elem.tag) for action, elem in events],
12061239
expected)
12071240

@@ -1276,6 +1309,56 @@ def test_ns_events(self):
12761309
self.assertEqual(list(parser.read_events()), [('end-ns', None)])
12771310
self.assertIsNone(parser.close())
12781311

1312+
def test_ns_events_start(self):
1313+
parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
1314+
self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1315+
self.assert_event_tuples(parser, [
1316+
('start-ns', ('', 'abc')),
1317+
('start-ns', ('p', 'xyz')),
1318+
], max_events=2)
1319+
self.assert_event_tags(parser, [
1320+
('start', '{abc}tag'),
1321+
], max_events=1)
1322+
1323+
self._feed(parser, "<child />\n")
1324+
self.assert_event_tags(parser, [
1325+
('start', '{abc}child'),
1326+
('end', '{abc}child'),
1327+
])
1328+
1329+
self._feed(parser, "</tag>\n")
1330+
parser.close()
1331+
self.assert_event_tags(parser, [
1332+
('end', '{abc}tag'),
1333+
])
1334+
1335+
def test_ns_events_start_end(self):
1336+
parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
1337+
self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1338+
self.assert_event_tuples(parser, [
1339+
('start-ns', ('', 'abc')),
1340+
('start-ns', ('p', 'xyz')),
1341+
], max_events=2)
1342+
self.assert_event_tags(parser, [
1343+
('start', '{abc}tag'),
1344+
], max_events=1)
1345+
1346+
self._feed(parser, "<child />\n")
1347+
self.assert_event_tags(parser, [
1348+
('start', '{abc}child'),
1349+
('end', '{abc}child'),
1350+
])
1351+
1352+
self._feed(parser, "</tag>\n")
1353+
parser.close()
1354+
self.assert_event_tags(parser, [
1355+
('end', '{abc}tag'),
1356+
], max_events=1)
1357+
self.assert_event_tuples(parser, [
1358+
('end-ns', None),
1359+
('end-ns', None),
1360+
])
1361+
12791362
def test_events(self):
12801363
parser = ET.XMLPullParser(events=())
12811364
self._feed(parser, "<root/>\n")

Lib/xml/etree/ElementTree.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,6 +1518,10 @@ def __init__(self, *, target=None, encoding=None):
15181518
parser.StartElementHandler = self._start
15191519
if hasattr(target, 'end'):
15201520
parser.EndElementHandler = self._end
1521+
if hasattr(target, 'start_ns'):
1522+
parser.StartNamespaceDeclHandler = self._start_ns
1523+
if hasattr(target, 'end_ns'):
1524+
parser.EndNamespaceDeclHandler = self._end_ns
15211525
if hasattr(target, 'data'):
15221526
parser.CharacterDataHandler = target.data
15231527
# miscellaneous callbacks
@@ -1559,12 +1563,24 @@ def handler(tag, event=event_name, append=append,
15591563
append((event, end(tag)))
15601564
parser.EndElementHandler = handler
15611565
elif event_name == "start-ns":
1562-
def handler(prefix, uri, event=event_name, append=append):
1563-
append((event, (prefix or "", uri or "")))
1566+
# TreeBuilder does not implement .start_ns()
1567+
if hasattr(self.target, "start_ns"):
1568+
def handler(prefix, uri, event=event_name, append=append,
1569+
start_ns=self._start_ns):
1570+
append((event, start_ns(prefix, uri)))
1571+
else:
1572+
def handler(prefix, uri, event=event_name, append=append):
1573+
append((event, (prefix or '', uri or '')))
15641574
parser.StartNamespaceDeclHandler = handler
15651575
elif event_name == "end-ns":
1566-
def handler(prefix, event=event_name, append=append):
1567-
append((event, None))
1576+
# TreeBuilder does not implement .end_ns()
1577+
if hasattr(self.target, "end_ns"):
1578+
def handler(prefix, event=event_name, append=append,
1579+
end_ns=self._end_ns):
1580+
append((event, end_ns(prefix)))
1581+
else:
1582+
def handler(prefix, event=event_name, append=append):
1583+
append((event, None))
15681584
parser.EndNamespaceDeclHandler = handler
15691585
elif event_name == 'comment':
15701586
def handler(text, event=event_name, append=append, self=self):
@@ -1595,6 +1611,12 @@ def _fixname(self, key):
15951611
self._names[key] = name
15961612
return name
15971613

1614+
def _start_ns(self, prefix, uri):
1615+
return self.target.start_ns(prefix or '', uri or '')
1616+
1617+
def _end_ns(self, prefix):
1618+
return self.target.end_ns(prefix or '')
1619+
15981620
def _start(self, tag, attr_list):
15991621
# Handler for expat's StartElementHandler. Since ordered_attributes
16001622
# is set, the attributes are reported as a list of alternating
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The XMLParser() in xml.etree.ElementTree provides namespace prefix context to the
2+
parser target if the target defines the callback methods "start_ns()" and/or "end_ns()".
3+
Patch by Stefan Behnel.

0 commit comments

Comments
 (0)