Skip to content
This repository was archived by the owner on May 11, 2023. It is now read-only.

Commit b7811af

Browse files
committed
Add test_pulldom.py from CPython v3.11.2
1 parent 471ec26 commit b7811af

File tree

1 file changed

+356
-0
lines changed

1 file changed

+356
-0
lines changed

Lib/test/test_pulldom.py

Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
import io
import unittest
import xml.sax

from xml.sax.xmlreader import AttributesImpl
from xml.sax.handler import feature_external_ges
from xml.dom import pulldom

from test.support import findfile


# Path of the XML fixture parsed by PullDOMTestCase.test_parse, looked up
# through test.support.findfile in the "xmltestdata" subdirectory.
tstfile = findfile("test.xml", subdir="xmltestdata")
# A handy XML snippet, containing attributes, a namespace prefix, and a
# self-closing tag.  The line layout is significant: the tests that walk the
# event stream item by item also count the line breaks between elements.
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
class PullDOMTestCase(unittest.TestCase):
    """Tests for the event streams returned by pulldom.parse/parseString."""

    # TODO: RUSTPYTHON FileNotFoundError: [Errno 2] No such file or directory (os error 2): 'xmltestdata/test.xml' -> 'None'
    @unittest.expectedFailure
    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    # TODO: RUSTPYTHON implement DOM semantic
    @unittest.expectedFailure
    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)

        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)

        # <html ...>
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break

        evt, node = next(items)
        # XXX - A comment should be reported here (pulldom.COMMENT), but it
        # is swallowed; see test_comment.
        # Line break after swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)

        # <title>Introduction to XSL</title>
        evt, node = next(items)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very node the start event gave.
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)

        # <hr/>
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)

        # <p><xdc:author ...>A. Namespace</xdc:author></p>
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)

        # </html>
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        # XXX No END_DOCUMENT item is ever obtained, so it is not asserted
        # here; see test_end_document.

    # TODO: RUSTPYTHON pulldom.parseString(SMALL_SAMPLE) return iterator with tuple with 2 elements
    @unittest.expectedFailure
    def test_expandItem(self):
        """Ensure expandItem works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail('No "title" element detected in SMALL_SAMPLE!')
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() is expected to drop the parser and stream references.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        # Only the next() call can raise StopIteration, so only it is
        # guarded; the event check runs outside the try block.
        try:
            evt, node = next(items)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
        # Assert that the next node is END_DOCUMENT:
        self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_external_ges_default(self):
        """External general entities must be disabled by default."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        saxparser = parser.parser
        ges = saxparser.getFeature(feature_external_ges)
        # Compared by equality so that a 0 flag is accepted as well as False.
        self.assertEqual(ges, False)
class ThoroughTestCase(unittest.TestCase):
    """Test the hard-to-reach parts of pulldom."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(pd)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(pd, False)

    def _check_comment_and_pi(self, pd):
        """Consume a COMMENT then a PROCESSING_INSTRUCTION event from *pd*
        and check their payloads against what the mock parsers emit."""
        evt, node = next(pd)
        self.assertEqual(pulldom.COMMENT, evt)
        self.assertEqual("a comment", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
        self.assertEqual("target", node.target)
        self.assertEqual("data", node.data)

    def _test_thorough(self, pd, before_root=True):
        """Test some of the hard-to-reach parts of the parser, using a mock
        parser.

        *pd* is the event stream to walk.  *before_root* says whether a
        comment and a processing instruction are expected ahead of the
        root element.
        """

        evt, node = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))

        if before_root:
            self._check_comment_and_pi(pd)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)

        self._check_comment_and_pi(pd)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)

        evt, node = next(pd)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("text", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, evt)
class SAXExerciser:
    """A fake sax parser that calls some of the harder-to-reach sax methods to
    ensure it emits the correct events"""

    def setContentHandler(self, handler):
        """Remember *handler*; parse() sends every event to it."""
        self._handler = handler

    def parse(self, _):
        """Replay a fixed event sequence on the stored handler.

        The argument (normally the input source) is ignored.
        """
        h = self._handler
        h.startDocument()

        # The next two items ensure that items preceding the first
        # start_element are properly stored and emitted:
        h.comment("a comment")
        h.processingInstruction("target", "data")

        h.startElement("html", AttributesImpl({}))

        h.comment("a comment")
        h.processingInstruction("target", "data")

        h.startElement("p", AttributesImpl({"class": "paraclass"}))
        h.characters("text")
        h.endElement("p")
        h.endElement("html")
        h.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""

    # Configuration calls are accepted and ignored.
    setProperty = stub
    setFeature = stub
class SAX2DOMExerciser(SAXExerciser):
    """The same as SAXExerciser, but without the processing instruction and
    comment before the root element, because S2D can't handle it"""

    def parse(self, _):
        """Replay the fixed event sequence, starting directly with the root
        element.  The argument is ignored."""
        h = self._handler
        h.startDocument()
        h.startElement("html", AttributesImpl({}))
        h.comment("a comment")
        h.processingInstruction("target", "data")
        h.startElement("p", AttributesImpl({"class": "paraclass"}))
        h.characters("text")
        h.endElement("p")
        h.endElement("html")
        h.endDocument()
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Use a fresh SAX2DOM instance as the parser's content handler."""
        self.pulldom = pulldom.SAX2DOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for pulldom.SAX2DOM, which links nodes into a tree as it goes."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, reporting *testname* on failure."""
        self.assertTrue(test, testname)

    # TODO: RUSTPYTHON read from stream io
    @unittest.expectedFailure
    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as fin:
            sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
                                   len(SMALL_SAMPLE))
            for evt, node in sd:
                if evt == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        # Feed <doc>text<subelm>text</subelm>text</doc> by hand.
        sax2dom = pulldom.SAX2DOM()
        sax2dom.startDocument()
        sax2dom.startElement("doc", {})
        sax2dom.characters("text")
        sax2dom.startElement("subelm", {})
        sax2dom.characters("text")
        sax2dom.endElement("subelm")
        sax2dom.characters("text")
        sax2dom.endElement("doc")
        sax2dom.endDocument()

        doc = sax2dom.document
        root = doc.documentElement
        (text1, elm1, text2) = root.childNodes
        text3 = elm1.childNodes[0]

        # Sibling links among the children of <doc> and inside <subelm>:
        self.assertIsNone(text1.previousSibling)
        self.assertIs(text1.nextSibling, elm1)
        self.assertIs(elm1.previousSibling, text1)
        self.assertIs(elm1.nextSibling, text2)
        self.assertIs(text2.previousSibling, elm1)
        self.assertIsNone(text2.nextSibling)
        self.assertIsNone(text3.previousSibling)
        self.assertIsNone(text3.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, doc)
        self.assertIs(text1.parentNode, root)
        self.assertIs(elm1.parentNode, root)
        self.assertIs(text2.parentNode, root)
        self.assertIs(text3.parentNode, elm1)
        doc.unlink()
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)