Skip to content

bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees #15200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Aug 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Doc/library/xml.etree.elementtree.rst
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,18 @@ Functions
.. versionadded:: 3.2


.. function:: indent(tree, space="  ", level=0)

   Appends whitespace to the subtree to indent the tree visually.
   This can be used to generate pretty-printed XML output.
   *tree* can be an Element or ElementTree.  *space* is the whitespace
   string that will be inserted for each indentation level, two space
   characters by default.  For indenting partial subtrees inside of an
   already indented tree, pass the initial indentation level as *level*.

   .. versionadded:: 3.9


.. function:: iselement(element)

Checks if an object appears to be a valid element object. *element* is an
Expand Down
117 changes: 117 additions & 0 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,123 @@ def test_writestring(self):
elem = ET.fromstring("<html><body>text</body></html>")
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')

def test_indent(self):
    # Exercise ET.indent() with its defaults (two spaces per level, level 0).

    # An element without children is left untouched.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    # A single child is placed on its own line, one level deep.
    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Pre-existing whitespace-only text/tail is replaced, not extended.
    elem = ET.XML("<html> <body>text</body> </html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # A tail that carries real text must be preserved as it is.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    # Nested children get two more spaces per depth; mixed "\n" / "\t"
    # separators in the input are normalised.
    elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre<br/>post") is kept on one line because both the
    # text and the tail contain non-whitespace characters.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )

def test_indent_space(self):
    # ET.indent() must honour a caller-supplied *space* string, including
    # the empty string (newlines only, no horizontal indentation).

    # One tab per indentation level.
    tabbed = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(tabbed, space='\t')
    expected_tabbed = (
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(tabbed), expected_tabbed)

    # Empty indentation string: every element starts at column 0.
    flat = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(flat, space='')
    expected_flat = (
        b'<html>\n'
        b'<body>\n'
        b'<p>pre<br />post</p>\n'
        b'<p>text</p>\n'
        b'</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(flat), expected_flat)

def test_indent_space_caching(self):
    # indent() is expected to reuse one string object per indentation
    # depth instead of building a new string for every element.
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)

    # Tails: the root has none, <body> dedents to column 0, and the deeper
    # elements end at two or four spaces.
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Texts: one indentation string per depth (2, 4 and 6 spaces) plus the
    # two real text nodes; childless elements without text stay None.
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # Equal tail values must be the very same object: the number of
    # distinct values has to match the number of distinct identities.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )

def test_indent_level(self):
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")

    # A negative initial level is rejected before the tree is touched.
    with self.assertRaises(ValueError):
        ET.indent(elem, level=-1)
    self.assertEqual(
        ET.tostring(elem),
        b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
    )

    # level=2 with the default two-space indentation: the children of the
    # root start at depth 3 (six spaces) and the root's closing tag lines
    # up with depth 2 (four spaces).
    ET.indent(elem, level=2)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'      <body>\n'
        b'        <p>pre<br />post</p>\n'
        b'        <p>text</p>\n'
        b'      </body>\n'
        b'    </html>'
    )

    # level=1 with a single-space indentation string: depths 2 and 3 are
    # two and three spaces, the root's closing tag gets one space.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, level=1, space=' ')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'   <p>pre<br />post</p>\n'
        b'   <p>text</p>\n'
        b'  </body>\n'
        b' </html>'
    )

def test_tostring_default_namespace(self):
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
self.assertEqual(
Expand Down
53 changes: 52 additions & 1 deletion Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
"dump",
"Element", "ElementTree",
"fromstring", "fromstringlist",
"iselement", "iterparse",
"indent", "iselement", "iterparse",
"parse", "ParseError",
"PI", "ProcessingInstruction",
"QName",
Expand Down Expand Up @@ -1185,6 +1185,57 @@ def dump(elem):
if not tail or tail[-1] != "\n":
sys.stdout.write("\n")


def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tree is changed
    in place and nothing is returned.  The tail of the (root) element
    itself is never touched; inside its subtree, every text or tail that
    is missing or consists only of whitespace (including the root's own
    leading text) is replaced by a newline followed by the indentation
    for that depth.  Text and tails that contain non-whitespace characters
    are preserved unchanged.  An element without children is left as is.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.  A negative *level* raises ValueError
    before anything is modified.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        # Nothing to indent: the element has no children.
        return

    # Reduce the memory consumption by reusing indentation strings.
    # Index i holds the string for depth i relative to the root; the
    # initial *level* is already folded into entry 0.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # Only called for elements that have at least one child, so the
        # loop below always binds ``child``.

        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            # First visit to this depth: extend the cache by one entry.
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        if not child.tail.strip():
            child.tail = indentations[level]

    # Depth 0 is relative to the root; it indexes ``indentations``.
    _indent_children(tree, 0)


# --------------------------------------------------------------------
# parsing

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees.
Contributed by Stefan Behnel.