Skip to content

Commit d7e16cd

Browse files
committed
bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees.
1 parent 51aac15 commit d7e16cd

File tree

4 files changed

+132
-1
lines changed

4 files changed

+132
-1
lines changed

Doc/library/xml.etree.elementtree.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,18 @@ Functions
572572
.. versionadded:: 3.2
573573

574574

575+
.. function:: indent(tree, space="  ", level=0)
576+
577+
Appends whitespace to the subtree to indent the tree visually.
578+
This can be used to generate pretty-printed XML output.
579+
*tree* can be an Element or ElementTree. *space* is the whitespace
580+
string that will be inserted for each indentation level, two space
581+
characters by default. For indenting partial subtrees inside of an
582+
already indented tree, pass the initial indentation level as *level*.
583+
584+
.. versionadded:: 3.9
585+
586+
575587
.. function:: iselement(element)
576588

577589
Checks if an object appears to be a valid element object. *element* is an

Lib/test/test_xml_etree.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,74 @@ def test_writestring(self):
788788
elem = ET.fromstring("<html><body>text</body></html>")
789789
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
790790

791+
def test_indent(self):
    """Check the serialized output of ET.indent() for typical trees.

    ET.indent() is documented to insert two space characters per
    indentation level by default, so nested elements are expected at
    2, 4, 6, ... spaces.
    """
    # An element without children is left untouched.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    # A single child is indented one level and the parent is closed
    # on its own line.
    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Non-whitespace tail text must be preserved, not replaced.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    # Several nesting levels.
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre<br/>post") stays on one line because its
    # text and tail contain non-whitespace characters.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # A custom indentation string is repeated once per level.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, space='\t')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
842+
843+
def test_indent_space_caching(self):
    """Check that ET.indent() reuses one string object per indentation.

    The expected whitespace strings assume the documented default of
    two space characters per indentation level.
    """
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)
    # Distinct tail values: the untouched root tail (None) plus the
    # indentation strings for levels 0, 1 and 2.
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Distinct text values: untouched leaves (None), the indentation
    # strings for levels 1, 2 and 3, and the original text content.
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # Equal tail strings must be the very same object: as many distinct
    # values as distinct identities.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )
858+
791859
def test_tostring_default_namespace(self):
792860
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
793861
self.assertEqual(

Lib/xml/etree/ElementTree.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
"dump",
7777
"Element", "ElementTree",
7878
"fromstring", "fromstringlist",
79-
"iselement", "iterparse",
79+
"indent", "iselement", "iterparse",
8080
"parse", "ParseError",
8181
"PI", "ProcessingInstruction",
8282
"QName",
@@ -1185,6 +1185,55 @@ def dump(elem):
11851185
if not tail or tail[-1] != "\n":
11861186
sys.stdout.write("\n")
11871187

1188+
1189+
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tail of the
    (root) element itself is not changed, but its text and the text and
    tail of all elements in its subtree are adapted.  Text or tail
    content that contains non-whitespace characters is never replaced.
    An element without children is left untouched.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.  Returns None; the tree is
    modified in place.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        # A negative level would multiply *space* by a negative number
        # and silently produce no indentation at all.
        raise ValueError(
            f"Initial indentation level must be >= 0, got {level}")
    # Use len() rather than truth-testing the element to count children.
    if not len(tree):
        return

    # Reduce the memory consumption by reusing indentation strings:
    # one shared string object per indentation depth.
    indentations = {}

    def _indentation(depth):
        try:
            return indentations[depth]
        except KeyError:
            whitespace = indentations[depth] = "\n" + depth * space
            return whitespace

    def _is_blank(value):
        # None, "" and whitespace-only strings may be overwritten.
        return not value or not value.strip()

    def _indent_children(elem, level):
        # Start a new indentation level for the first child.
        child_level = level + 1
        child_indentation = _indentation(child_level)
        if _is_blank(elem.text):
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if _is_blank(child.tail):
                child.tail = child_indentation

        # Dedent after the last child.  *child* is always bound here
        # because this helper is only called for elements that have at
        # least one child.
        if _is_blank(child.tail):
            child.tail = _indentation(level)

    _indent_children(tree, level)
1235+
1236+
11881237
# --------------------------------------------------------------------
11891238
# parsing
11901239

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees.
2+
Contributed by Stefan Behnel.

0 commit comments

Comments
 (0)