Skip to content

Commit 03041d6

Browse files
scoder authored and lisroach committed
bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees (pythonGH-15200)
1 parent 4a13e24 commit 03041d6

File tree

4 files changed

+183
-1
lines changed

4 files changed

+183
-1
lines changed

Doc/library/xml.etree.elementtree.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,18 @@ Functions
572572
.. versionadded:: 3.2
573573

574574

575+
.. function:: indent(tree, space="  ", level=0)
576+
577+
Appends whitespace to the subtree to indent the tree visually.
578+
This can be used to generate pretty-printed XML output.
579+
*tree* can be an Element or ElementTree. *space* is the whitespace
580+
string that will be inserted for each indentation level, two space
581+
characters by default. For indenting partial subtrees inside of an
582+
already indented tree, pass the initial indentation level as *level*.
583+
584+
.. versionadded:: 3.9
585+
586+
575587
.. function:: iselement(element)
576588

577589
Checks if an object appears to be a valid element object. *element* is an

Lib/test/test_xml_etree.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,123 @@ def test_writestring(self):
788788
elem = ET.fromstring("<html><body>text</body></html>")
789789
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
790790

791+
def test_indent(self):
    """Check ET.indent() with the default two-space indentation."""
    # An element without children is left untouched.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    # A single child is put on its own line, one level deep.
    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Pre-existing whitespace-only text/tail is replaced, not extended.
    elem = ET.XML("<html> <body>text</body> </html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # A non-blank tail is significant content and must be preserved.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    # Mixed whitespace tails ("\n", "\t") are normalised at every depth.
    elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre<br/>post") keeps its text and tail inline.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )
834+
835+
def test_indent_space(self):
    """Check that ET.indent() honours a custom *space* string."""
    markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

    # One tab per indentation level.
    root = ET.XML(markup)
    ET.indent(root, space='\t')
    tab_indented = (
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(root), tab_indented)

    # An empty *space* still inserts the line breaks, just no indentation.
    root = ET.XML(markup)
    ET.indent(root, space='')
    newline_only = (
        b'<html>\n'
        b'<body>\n'
        b'<p>pre<br />post</p>\n'
        b'<p>text</p>\n'
        b'</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(root), newline_only)
859+
860+
def test_indent_space_caching(self):
    """Check that ET.indent() reuses one string object per indentation level."""
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)
    # Tails: root has none, <body> closes at level 0, the last <p> dedents
    # to level 1, the remaining <p> elements and <br> sit at level 2.
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Texts: one indentation string per nesting depth plus the real content.
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # Equal tails must be the very same object, i.e. there are exactly as
    # many distinct objects as there are distinct values.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )
875+
876+
def test_indent_level(self):
    """Check the *level* argument of ET.indent(), including its validation."""
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    # A negative level is rejected before the tree is touched.
    with self.assertRaises(ValueError):
        ET.indent(elem, level=-1)
    self.assertEqual(
        ET.tostring(elem),
        b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
    )

    # level=2 with the default two-space step: the closing root tag sits at
    # 2 * 2 = 4 spaces, and each nesting depth adds two more.
    ET.indent(elem, level=2)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'      <body>\n'
        b'        <p>pre<br />post</p>\n'
        b'        <p>text</p>\n'
        b'      </body>\n'
        b'    </html>'
    )

    # level=1 with a one-space step: 1 space for the closing root tag,
    # 2 for its children, 3 for its grandchildren.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, level=1, space=' ')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'   <p>pre<br />post</p>\n'
        b'   <p>text</p>\n'
        b'  </body>\n'
        b' </html>'
    )
907+
791908
def test_tostring_default_namespace(self):
792909
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
793910
self.assertEqual(

Lib/xml/etree/ElementTree.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
"dump",
7777
"Element", "ElementTree",
7878
"fromstring", "fromstringlist",
79-
"iselement", "iterparse",
79+
"indent", "iselement", "iterparse",
8080
"parse", "ParseError",
8181
"PI", "ProcessingInstruction",
8282
"QName",
@@ -1185,6 +1185,57 @@ def dump(elem):
11851185
if not tail or tail[-1] != "\n":
11861186
sys.stdout.write("\n")
11871187

1188+
1189+
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tail of the (root)
    element itself is not changed; its text and the text/tail of the
    elements in its subtree are replaced wherever they are empty or
    whitespace-only.  Text and tails with other content are kept as is.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative; the tree is left unmodified
    in that case.  An element without children is returned untouched.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        return

    # Reduce the memory consumption by reusing indentation strings.
    # indentations[i] is the "\n" + padding string for depth i below *tree*.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            # First visit at this depth: build the string once and cache it.
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        # (*elem* always has at least one child here, so *child* is bound.)
        if not child.tail.strip():
            child.tail = indentations[level]

    _indent_children(tree, 0)
1237+
1238+
11881239
# --------------------------------------------------------------------
11891240
# parsing
11901241

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees.
2+
Contributed by Stefan Behnel.

0 commit comments

Comments
 (0)