Skip to content

Commit 101a5e8

Browse files
scoder authored and serhiy-storchaka committed
bpo-31648: Improve ElementPath (#3835)
* Allow whitespace inside of ElementPath predicates.
* Add ElementPath predicate support for text comparison of the current node, like "[.='text']".
1 parent 9811e80 commit 101a5e8

File tree

5 files changed

+68
-7
lines changed

5 files changed

+68
-7
lines changed

Doc/library/xml.etree.elementtree.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,11 @@ Supported XPath syntax
437437
| ``[tag]`` | Selects all elements that have a child named |
438438
| | ``tag``. Only immediate children are supported. |
439439
+-----------------------+------------------------------------------------------+
440+
| ``[.='text']`` | Selects all elements whose complete text content, |
441+
| | including descendants, equals the given ``text``. |
442+
| | |
443+
| | .. versionadded:: 3.7 |
444+
+-----------------------+------------------------------------------------------+
440445
| ``[tag='text']`` | Selects all elements that have a child named |
441446
| | ``tag`` whose complete text content, including |
442447
| | descendants, equals the given ``text``. |

Doc/whatsnew/3.7.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,14 @@ Function :func:`~uu.encode` now accepts an optional *backtick*
281281
keyword argument. When it's true, zeros are represented by ``'`'``
282282
instead of spaces. (Contributed by Xiang Zhang in :issue:`30103`.)
283283

284+
xml.etree
285+
---------
286+
287+
:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find`
288+
methods can now compare text of the current node with ``[. = "text"]``,
289+
not only text in children. Predicates also allow adding spaces for
290+
better readability. (Contributed by Stefan Behnel in :issue:`31648`.)
291+
284292
zipapp
285293
------
286294

Lib/test/test_xml_etree.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2237,6 +2237,39 @@ def test_findall(self):
22372237
['tag'] * 2)
22382238
self.assertEqual(e.findall('section//'), e.findall('section//*'))
22392239

2240+
self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
2241+
['section'])
2242+
self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
2243+
['section'])
2244+
self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
2245+
['section'])
2246+
self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
2247+
['section'])
2248+
self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
2249+
['section'])
2250+
2251+
self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
2252+
['tag'])
2253+
self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
2254+
['tag'])
2255+
self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
2256+
['tag'])
2257+
self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
2258+
['tag'])
2259+
self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
2260+
['tag'])
2261+
self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
2262+
[])
2263+
self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
2264+
[])
2265+
2266+
# duplicate section => 2x tag matches
2267+
e[1] = e[2]
2268+
self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
2269+
['section', 'section'])
2270+
self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
2271+
['tag', 'tag'])
2272+
22402273
def test_test_find_with_ns(self):
22412274
e = ET.XML(SAMPLE_XML_NS)
22422275
self.assertEqual(summarize_list(e.findall('tag')), [])

Lib/xml/etree/ElementPath.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ def prepare_predicate(next, token):
157157
return
158158
if token[0] == "]":
159159
break
160+
if token == ('', ''):
161+
# ignore whitespace
162+
continue
160163
if token[0] and token[0][:1] in "'\"":
161164
token = "'", token[0][1:-1]
162165
signature.append(token[0] or "-")
@@ -188,16 +191,22 @@ def select(context, result):
188191
if elem.find(tag) is not None:
189192
yield elem
190193
return select
191-
if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
192-
# [tag='value']
194+
if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
195+
# [.='value'] or [tag='value']
193196
tag = predicate[0]
194197
value = predicate[-1]
195-
def select(context, result):
196-
for elem in result:
197-
for e in elem.findall(tag):
198-
if "".join(e.itertext()) == value:
198+
if tag:
199+
def select(context, result):
200+
for elem in result:
201+
for e in elem.findall(tag):
202+
if "".join(e.itertext()) == value:
203+
yield elem
204+
break
205+
else:
206+
def select(context, result):
207+
for elem in result:
208+
if "".join(elem.itertext()) == value:
199209
yield elem
200-
break
201210
return select
202211
if signature == "-" or signature == "-()" or signature == "-()-":
203212
# [index] or [last()] or [last()-index]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Improvements to path predicates in ElementTree:
2+
3+
* Allow whitespace around predicate parts, e.g. "[a = 'text']" instead of requiring the less readable "[a='text']".
4+
* Add support for text comparison of the current node, like "[.='text']".
5+
6+
Patch by Stefan Behnel.

0 commit comments

Comments
 (0)