Skip to content

Commit b2ffbe4

Browse files
committed
Address Serhiy's review
1 parent 408e224 commit b2ffbe4

File tree

3 files changed

+33
-26
lines changed

3 files changed

+33
-26
lines changed

Lib/test/libregrtest/testresult.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import traceback
1010
import unittest
1111
from test import support
12-
from test.libregrtest.utils import escape_xml
12+
from test.libregrtest.utils import sanitize_xml
1313

1414
class RegressionTestResult(unittest.TextTestResult):
1515
USE_XML = False
@@ -66,10 +66,10 @@ def _add_result(self, test, capture=False, **args):
6666
if capture:
6767
if self._stdout_buffer is not None:
6868
stdout = self._stdout_buffer.getvalue().rstrip()
69-
ET.SubElement(e, 'system-out').text = escape_xml(stdout)
69+
ET.SubElement(e, 'system-out').text = sanitize_xml(stdout)
7070
if self._stderr_buffer is not None:
7171
stderr = self._stderr_buffer.getvalue().rstrip()
72-
ET.SubElement(e, 'system-err').text = escape_xml(stderr)
72+
ET.SubElement(e, 'system-err').text = sanitize_xml(stderr)
7373

7474
for k, v in args.items():
7575
if not k or not v:
@@ -79,11 +79,11 @@ def _add_result(self, test, capture=False, **args):
7979
if hasattr(v, 'items'):
8080
for k2, v2 in v.items():
8181
if k2:
82-
e2.set(k2, escape_xml(str(v2)))
82+
e2.set(k2, sanitize_xml(str(v2)))
8383
else:
84-
e2.text = escape_xml(str(v2))
84+
e2.text = sanitize_xml(str(v2))
8585
else:
86-
e2.text = escape_xml(str(v))
86+
e2.text = sanitize_xml(str(v))
8787

8888
@classmethod
8989
def __makeErrorDict(cls, err_type, err_value, err_tb):

Lib/test/libregrtest/utils.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -717,15 +717,20 @@ def get_signal_name(exitcode):
717717

718718
ILLEGAL_XML_CHARS_RE = re.compile(
719719
'['
720-
'\x00-\x1F' # ASCII control characters
721-
'\uD800-\uDFFF' # surrogate characters
720+
# Control characters; newline (\x0A and \x0D) and TAB (\x09) are legal
721+
'\x00-\x08\x0B\x0C\x0E-\x1F'
722+
# Surrogate characters
723+
'\uD800-\uDFFF'
724+
# Special Unicode characters
722725
'\uFFFE'
723726
'\uFFFF'
724-
']')
727+
# Match multiple sequential invalid characters for better efficiency
728+
']+')
725729

726-
def _escape_xml_replace(regs):
727-
code_point = ord(regs[0])
728-
return f"&#{code_point};"
730+
def _sanitize_xml_replace(regs):
731+
text = regs[0]
732+
return ''.join(f'\\x{ord(ch):02x}' if ch <= '\xff' else ascii(ch)[1:-1]
733+
for ch in text)
729734

730-
def escape_xml(text):
731-
return ILLEGAL_XML_CHARS_RE.sub(_escape_xml_replace, text)
735+
def sanitize_xml(text):
736+
return ILLEGAL_XML_CHARS_RE.sub(_sanitize_xml_replace, text)

Lib/test/test_regrtest.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2292,7 +2292,7 @@ def test_failed(self):
22922292
self.assertEqual(testcase.get('result'), 'completed')
22932293
self.assertGreater(float(testcase.get('time')), 0)
22942294
for out in testcase.iter('system-out'):
2295-
self.assertEqual(out.text, "abc &#27; def")
2295+
self.assertEqual(out.text, r"abc \x1b def")
22962296

22972297

22982298
class TestUtils(unittest.TestCase):
@@ -2477,21 +2477,23 @@ def id(self):
24772477
self.assertTrue(match_test(test_chdir))
24782478
self.assertFalse(match_test(test_copy))
24792479

2480-
def test_escape_xml(self):
2481-
escape_xml = utils.escape_xml
2480+
def test_sanitize_xml(self):
2481+
sanitize_xml = utils.sanitize_xml
24822482

24832483
# escape invalid XML characters
2484-
self.assertEqual(escape_xml('abc \x1b def'),
2485-
'abc &#27; def')
2486-
self.assertEqual(escape_xml('nul:\x00, bell:\x07'),
2487-
'nul:&#0;, bell:&#7;')
2488-
self.assertEqual(escape_xml('surrogate:\uDC80'),
2489-
'surrogate:&#56448;')
2490-
self.assertEqual(escape_xml('illegal \uFFFE and \uFFFF'),
2491-
'illegal &#65534; and &#65535;')
2484+
self.assertEqual(sanitize_xml('abc \x1b\x1f def'),
2485+
r'abc \x1b\x1f def')
2486+
self.assertEqual(sanitize_xml('nul:\x00, bell:\x07'),
2487+
r'nul:\x00, bell:\x07')
2488+
self.assertEqual(sanitize_xml('surrogate:\uDC80'),
2489+
r'surrogate:\udc80')
2490+
self.assertEqual(sanitize_xml('illegal \uFFFE and \uFFFF'),
2491+
r'illegal \ufffe and \uffff')
24922492

24932493
# no escape for valid XML characters
2494-
self.assertEqual(escape_xml('valid t\xe9xt \u20ac'),
2494+
self.assertEqual(sanitize_xml('a\n\tb'),
2495+
'a\n\tb')
2496+
self.assertEqual(sanitize_xml('valid t\xe9xt \u20ac'),
24952497
'valid t\xe9xt \u20ac')
24962498

24972499

0 commit comments

Comments
 (0)