Skip to content

Commit aec1dac

Browse files
gh-117313: Fix re-folding email messages containing non-standard line separators (GH-117369)
Only treat '\n', '\r' and '\r\n' as line separators when re-folding email messages. Preserve the control characters '\v', '\f', '\x1c', '\x1d' and '\x1e' and the Unicode line separators '\x85', '\u2028' and '\u2029' as is.
1 parent 4e502a4 commit aec1dac

File tree

3 files changed

+56
-2
lines changed

3 files changed

+56
-2
lines changed

Lib/email/policy.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
'HTTP',
2222
]
2323

24-
linesep_splitter = re.compile(r'\n|\r')
24+
linesep_splitter = re.compile(r'\n|\r\n?')
2525

2626
@_extend_docstrings
2727
class EmailPolicy(Policy):
@@ -205,7 +205,8 @@ def _fold(self, name, value, refold_binary=False):
205205
if hasattr(value, 'name'):
206206
return value.fold(policy=self)
207207
maxlen = self.max_line_length if self.max_line_length else sys.maxsize
208-
lines = value.splitlines()
208+
# We can't use splitlines here because it splits on more than \r and \n.
209+
lines = linesep_splitter.split(value)
209210
refold = (self.refold_source == 'all' or
210211
self.refold_source == 'long' and
211212
(lines and len(lines[0])+len(name)+2 > maxlen or

Lib/test/test_email/test_generator.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,39 @@ def test_flatten_linesep_overrides_policy(self):
140140
g.flatten(msg, linesep='\n')
141141
self.assertEqual(s.getvalue(), self.typ(expected))
142142

143+
def test_flatten_linesep(self):
144+
source = 'Subject: one\n two\r three\r\n four\r\n\r\ntest body\r\n'
145+
msg = self.msgmaker(self.typ(source))
146+
self.assertEqual(msg['Subject'], 'one two three four')
147+
148+
expected = 'Subject: one\n two\n three\n four\n\ntest body\n'
149+
s = self.ioclass()
150+
g = self.genclass(s)
151+
g.flatten(msg)
152+
self.assertEqual(s.getvalue(), self.typ(expected))
153+
154+
expected = 'Subject: one two three four\n\ntest body\n'
155+
s = self.ioclass()
156+
g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
157+
g.flatten(msg)
158+
self.assertEqual(s.getvalue(), self.typ(expected))
159+
160+
def test_flatten_control_linesep(self):
161+
source = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\r\n\r\ntest body\r\n'
162+
msg = self.msgmaker(self.typ(source))
163+
self.assertEqual(msg['Subject'], 'one\v two\f three\x1c four\x1d five\x1e six')
164+
165+
expected = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\n\ntest body\n'
166+
s = self.ioclass()
167+
g = self.genclass(s)
168+
g.flatten(msg)
169+
self.assertEqual(s.getvalue(), self.typ(expected))
170+
171+
s = self.ioclass()
172+
g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
173+
g.flatten(msg)
174+
self.assertEqual(s.getvalue(), self.typ(expected))
175+
143176
def test_set_mangle_from_via_policy(self):
144177
source = textwrap.dedent("""\
145178
Subject: test that
@@ -224,6 +257,22 @@ class TestGenerator(TestGeneratorBase, TestEmailBase):
224257
ioclass = io.StringIO
225258
typ = str
226259

260+
def test_flatten_unicode_linesep(self):
261+
source = 'Subject: one\x85 two\u2028 three\u2029 four\r\n\r\ntest body\r\n'
262+
msg = self.msgmaker(self.typ(source))
263+
self.assertEqual(msg['Subject'], 'one\x85 two\u2028 three\u2029 four')
264+
265+
expected = 'Subject: =?utf-8?b?b25lwoUgdHdv4oCoIHRocmVl4oCp?= four\n\ntest body\n'
266+
s = self.ioclass()
267+
g = self.genclass(s)
268+
g.flatten(msg)
269+
self.assertEqual(s.getvalue(), self.typ(expected))
270+
271+
s = self.ioclass()
272+
g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
273+
g.flatten(msg)
274+
self.assertEqual(s.getvalue(), self.typ(expected))
275+
227276

228277
class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
229278

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Only treat ``'\n'``, ``'\r'`` and ``'\r\n'`` as line separators in
2+
re-folding the :mod:`email` messages. Preserve control characters ``'\v'``,
3+
``'\f'``, ``'\x1c'``, ``'\x1d'`` and ``'\x1e'`` and Unicode line separators
4+
``'\x85'``, ``'\u2028'`` and ``'\u2029'`` as is.

0 commit comments

Comments
 (0)