Skip to content

[3.7] bpo-36520: Email header folded incorrectly (GH-13608) #13910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2683,6 +2683,7 @@ def _refold_parse_tree(parse_tree, *, policy):
newline = _steal_trailing_WSP_if_exists(lines)
if newline or part.startswith_fws():
lines.append(newline + tstr)
last_ew = None
continue
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
Expand Down
131 changes: 131 additions & 0 deletions Lib/test/test_email/test_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,137 @@ def test_str_defaults_to_utf8(self):
m['Subject'] = 'unicöde'
self.assertEqual(str(m), 'Subject: unicöde\n\n')

def test_folding_with_utf8_encoding_1(self):
    # bpo-36520
    #
    # The subject has UTF-8 words on both sides of the whitespace
    # fold point, and the line length limit is hit in the middle
    # of an ASCII word.
    subject = (
        'Hello Wörld! Hello Wörld! '
        'Hello Wörld! Hello Wörld!Hello Wörld!'
    )
    expected = (
        b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
        b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
        b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)


def test_folding_with_utf8_encoding_2(self):
    # bpo-36520
    #
    # The subject has UTF-8 words on both sides of the whitespace
    # fold point, and the line length limit is hit exactly at the
    # end of an encoded word.
    subject = (
        'Hello Wörld! Hello Wörld! '
        'Hello Wörlds123! Hello Wörld!Hello Wörld!'
    )
    expected = (
        b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
        b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'
        b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)

def test_folding_with_utf8_encoding_3(self):
    # bpo-36520
    #
    # The subject has UTF-8 words on both sides of the whitespace
    # fold point, and the line length limit is hit at the end of
    # the first word.
    subject = (
        'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! '
        'Hello Wörld!Hello Wörld!'
    )
    expected = (
        b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
        b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
        b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)

def test_folding_with_utf8_encoding_4(self):
    # bpo-36520
    #
    # The subject has UTF-8 words on both sides of the fold point;
    # the first word is itself UTF-8 and the fold point lands
    # inside that word.
    subject = (
        'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello'
        ' Wörld!Hello Wörld!'
    )
    expected = (
        b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
        b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
        b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)

def test_folding_with_utf8_encoding_5(self):
    # bpo-36520
    #
    # Only ASCII words precede the fold point; a UTF-8 word
    # appears after it.
    subject = (
        '123456789 123456789 123456789 123456789 123456789'
        ' 123456789 123456789 Hello Wörld!'
    )
    expected = (
        b'Subject: 123456789 123456789 123456789 123456789'
        b' 123456789 123456789 123456789\n'
        b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)

def test_folding_with_utf8_encoding_6(self):
    # bpo-36520
    #
    # A UTF-8 word sits before the fold point and only ASCII
    # words follow it.
    subject = (
        '123456789 123456789 123456789 123456789 Hello Wörld!'
        ' 123456789 123456789 123456789 123456789 123456789'
        ' 123456789'
    )
    expected = (
        b'Subject: 123456789 123456789 123456789 123456789'
        b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '
        b'123456789 123456789 123456789 123456789 '
        b'123456789\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)

def test_folding_with_utf8_encoding_7(self):
    # bpo-36520
    #
    # The header is folded twice: UTF-8 words appear before and
    # after the first fold point, and ASCII words follow the
    # second fold point.
    subject = (
        '123456789 123456789 Hello Wörld! Hello Wörld! '
        '123456789-123456789 123456789 Hello Wörld! 123456789'
        ' 123456789'
    )
    expected = (
        b'Subject: 123456789 123456789 Hello =?utf-8?q?'
        b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
        b' 123456789-123456789 123456789 Hello '
        b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)

def test_folding_with_utf8_encoding_8(self):
    # bpo-36520
    #
    # The header is folded twice: UTF-8 words appear before the
    # first fold point, ASCII words between the two fold points,
    # and UTF-8 words again after the second fold point.
    subject = (
        '123456789 123456789 Hello Wörld! Hello Wörld! '
        '123456789 123456789 123456789 123456789 123456789 '
        '123456789-123456789 123456789 Hello Wörld! 123456789'
        ' 123456789'
    )
    expected = (
        b'Subject: 123456789 123456789 Hello '
        b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '
        b'123456789 123456789 123456789 123456789 '
        b'123456789-123456789\n 123456789 Hello '
        b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n'
    )

    msg = EmailMessage()
    msg['Subject'] = subject
    # Serialising to bytes is what triggers header folding/encoding.
    self.assertEqual(bytes(msg), expected)

class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
# Doing the full test run here may seem a bit redundant, since the two
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher.