Skip to content

Commit 36eea7a

Browse files
maxking miss-islington
authored and committed
[3.8] bpo-36520: Email header folded incorrectly (GH-13608) (GH-13909)
* [bpo-36520](https://bugs.python.org/issue36520): reset the encoded word offset when starting a new line during an email header folding operation
* 📜🤖 Added by blurb_it.
* [bpo-36520](https://bugs.python.org/issue36520): add an additional test case, and provide descriptive comments for the test_folding_with_utf8_encoding_* tests
* [bpo-36520](https://bugs.python.org/issue36520): fix whitespace issue
* [bpo-36520](https://bugs.python.org/issue36520): changes per reviewer request -- remove extraneous backslashes; add whitespace between terminating quotes and line-continuation backslashes; use "bpo-" instead of "issue GH-" in comments

(cherry picked from commit f6713e8)

Co-authored-by: websurfer5 <[email protected]>

https://bugs.python.org/issue36520
1 parent 36926df commit 36eea7a

File tree

3 files changed

+133
-0
lines changed

3 files changed

+133
-0
lines changed

Lib/email/_header_value_parser.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2768,6 +2768,7 @@ def _refold_parse_tree(parse_tree, *, policy):
27682768
newline = _steal_trailing_WSP_if_exists(lines)
27692769
if newline or part.startswith_fws():
27702770
lines.append(newline + tstr)
2771+
last_ew = None
27712772
continue
27722773
if not hasattr(part, 'encode'):
27732774
# It's not a terminal, try folding the subparts.

Lib/test/test_email/test_message.py

Lines changed: 131 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -784,6 +784,137 @@ def test_str_defaults_to_utf8(self):
784784
m['Subject'] = 'unicöde'
785785
self.assertEqual(str(m), 'Subject: unicöde\n\n')
786786

787+
def test_folding_with_utf8_encoding_1(self):
788+
# bpo-36520
789+
#
790+
# Fold a line that contains UTF-8 words before
791+
# and after the whitespace fold point, where the
792+
# line length limit is reached within an ASCII
793+
# word.
794+
795+
m = EmailMessage()
796+
m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
797+
'Hello Wörld! Hello Wörld!Hello Wörld!'
798+
self.assertEqual(bytes(m),
799+
b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
800+
b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
801+
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
802+
803+
804+
def test_folding_with_utf8_encoding_2(self):
805+
# bpo-36520
806+
#
807+
# Fold a line that contains UTF-8 words before
808+
# and after the whitespace fold point, where the
809+
# line length limit is reached at the end of an
810+
# encoded word.
811+
812+
m = EmailMessage()
813+
m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
814+
'Hello Wörlds123! Hello Wörld!Hello Wörld!'
815+
self.assertEqual(bytes(m),
816+
b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
817+
b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'
818+
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
819+
820+
def test_folding_with_utf8_encoding_3(self):
821+
# bpo-36520
822+
#
823+
# Fold a line that contains UTF-8 words before
824+
# and after the whitespace fold point, where the
825+
# line length limit is reached at the end of the
826+
# first word.
827+
828+
m = EmailMessage()
829+
m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! ' \
830+
'Hello Wörld!Hello Wörld!'
831+
self.assertEqual(bytes(m), \
832+
b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
833+
b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
834+
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
835+
836+
def test_folding_with_utf8_encoding_4(self):
837+
# bpo-36520
838+
#
839+
# Fold a line that contains UTF-8 words before
840+
# and after the fold point, where the first
841+
# word is UTF-8 and the fold point is within
842+
# the word.
843+
844+
m = EmailMessage()
845+
m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello' \
846+
' Wörld!Hello Wörld!'
847+
self.assertEqual(bytes(m),
848+
b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
849+
b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
850+
b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
851+
852+
def test_folding_with_utf8_encoding_5(self):
853+
# bpo-36520
854+
#
855+
# Fold a line that contains a UTF-8 word after
856+
# the fold point.
857+
858+
m = EmailMessage()
859+
m['Subject'] = '123456789 123456789 123456789 123456789 123456789' \
860+
' 123456789 123456789 Hello Wörld!'
861+
self.assertEqual(bytes(m),
862+
b'Subject: 123456789 123456789 123456789 123456789'
863+
b' 123456789 123456789 123456789\n'
864+
b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n')
865+
866+
def test_folding_with_utf8_encoding_6(self):
867+
# bpo-36520
868+
#
869+
# Fold a line that contains a UTF-8 word before
870+
# the fold point and ASCII words after
871+
872+
m = EmailMessage()
873+
m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!' \
874+
' 123456789 123456789 123456789 123456789 123456789' \
875+
' 123456789'
876+
self.assertEqual(bytes(m),
877+
b'Subject: 123456789 123456789 123456789 123456789'
878+
b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '
879+
b'123456789 123456789 123456789 123456789 '
880+
b'123456789\n\n')
881+
882+
def test_folding_with_utf8_encoding_7(self):
883+
# bpo-36520
884+
#
885+
# Fold a line twice that contains UTF-8 words before
886+
# and after the first fold point, and ASCII words
887+
# after the second fold point.
888+
889+
m = EmailMessage()
890+
m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
891+
'123456789-123456789 123456789 Hello Wörld! 123456789' \
892+
' 123456789'
893+
self.assertEqual(bytes(m),
894+
b'Subject: 123456789 123456789 Hello =?utf-8?q?'
895+
b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
896+
b' 123456789-123456789 123456789 Hello '
897+
b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n')
898+
899+
def test_folding_with_utf8_encoding_8(self):
900+
# bpo-36520
901+
#
902+
# Fold a line twice that contains UTF-8 words before
903+
# the first fold point, and ASCII words after the
904+
# first fold point, and UTF-8 words after the second
905+
# fold point.
906+
907+
m = EmailMessage()
908+
m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
909+
'123456789 123456789 123456789 123456789 123456789 ' \
910+
'123456789-123456789 123456789 Hello Wörld! 123456789' \
911+
' 123456789'
912+
self.assertEqual(bytes(m),
913+
b'Subject: 123456789 123456789 Hello '
914+
b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '
915+
b'123456789 123456789 123456789 123456789 '
916+
b'123456789-123456789\n 123456789 Hello '
917+
b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
787918

788919
class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
789920
# Doing the full test run here may seem a bit redundant, since the two
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher.

0 commit comments

Comments
 (0)