Skip to content

[3.12] bpo-40944: Fix IndexError when parsing emails with truncated Message-ID, address, routes, etc. (GH-20790) #117974

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,7 @@ def get_bare_quoted_string(value):
value is the text between the quote marks, with whitespace
preserved and quoted pairs decoded.
"""
if value[0] != '"':
if not value or value[0] != '"':
raise errors.HeaderParseError(
"expected '\"' but found '{}'".format(value))
bare_quoted_string = BareQuotedString()
Expand Down Expand Up @@ -1454,7 +1454,7 @@ def get_local_part(value):
"""
local_part = LocalPart()
leader = None
if value[0] in CFWS_LEADER:
if value and value[0] in CFWS_LEADER:
leader, value = get_cfws(value)
if not value:
raise errors.HeaderParseError(
Expand Down Expand Up @@ -1613,7 +1613,7 @@ def get_domain(value):
"""
domain = Domain()
leader = None
if value[0] in CFWS_LEADER:
if value and value[0] in CFWS_LEADER:
leader, value = get_cfws(value)
if not value:
raise errors.HeaderParseError(
Expand Down Expand Up @@ -1689,6 +1689,8 @@ def get_obs_route(value):
if value[0] in CFWS_LEADER:
token, value = get_cfws(value)
obs_route.append(token)
if not value:
break
if value[0] == '@':
obs_route.append(RouteComponentMarker)
token, value = get_domain(value[1:])
Expand All @@ -1707,7 +1709,7 @@ def get_angle_addr(value):

"""
angle_addr = AngleAddr()
if value[0] in CFWS_LEADER:
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
angle_addr.append(token)
if not value or value[0] != '<':
Expand All @@ -1717,7 +1719,7 @@ def get_angle_addr(value):
value = value[1:]
# Although it is not legal per RFC5322, SMTP uses '<>' in certain
# circumstances.
if value[0] == '>':
if value and value[0] == '>':
angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
angle_addr.defects.append(errors.InvalidHeaderDefect(
"null addr-spec in angle-addr"))
Expand Down Expand Up @@ -1769,6 +1771,9 @@ def get_name_addr(value):
name_addr = NameAddr()
# Both the optional display name and the angle-addr can start with cfws.
leader = None
if not value:
raise errors.HeaderParseError(
"expected name-addr but found '{}'".format(value))
if value[0] in CFWS_LEADER:
leader, value = get_cfws(value)
if not value:
Expand Down
40 changes: 40 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,10 @@ def test_get_quoted_string_header_ends_in_qcontent(self):
self.assertEqual(qs.content, 'bob')
self.assertEqual(qs.quoted_value, ' "bob"')

def test_get_quoted_string_cfws_only_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_quoted_string(' (foo) ')

def test_get_quoted_string_no_quoted_string(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_quoted_string(' (ab) xyz')
Expand Down Expand Up @@ -1135,6 +1139,10 @@ def test_get_local_part_complex_obsolete_invalid(self):
'@python.org')
self.assertEqual(local_part.local_part, 'Fred.A.Johnson and dogs')

def test_get_local_part_empty_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_local_part('')

def test_get_local_part_no_part_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_local_part(' (foo) ')
Expand Down Expand Up @@ -1387,6 +1395,10 @@ def test_get_domain_obsolete(self):
'')
self.assertEqual(domain.domain, 'example.com')

def test_get_domain_empty_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_domain("")

def test_get_domain_no_non_cfws_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_domain(" (foo)\t")
Expand Down Expand Up @@ -1512,6 +1524,10 @@ def test_get_obs_route_no_route_before_end_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) @example.com,')

def test_get_obs_route_no_route_before_end_raises2(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) @example.com, (foo) ')

def test_get_obs_route_no_route_before_special_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) [abc],')
Expand All @@ -1520,6 +1536,14 @@ def test_get_obs_route_no_route_before_special_raises2(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) @example.com [abc],')

def test_get_obs_route_no_domain_after_at_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('@')

def test_get_obs_route_no_domain_after_at_raises2(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('@example.com, @')

# get_angle_addr

def test_get_angle_addr_simple(self):
Expand Down Expand Up @@ -1646,6 +1670,14 @@ def test_get_angle_addr_ends_at_special(self):
self.assertIsNone(angle_addr.route)
self.assertEqual(angle_addr.addr_spec, '[email protected]')

def test_get_angle_addr_empty_raise(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_angle_addr('')

def test_get_angle_addr_left_angle_only_raise(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_angle_addr('<')

def test_get_angle_addr_no_angle_raise(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_angle_addr('(foo) ')
Expand Down Expand Up @@ -1857,6 +1889,10 @@ def test_get_name_addr_ends_at_special(self):
self.assertIsNone(name_addr.route)
self.assertEqual(name_addr.addr_spec, '[email protected]')

def test_get_name_addr_empty_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_name_addr('')

def test_get_name_addr_no_content_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_name_addr(' (foo) ')
Expand Down Expand Up @@ -2732,6 +2768,10 @@ def test_get_msg_id_empty_id_right(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_msg_id("<simplelocal@>")

def test_get_msg_id_no_id_right(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_msg_id("<simplelocal@")

def test_get_msg_id_with_brackets(self):
# Microsoft Outlook generates non-standard one-off addresses:
# https://learn.microsoft.com/en-us/office/client-developer/outlook/mapi/one-off-addresses
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix several IndexError exceptions when parsing emails with truncated Message-ID, address, routes, etc., e.g. ``example@``.