Skip to content

Commit 303aac8

Browse files
authored
bpo-30681: Support invalid date format or value in email Date header (GH-22090)
I am re-submitting an older PR which was abandoned but is still relevant, #10783 by @timb07. The issue being solved (bpo-30681) is still relevant. The original PR #10783 was closed because the final requested changes were not applied, and it has since been abandoned. In this new PR I have re-used the original patch and applied both comments from the review, by @maxking and @pganssle. For reference, here is the original PR description: In email.utils.parsedate_to_datetime(), a failure to parse the date, or invalid date components (such as hour outside 0..23) raises an exception. Document this behaviour, and add tests to test_email/test_utils.py to confirm this behaviour. In email.headerregistry.DateHeader.parse(), check when parsedate_to_datetime() raises an exception and add a new defect InvalidDateDefect; preserve the invalid value as the string value of the header, but set the datetime attribute to None. Add tests to test_email/test_headerregistry.py to confirm this behaviour; also add a test to test_email/test_inversion.py to confirm emails with such defective date headers round trip successfully. This pull request incorporates feedback gratefully received from @bitdancer, @brettcannon, @Mariatta and @warsaw, and replaces the earlier PR #2254. Automerge-Triggered-By: GH:warsaw
1 parent 8e3b9f9 commit 303aac8

File tree

10 files changed

+59
-5
lines changed

10 files changed

+59
-5
lines changed

Doc/library/email.errors.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,6 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
112112
* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
113113
bytes, the number of non-padding base64 characters was invalid (1 more than
114114
a multiple of 4). The encoded block was kept as-is.
115+
116+
* :class:`InvalidDateDefect` -- When decoding an invalid or unparsable date field.
117+
The original value is kept as-is.

Doc/library/email.utils.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,10 @@ of the new API.
124124
.. function:: parsedate_to_datetime(date)
125125

126126
The inverse of :func:`format_datetime`. Performs the same function as
127-
:func:`parsedate`, but on success returns a :mod:`~datetime.datetime`. If
128-
the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
127+
:func:`parsedate`, but on success returns a :class:`~datetime.datetime`;
128+
otherwise ``ValueError`` is raised if *date* contains an invalid value such
129+
as an hour greater than 23 or a timezone offset not between -24 and 24 hours.
130+
If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
129131
``datetime``, and if the date is conforming to the RFCs it will represent a
130132
time in UTC but with no indication of the actual source timezone of the
131133
message the date comes from. If the input date has any other valid timezone

Lib/email/_parseaddr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def _parsedate_tz(data):
6565
6666
"""
6767
if not data:
68-
return
68+
return None
6969
data = data.split()
7070
# The FWS after the comma after the day-of-week is optional, so search and
7171
# adjust for this.

Lib/email/errors.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,6 @@ class NonASCIILocalPartDefect(HeaderDefect):
108108
"""local_part contains non-ASCII characters"""
109109
# This defect only occurs during unicode parsing, not when
110110
# parsing messages decoded from binary.
111+
112+
class InvalidDateDefect(HeaderDefect):
113+
"""Header has unparseable or invalid date"""

Lib/email/headerregistry.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,14 @@ def parse(cls, value, kwds):
302302
kwds['parse_tree'] = parser.TokenList()
303303
return
304304
if isinstance(value, str):
305-
value = utils.parsedate_to_datetime(value)
305+
kwds['decoded'] = value
306+
try:
307+
value = utils.parsedate_to_datetime(value)
308+
except ValueError:
309+
kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format'))
310+
kwds['datetime'] = None
311+
kwds['parse_tree'] = parser.TokenList()
312+
return
306313
kwds['datetime'] = value
307314
kwds['decoded'] = utils.format_datetime(kwds['datetime'])
308315
kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

Lib/email/utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,10 @@ def make_msgid(idstring=None, domain=None):
195195

196196

197197
def parsedate_to_datetime(data):
198-
*dtuple, tz = _parsedate_tz(data)
198+
parsed_date_tz = _parsedate_tz(data)
199+
if parsed_date_tz is None:
200+
raise ValueError('Invalid date value or format "%s"' % str(data))
201+
*dtuple, tz = parsed_date_tz
199202
if tz is None:
200203
return datetime.datetime(*dtuple[:6])
201204
return datetime.datetime(*dtuple[:6],

Lib/test/test_email/test_headerregistry.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,22 @@ def test_no_value_is_defect(self):
204204
self.assertEqual(len(h.defects), 1)
205205
self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue)
206206

207+
def test_invalid_date_format(self):
208+
s = 'Not a date header'
209+
h = self.make_header('date', s)
210+
self.assertEqual(h, s)
211+
self.assertIsNone(h.datetime)
212+
self.assertEqual(len(h.defects), 1)
213+
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
214+
215+
def test_invalid_date_value(self):
216+
s = 'Tue, 06 Jun 2017 27:39:33 +0600'
217+
h = self.make_header('date', s)
218+
self.assertEqual(h, s)
219+
self.assertIsNone(h.datetime)
220+
self.assertEqual(len(h.defects), 1)
221+
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
222+
207223
def test_datetime_read_only(self):
208224
h = self.make_header('date', self.datestring)
209225
with self.assertRaises(AttributeError):

Lib/test/test_email/test_inversion.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ def msg_as_input(self, msg):
4646
foo
4747
"""),),
4848

49+
'header_with_invalid_date': (dedent(b"""\
50+
Date: Tue, 06 Jun 2017 27:39:33 +0600
51+
52+
Subject: timezones
53+
54+
How do they work even?
55+
"""),),
56+
4957
}
5058

5159
payload_params = {

Lib/test/test_email/test_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ def test_parsedate_to_datetime_naive(self):
4848
utils.parsedate_to_datetime(self.datestring + ' -0000'),
4949
self.naive_dt)
5050

51+
def test_parsedate_to_datetime_with_invalid_raises_valueerror(self):
52+
invalid_dates = ['',
53+
'0',
54+
'A Complete Waste of Time',
55+
'Tue, 06 Jun 2017 27:39:33 +0600',
56+
'Tue, 06 Jun 2017 07:39:33 +2600',
57+
'Tue, 06 Jun 2017 27:39:33']
58+
for dtstr in invalid_dates:
59+
with self.subTest(dtstr=dtstr):
60+
self.assertRaises(ValueError, utils.parsedate_to_datetime, dtstr)
5161

5262
class LocaltimeTests(unittest.TestCase):
5363

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Handle exceptions caused by unparseable date headers when using email
2+
"default" policy. Patch by Tim Bell, Georges Toth

0 commit comments

Comments
 (0)