Skip to content

bpo-27397: Make email module properly handle invalid-length base64 strings #7583

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/library/email.errors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,7 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
* :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64
  encoded bytes, characters outside the base64 alphabet were encountered.
  The characters are ignored, but the resulting decoded bytes may be invalid.

* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
  bytes, the number of non-padding base64 characters was invalid (1 more than
  a multiple of 4).  The encoded block is kept as-is.
48 changes: 30 additions & 18 deletions Lib/email/_encoded_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,30 +98,42 @@ def len_q(bstring):
#

def decode_b(encoded):
    """Decode the base64 bytes *encoded*, tolerating malformed input.

    Return a ``(decoded, defects)`` tuple.  *defects* is a list of defect
    instances describing what had to be worked around: missing padding,
    characters outside the base64 alphabet, or an undecodable length.  When
    the input cannot be decoded at all, *encoded* itself is returned
    unchanged together with an ``InvalidBase64LengthDefect``.
    """
    # First try decoding with validate=True, fixing the padding if needed.
    # This will succeed only if encoded includes no invalid characters.
    pad_err = len(encoded) % 4
    missing_padding = b'==='[:4-pad_err] if pad_err else b''
    try:
        return (
            base64.b64decode(encoded + missing_padding, validate=True),
            [errors.InvalidBase64PaddingDefect()] if pad_err else [],
        )
    except binascii.Error:
        # Since we had correct padding, this is likely an invalid char error.
        #
        # The non-alphabet characters are ignored as far as padding
        # goes, but we don't know how many there are.  So try without adding
        # padding to see if it works.
        try:
            return (
                base64.b64decode(encoded, validate=False),
                [errors.InvalidBase64CharactersDefect()],
            )
        except binascii.Error:
            # Add as much padding as could possibly be necessary (extra padding
            # is ignored).
            try:
                return (
                    base64.b64decode(encoded + b'==', validate=False),
                    [errors.InvalidBase64CharactersDefect(),
                     errors.InvalidBase64PaddingDefect()],
                )
            except binascii.Error:
                # This only happens when the encoded string's length is 1 more
                # than a multiple of 4, which is invalid.
                #
                # bpo-27397: Just return the encoded string since there's no
                # way to decode.
                return encoded, [errors.InvalidBase64LengthDefect()]

def encode_b(bstring):
    """Return the base64 encoding of the bytes *bstring* as an ASCII str."""
    return base64.b64encode(bstring).decode('ascii')
Expand Down
3 changes: 3 additions & 0 deletions Lib/email/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect):
class InvalidBase64CharactersDefect(MessageDefect):
    """base64 encoded sequence had characters not in base64 alphabet"""

class InvalidBase64LengthDefect(MessageDefect):
    """base64 encoded sequence had invalid length (1 mod 4)"""

# These errors are specific to header parsing.

class HeaderDefect(MessageDefect):
Expand Down
6 changes: 6 additions & 0 deletions Lib/test/test_email/test__encoded_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ def test_simple(self):
self._test(b'Zm9v', b'foo')

def test_missing_padding(self):
    # Input whose length is not a multiple of 4 must still decode, with
    # the missing padding reported as a defect.
    # 1 missing padding character
    self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect])
    # 2 missing padding characters
    self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect])

def test_invalid_character(self):
    # \x01 is outside the base64 alphabet; the expected result is b'vi'
    # with only the invalid-characters defect recorded.
    self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect])
Expand All @@ -42,6 +45,9 @@ def test_invalid_character_and_bad_padding(self):
self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect,
errors.InvalidBase64PaddingDefect])

def test_invalid_length(self):
    # Five base64 characters (1 more than a multiple of 4) cannot be
    # decoded; the input is expected back unchanged with a length defect.
    self._test(b'abcde', b'abcde', [errors.InvalidBase64LengthDefect])


class TestDecode(TestEmailBase):

Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,15 @@ def test_get_unstructured_invalid_base64_character_and_bad_padding(self):
errors.InvalidBase64PaddingDefect],
'')

def test_get_unstructured_invalid_base64_length(self):
    # bpo-27397: Return the encoded string since there's no way to decode.
    self._test_get_x(self._get_unst,
        '=?utf-8?b?abcde?=',
        'abcde',
        'abcde',
        [errors.InvalidBase64LengthDefect],
        '')

def test_get_unstructured_no_whitespace_between_ews(self):
self._test_get_x(self._get_unst,
'=?utf-8?q?foo?==?utf-8?q?bar?=',
Expand Down
17 changes: 17 additions & 0 deletions Lib/test/test_email/test_defect_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,23 @@ def test_invalid_chars_in_base64_payload(self):
self.assertDefectsEqual(self.get_defects(msg),
[errors.InvalidBase64CharactersDefect])

def test_invalid_length_of_base64_payload(self):
    # The base64 body 'abcde' has an undecodable length (1 more than a
    # multiple of 4); the payload is expected back as-is with a defect.
    source = textwrap.dedent("""\
        Subject: test
        MIME-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        abcde
        """)
    msg = self._str_msg(source)
    with self._raise_point(errors.InvalidBase64LengthDefect):
        payload = msg.get_payload(decode=True)
    # Nothing further to check when the defect was expected to be raised.
    if self.raise_expected: return
    self.assertEqual(payload, b'abcde')
    self.assertDefectsEqual(self.get_defects(msg),
                            [errors.InvalidBase64LengthDefect])

def test_missing_ending_boundary(self):
source = textwrap.dedent("""\
To: [email protected]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make email module properly handle invalid-length base64 strings.