Skip to content

Commit 7b82281

Browse files
bpo-27397: Make email module properly handle invalid-length base64 strings (GH-7583) (GH-7664)
When attempting to base64-decode a payload of invalid length (1 mod 4), properly recognize and handle it. The given data will be returned as-is, i.e. not decoded, along with a new defect, InvalidBase64LengthDefect. (cherry picked from commit c3f55be) Co-authored-by: Tal Einat <[email protected]>
1 parent 66cea5c commit 7b82281

File tree

7 files changed

+70
-18
lines changed

7 files changed

+70
-18
lines changed

Doc/library/email.errors.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,7 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
108108
* :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64
109109
encoded bytes, characters outside the base64 alphabet were encountered.
110110
The characters are ignored, but the resulting decoded bytes may be invalid.
111+
112+
* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
113+
bytes, the number of non-padding base64 characters was invalid (1 more than
114+
a multiple of 4). The encoded block was kept as-is.

Lib/email/_encoded_words.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -98,30 +98,42 @@ def len_q(bstring):
9898
#
9999

100100
def decode_b(encoded):
101-
defects = []
101+
# First try decoding with validate=True, fixing the padding if needed.
102+
# This will succeed only if encoded includes no invalid characters.
102103
pad_err = len(encoded) % 4
103-
if pad_err:
104-
defects.append(errors.InvalidBase64PaddingDefect())
105-
padded_encoded = encoded + b'==='[:4-pad_err]
106-
else:
107-
padded_encoded = encoded
104+
missing_padding = b'==='[:4-pad_err] if pad_err else b''
108105
try:
109-
return base64.b64decode(padded_encoded, validate=True), defects
106+
return (
107+
base64.b64decode(encoded + missing_padding, validate=True),
108+
[errors.InvalidBase64PaddingDefect()] if pad_err else [],
109+
)
110110
except binascii.Error:
111-
# Since we had correct padding, this must be an invalid char error.
112-
defects = [errors.InvalidBase64CharactersDefect()]
111+
# Since we had correct padding, this is likely an invalid char error.
112+
#
113113
# The non-alphabet characters are ignored as far as padding
114-
# goes, but we don't know how many there are. So we'll just
115-
# try various padding lengths until something works.
116-
for i in 0, 1, 2, 3:
114+
# goes, but we don't know how many there are. So try without adding
115+
# padding to see if it works.
116+
try:
117+
return (
118+
base64.b64decode(encoded, validate=False),
119+
[errors.InvalidBase64CharactersDefect()],
120+
)
121+
except binascii.Error:
122+
# Add as much padding as could possibly be necessary (extra padding
123+
# is ignored).
117124
try:
118-
return base64.b64decode(encoded+b'='*i, validate=False), defects
125+
return (
126+
base64.b64decode(encoded + b'==', validate=False),
127+
[errors.InvalidBase64CharactersDefect(),
128+
errors.InvalidBase64PaddingDefect()],
129+
)
119130
except binascii.Error:
120-
if i==0:
121-
defects.append(errors.InvalidBase64PaddingDefect())
122-
else:
123-
# This should never happen.
124-
raise AssertionError("unexpected binascii.Error")
131+
# This only happens when the encoded string's length is 1 more
132+
# than a multiple of 4, which is invalid.
133+
#
134+
# bpo-27397: Just return the encoded string since there's no
135+
# way to decode.
136+
return encoded, [errors.InvalidBase64LengthDefect()]
125137

126138
def encode_b(bstring):
127139
return base64.b64encode(bstring).decode('ascii')

Lib/email/errors.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect):
7373
class InvalidBase64CharactersDefect(MessageDefect):
7474
"""base64 encoded sequence had characters not in base64 alphabet"""
7575

76+
class InvalidBase64LengthDefect(MessageDefect):
77+
"""base64 encoded sequence had invalid length (1 mod 4)"""
78+
7679
# These errors are specific to header parsing.
7780

7881
class HeaderDefect(MessageDefect):

Lib/test/test_email/test__encoded_words.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@ def test_simple(self):
3333
self._test(b'Zm9v', b'foo')
3434

3535
def test_missing_padding(self):
36+
# 1 missing padding character
3637
self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect])
38+
# 2 missing padding characters
39+
self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect])
3740

3841
def test_invalid_character(self):
3942
self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect])
@@ -42,6 +45,9 @@ def test_invalid_character_and_bad_padding(self):
4245
self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect,
4346
errors.InvalidBase64PaddingDefect])
4447

48+
def test_invalid_length(self):
49+
self._test(b'abcde', b'abcde', [errors.InvalidBase64LengthDefect])
50+
4551

4652
class TestDecode(TestEmailBase):
4753

Lib/test/test_email/test__header_value_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,15 @@ def test_get_unstructured_invalid_base64_character_and_bad_padding(self):
347347
errors.InvalidBase64PaddingDefect],
348348
'')
349349

350+
def test_get_unstructured_invalid_base64_length(self):
351+
# bpo-27397: Return the encoded string since there's no way to decode.
352+
self._test_get_x(self._get_unst,
353+
'=?utf-8?b?abcde?=',
354+
'abcde',
355+
'abcde',
356+
[errors.InvalidBase64LengthDefect],
357+
'')
358+
350359
def test_get_unstructured_no_whitespace_between_ews(self):
351360
self._test_get_x(self._get_unst,
352361
'=?utf-8?q?foo?==?utf-8?q?bar?=',

Lib/test/test_email/test_defect_handling.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,23 @@ def test_invalid_chars_in_base64_payload(self):
254254
self.assertDefectsEqual(self.get_defects(msg),
255255
[errors.InvalidBase64CharactersDefect])
256256

257+
def test_invalid_length_of_base64_payload(self):
258+
source = textwrap.dedent("""\
259+
Subject: test
260+
MIME-Version: 1.0
261+
Content-Type: text/plain; charset="utf-8"
262+
Content-Transfer-Encoding: base64
263+
264+
abcde
265+
""")
266+
msg = self._str_msg(source)
267+
with self._raise_point(errors.InvalidBase64LengthDefect):
268+
payload = msg.get_payload(decode=True)
269+
if self.raise_expected: return
270+
self.assertEqual(payload, b'abcde')
271+
self.assertDefectsEqual(self.get_defects(msg),
272+
[errors.InvalidBase64LengthDefect])
273+
257274
def test_missing_ending_boundary(self):
258275
source = textwrap.dedent("""\
259276
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Make email module properly handle invalid-length base64 strings.

0 commit comments

Comments
 (0)