Skip to content

Commit 225e365

Browse files
[3.7] bpo-42103: Improve validation of Plist files. (GH-22882) (#23117)
* Prevent some possible DoS attacks via providing invalid Plist files with an extremely large number of objects or collection sizes.
* Raise InvalidFileException for too large bytes and string sizes instead of returning garbage.
* Raise InvalidFileException instead of ValueError for a specific invalid datetime (NaN).
* Raise InvalidFileException instead of TypeError for non-hashable dict keys.
* Add more tests for invalid Plist files.

(cherry picked from commit 34637a0)

Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent dd28047 commit 225e365

File tree

4 files changed

+367
-67
lines changed

4 files changed

+367
-67
lines changed

Lib/plistlib.py

Lines changed: 21 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -568,7 +568,7 @@ def parse(self, fp):
568568
return self._read_object(top_object)
569569

570570
except (OSError, IndexError, struct.error, OverflowError,
571-
UnicodeDecodeError):
571+
ValueError):
572572
raise InvalidFileException()
573573

574574
def _get_size(self, tokenL):
@@ -584,7 +584,7 @@ def _get_size(self, tokenL):
584584
def _read_ints(self, n, size):
585585
data = self._fp.read(size * n)
586586
if size in _BINARY_FORMAT:
587-
return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
587+
return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
588588
else:
589589
if not size or len(data) != size * n:
590590
raise InvalidFileException()
@@ -643,19 +643,25 @@ def _read_object(self, ref):
643643

644644
elif tokenH == 0x40: # data
645645
s = self._get_size(tokenL)
646-
if self._use_builtin_types:
647-
result = self._fp.read(s)
648-
else:
649-
result = Data(self._fp.read(s))
646+
result = self._fp.read(s)
647+
if len(result) != s:
648+
raise InvalidFileException()
649+
if not self._use_builtin_types:
650+
result = Data(result)
650651

651652
elif tokenH == 0x50: # ascii string
652653
s = self._get_size(tokenL)
653-
result = self._fp.read(s).decode('ascii')
654-
result = result
654+
data = self._fp.read(s)
655+
if len(data) != s:
656+
raise InvalidFileException()
657+
result = data.decode('ascii')
655658

656659
elif tokenH == 0x60: # unicode string
657-
s = self._get_size(tokenL)
658-
result = self._fp.read(s * 2).decode('utf-16be')
660+
s = self._get_size(tokenL) * 2
661+
data = self._fp.read(s)
662+
if len(data) != s:
663+
raise InvalidFileException()
664+
result = data.decode('utf-16be')
659665

660666
# tokenH == 0x80 is documented as 'UID' and appears to be used for
661667
# keyed-archiving, not in plists.
@@ -679,9 +685,11 @@ def _read_object(self, ref):
679685
obj_refs = self._read_refs(s)
680686
result = self._dict_type()
681687
self._objects[ref] = result
682-
for k, o in zip(key_refs, obj_refs):
683-
result[self._read_object(k)] = self._read_object(o)
684-
688+
try:
689+
for k, o in zip(key_refs, obj_refs):
690+
result[self._read_object(k)] = self._read_object(o)
691+
except TypeError:
692+
raise InvalidFileException()
685693
else:
686694
raise InvalidFileException()
687695

0 commit comments

Comments
 (0)