Skip to content

Commit 3e72de9

Browse files
[3.8] bpo-39667: Sync zipp 3.0 (GH-18540) (GH-18701)
* bpo-39667: Sync zipp 3.0 (GH-18540) * bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0 * 📜🤖 Added by blurb_it. * Update docs for new zipfile.Path.open * Rely on dict, faster than OrderedDict. * Syntax edits on docs Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> (cherry picked from commit 0aeab5c) Co-authored-by: Jason R. Coombs <[email protected]> * Clarify the change in behavior with a couple of workaround options. * Restore API compatibility while backporting performance improvements. Co-authored-by: Jason R. Coombs <[email protected]>
1 parent 59047fa commit 3e72de9

File tree

4 files changed

+27
-26
lines changed

4 files changed

+27
-26
lines changed

Doc/library/zipfile.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,12 @@ Path objects are traversable using the ``/`` operator.
494494
Invoke :meth:`ZipFile.open` on the current path. Accepts
495495
the same arguments as :meth:`ZipFile.open`.
496496

497+
.. caution::
498+
499+
The signature on this function changes in an incompatible way
500+
in Python 3.9. For a future-compatible version, consider using
501+
the third-party zipp.Path package (3.0 or later).
502+
497503
.. method:: Path.iterdir()
498504

499505
Enumerate the children of the current directory.

Lib/test/test_zipfile.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import pathlib
77
import posixpath
8+
import string
89
import struct
910
import subprocess
1011
import sys
@@ -2933,6 +2934,11 @@ def test_joinpath_constant_time(self):
29332934
# Check the file iterated all items
29342935
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
29352936

2937+
# @func_timeout.func_set_timeout(3)
2938+
def test_implied_dirs_performance(self):
2939+
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
2940+
zipfile.CompleteDirs._implied_dirs(data)
2941+
29362942

29372943
if __name__ == "__main__":
29382944
unittest.main()

Lib/zipfile.py

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import threading
1818
import time
1919
import contextlib
20-
from collections import OrderedDict
2120

2221
try:
2322
import zlib # We may need its compression method
@@ -2125,24 +2124,6 @@ def _compile(file, optimize=-1):
21252124
return (fname, archivename)
21262125

21272126

2128-
def _unique_everseen(iterable, key=None):
2129-
"List unique elements, preserving order. Remember all elements ever seen."
2130-
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
2131-
# unique_everseen('ABBCcAD', str.lower) --> A B C D
2132-
seen = set()
2133-
seen_add = seen.add
2134-
if key is None:
2135-
for element in itertools.filterfalse(seen.__contains__, iterable):
2136-
seen_add(element)
2137-
yield element
2138-
else:
2139-
for element in iterable:
2140-
k = key(element)
2141-
if k not in seen:
2142-
seen_add(k)
2143-
yield element
2144-
2145-
21462127
def _parents(path):
21472128
"""
21482129
Given a path with elements separated by
@@ -2184,6 +2165,18 @@ def _ancestry(path):
21842165
path, tail = posixpath.split(path)
21852166

21862167

2168+
_dedupe = dict.fromkeys
2169+
"""Deduplicate an iterable in original order"""
2170+
2171+
2172+
def _difference(minuend, subtrahend):
2173+
"""
2174+
Return items in minuend not in subtrahend, retaining order
2175+
with O(1) lookup.
2176+
"""
2177+
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2178+
2179+
21872180
class CompleteDirs(ZipFile):
21882181
"""
21892182
A ZipFile subclass that ensures that implied directories
@@ -2193,13 +2186,8 @@ class CompleteDirs(ZipFile):
21932186
@staticmethod
21942187
def _implied_dirs(names):
21952188
parents = itertools.chain.from_iterable(map(_parents, names))
2196-
# Deduplicate entries in original order
2197-
implied_dirs = OrderedDict.fromkeys(
2198-
p + posixpath.sep for p in parents
2199-
# Cast names to a set for O(1) lookups
2200-
if p + posixpath.sep not in set(names)
2201-
)
2202-
return implied_dirs
2189+
as_dirs = (p + posixpath.sep for p in parents)
2190+
return _dedupe(_difference(as_dirs, names))
22032191

22042192
def namelist(self):
22052193
names = super(CompleteDirs, self).namelist()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Correct performance degradation in ``zipfile.Path`` as found in zipp 3.0. While retaining compatibility, this change discourages the use of ``zipfile.Path.open`` due to the signature change in Python 3.9. For compatibility across Python 3.8 and later versions, consider using ``zipp.Path`` on Python 3.8.x and earlier.

0 commit comments

Comments
(0)