Skip to content

[3.8] bpo-39667: Sync zipp 3.0 (GH-18540) #18701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,12 @@ Path objects are traversable using the ``/`` operator.
Invoke :meth:`ZipFile.open` on the current path. Accepts
the same arguments as :meth:`ZipFile.open`.

.. caution::

The signature of this method changes in an incompatible way
in Python 3.9. For a future-compatible version, consider using
``Path`` from the third-party ``zipp`` package (3.0 or later).

.. method:: Path.iterdir()

Enumerate the children of the current directory.
Expand Down
6 changes: 6 additions & 0 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import pathlib
import posixpath
import string
import struct
import subprocess
import sys
Expand Down Expand Up @@ -2933,6 +2934,11 @@ def test_joinpath_constant_time(self):
# Check the file iterated all items
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES

# @func_timeout.func_set_timeout(3)
def test_implied_dirs_performance(self):
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
zipfile.CompleteDirs._implied_dirs(data)


if __name__ == "__main__":
unittest.main()
40 changes: 14 additions & 26 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import threading
import time
import contextlib
from collections import OrderedDict

try:
import zlib # We may need its compression method
Expand Down Expand Up @@ -2125,24 +2124,6 @@ def _compile(file, optimize=-1):
return (fname, archivename)


def _unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set()
seen_add = seen.add
if key is None:
for element in itertools.filterfalse(seen.__contains__, iterable):
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element


def _parents(path):
"""
Given a path with elements separated by
Expand Down Expand Up @@ -2184,6 +2165,18 @@ def _ancestry(path):
path, tail = posixpath.split(path)


# Alias for the ``dict.fromkeys`` classmethod: calling ``_dedupe(iterable)``
# builds a dict keyed by the iterable's items. Duplicate items collapse onto
# one key, and dicts keep insertion order, so the keys are the unique items
# in first-seen order.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""


def _difference(minuend, subtrahend):
"""
Return items in minuend not in subtrahend, retaining order
with O(1) lookup.
"""
return itertools.filterfalse(set(subtrahend).__contains__, minuend)


class CompleteDirs(ZipFile):
"""
A ZipFile subclass that ensures that implied directories
Expand All @@ -2193,13 +2186,8 @@ class CompleteDirs(ZipFile):
@staticmethod
def _implied_dirs(names):
parents = itertools.chain.from_iterable(map(_parents, names))
# Deduplicate entries in original order
implied_dirs = OrderedDict.fromkeys(
p + posixpath.sep for p in parents
# Cast names to a set for O(1) lookups
if p + posixpath.sep not in set(names)
)
return implied_dirs
as_dirs = (p + posixpath.sep for p in parents)
return _dedupe(_difference(as_dirs, names))

def namelist(self):
names = super(CompleteDirs, self).namelist()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Correct a performance degradation in ``zipfile.Path`` by applying the fix found in zipp 3.0. While retaining compatibility, this change discourages the use of ``zipfile.Path.open`` because its signature changes in Python 3.9. For code that must work across Python 3.8 and later versions, consider using ``zipp.Path`` on Python 3.8.x and earlier.