Skip to content

Commit 0aeab5c

Browse files
bpo-39667: Sync zipp 3.0 (GH-18540)
* bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0 * 📜🤖 Added by blurb_it. * Update docs for new zipfile.Path.open * Rely on dict, faster than OrderedDict. * Syntax edits on docs Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
1 parent 1f0cd3c commit 0aeab5c

File tree

4 files changed

+53
-37
lines changed

4 files changed

+53
-37
lines changed

Doc/library/zipfile.rst

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -489,10 +489,20 @@ Path objects are traversable using the ``/`` operator.
489489

490490
The final path component.
491491

492-
.. method:: Path.open(*, **)
493-
494-
Invoke :meth:`ZipFile.open` on the current path. Accepts
495-
the same arguments as :meth:`ZipFile.open`.
492+
.. method:: Path.open(mode='r', *, pwd, **)
493+
494+
Invoke :meth:`ZipFile.open` on the current path.
495+
Allows opening for read or write, text or binary
496+
through supported modes: 'r', 'w', 'rb', 'wb'.
497+
Positional and keyword arguments are passed through to
498+
:class:`io.TextIOWrapper` when opened as text and
499+
ignored otherwise.
500+
``pwd`` is the ``pwd`` parameter to
501+
:meth:`ZipFile.open`.
502+
503+
.. versionchanged:: 3.9
504+
Added support for text and binary modes for open. Default
505+
mode is now text.
496506

497507
.. method:: Path.iterdir()
498508

Lib/test/test_zipfile.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import pathlib
77
import posixpath
8+
import string
89
import struct
910
import subprocess
1011
import sys
@@ -2880,7 +2881,7 @@ def test_open(self):
28802881
a, b, g = root.iterdir()
28812882
with a.open() as strm:
28822883
data = strm.read()
2883-
assert data == b"content of a"
2884+
assert data == "content of a"
28842885

28852886
def test_read(self):
28862887
for alpharep in self.zipfile_alpharep():
@@ -2974,6 +2975,11 @@ def test_joinpath_constant_time(self):
29742975
# Check the file iterated all items
29752976
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
29762977

2978+
# @func_timeout.func_set_timeout(3)
2979+
def test_implied_dirs_performance(self):
2980+
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
2981+
zipfile.CompleteDirs._implied_dirs(data)
2982+
29772983

29782984
if __name__ == "__main__":
29792985
unittest.main()

Lib/zipfile.py

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import threading
1818
import time
1919
import contextlib
20-
from collections import OrderedDict
2120

2221
try:
2322
import zlib # We may need its compression method
@@ -2102,24 +2101,6 @@ def _compile(file, optimize=-1):
21022101
return (fname, archivename)
21032102

21042103

2105-
def _unique_everseen(iterable, key=None):
2106-
"List unique elements, preserving order. Remember all elements ever seen."
2107-
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
2108-
# unique_everseen('ABBCcAD', str.lower) --> A B C D
2109-
seen = set()
2110-
seen_add = seen.add
2111-
if key is None:
2112-
for element in itertools.filterfalse(seen.__contains__, iterable):
2113-
seen_add(element)
2114-
yield element
2115-
else:
2116-
for element in iterable:
2117-
k = key(element)
2118-
if k not in seen:
2119-
seen_add(k)
2120-
yield element
2121-
2122-
21232104
def _parents(path):
21242105
"""
21252106
Given a path with elements separated by
@@ -2161,6 +2142,18 @@ def _ancestry(path):
21612142
path, tail = posixpath.split(path)
21622143

21632144

2145+
_dedupe = dict.fromkeys
2146+
"""Deduplicate an iterable in original order"""
2147+
2148+
2149+
def _difference(minuend, subtrahend):
2150+
"""
2151+
Return items in minuend not in subtrahend, retaining order
2152+
with O(1) lookup.
2153+
"""
2154+
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2155+
2156+
21642157
class CompleteDirs(ZipFile):
21652158
"""
21662159
A ZipFile subclass that ensures that implied directories
@@ -2170,13 +2163,8 @@ class CompleteDirs(ZipFile):
21702163
@staticmethod
21712164
def _implied_dirs(names):
21722165
parents = itertools.chain.from_iterable(map(_parents, names))
2173-
# Deduplicate entries in original order
2174-
implied_dirs = OrderedDict.fromkeys(
2175-
p + posixpath.sep for p in parents
2176-
# Cast names to a set for O(1) lookups
2177-
if p + posixpath.sep not in set(names)
2178-
)
2179-
return implied_dirs
2166+
as_dirs = (p + posixpath.sep for p in parents)
2167+
return _dedupe(_difference(as_dirs, names))
21802168

21812169
def namelist(self):
21822170
names = super(CompleteDirs, self).namelist()
@@ -2305,20 +2293,31 @@ def __init__(self, root, at=""):
23052293
self.root = FastLookup.make(root)
23062294
self.at = at
23072295

2308-
@property
2309-
def open(self):
2310-
return functools.partial(self.root.open, self.at)
2296+
def open(self, mode='r', *args, **kwargs):
2297+
"""
2298+
Open this entry as text or binary following the semantics
2299+
of ``pathlib.Path.open()`` by passing arguments through
2300+
to io.TextIOWrapper().
2301+
"""
2302+
pwd = kwargs.pop('pwd', None)
2303+
zip_mode = mode[0]
2304+
stream = self.root.open(self.at, zip_mode, pwd=pwd)
2305+
if 'b' in mode:
2306+
if args or kwargs:
2307+
raise ValueError("encoding args invalid for binary operation")
2308+
return stream
2309+
return io.TextIOWrapper(stream, *args, **kwargs)
23112310

23122311
@property
23132312
def name(self):
23142313
return posixpath.basename(self.at.rstrip("/"))
23152314

23162315
def read_text(self, *args, **kwargs):
2317-
with self.open() as strm:
2318-
return io.TextIOWrapper(strm, *args, **kwargs).read()
2316+
with self.open('r', *args, **kwargs) as strm:
2317+
return strm.read()
23192318

23202319
def read_bytes(self):
2321-
with self.open() as strm:
2320+
with self.open('rb') as strm:
23222321
return strm.read()
23232322

23242323
def _is_child(self, path):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0.

0 commit comments

Comments
(0)