Skip to content

Commit 3fa1522

Browse files
jaraco authored and miss-islington committed
bpo-39667: Sync zipp 3.0 (GH-18540)
* bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0
* 📜🤖 Added by blurb_it.
* Update docs for new zipfile.Path.open
* Rely on dict, faster than OrderedDict.
* Syntax edits on docs

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
(cherry picked from commit 0aeab5c)
Co-authored-by: Jason R. Coombs <[email protected]>
1 parent 45c4112 commit 3fa1522

File tree

4 files changed

+53
-37
lines changed

4 files changed

+53
-37
lines changed

Doc/library/zipfile.rst

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -489,10 +489,20 @@ Path objects are traversable using the ``/`` operator.
489489

490490
The final path component.
491491

492-
.. method:: Path.open(*, **)
493-
494-
Invoke :meth:`ZipFile.open` on the current path. Accepts
495-
the same arguments as :meth:`ZipFile.open`.
492+
.. method:: Path.open(mode='r', *, pwd, **)
493+
494+
Invoke :meth:`ZipFile.open` on the current path.
495+
Allows opening for read or write, text or binary
496+
through supported modes: 'r', 'w', 'rb', 'wb'.
497+
Positional and keyword arguments are passed through to
498+
:class:`io.TextIOWrapper` when opened as text and
499+
ignored otherwise.
500+
``pwd`` is the ``pwd`` parameter to
501+
:meth:`ZipFile.open`.
502+
503+
.. versionchanged:: 3.9
504+
Added support for text and binary modes for open. Default
505+
mode is now text.
496506

497507
.. method:: Path.iterdir()
498508

Lib/test/test_zipfile.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import pathlib
77
import posixpath
8+
import string
89
import struct
910
import subprocess
1011
import sys
@@ -2839,7 +2840,7 @@ def test_open(self):
28392840
a, b, g = root.iterdir()
28402841
with a.open() as strm:
28412842
data = strm.read()
2842-
assert data == b"content of a"
2843+
assert data == "content of a"
28432844

28442845
def test_read(self):
28452846
for alpharep in self.zipfile_alpharep():
@@ -2933,6 +2934,11 @@ def test_joinpath_constant_time(self):
29332934
# Check the file iterated all items
29342935
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
29352936

2937+
# @func_timeout.func_set_timeout(3)
2938+
def test_implied_dirs_performance(self):
2939+
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
2940+
zipfile.CompleteDirs._implied_dirs(data)
2941+
29362942

29372943
if __name__ == "__main__":
29382944
unittest.main()

Lib/zipfile.py

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import threading
1818
import time
1919
import contextlib
20-
from collections import OrderedDict
2120

2221
try:
2322
import zlib # We may need its compression method
@@ -2125,24 +2124,6 @@ def _compile(file, optimize=-1):
21252124
return (fname, archivename)
21262125

21272126

2128-
def _unique_everseen(iterable, key=None):
2129-
"List unique elements, preserving order. Remember all elements ever seen."
2130-
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
2131-
# unique_everseen('ABBCcAD', str.lower) --> A B C D
2132-
seen = set()
2133-
seen_add = seen.add
2134-
if key is None:
2135-
for element in itertools.filterfalse(seen.__contains__, iterable):
2136-
seen_add(element)
2137-
yield element
2138-
else:
2139-
for element in iterable:
2140-
k = key(element)
2141-
if k not in seen:
2142-
seen_add(k)
2143-
yield element
2144-
2145-
21462127
def _parents(path):
21472128
"""
21482129
Given a path with elements separated by
@@ -2184,6 +2165,18 @@ def _ancestry(path):
21842165
path, tail = posixpath.split(path)
21852166

21862167

2168+
_dedupe = dict.fromkeys
2169+
"""Deduplicate an iterable in original order"""
2170+
2171+
2172+
def _difference(minuend, subtrahend):
2173+
"""
2174+
Return items in minuend not in subtrahend, retaining order
2175+
with O(1) lookup.
2176+
"""
2177+
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2178+
2179+
21872180
class CompleteDirs(ZipFile):
21882181
"""
21892182
A ZipFile subclass that ensures that implied directories
@@ -2193,13 +2186,8 @@ class CompleteDirs(ZipFile):
21932186
@staticmethod
21942187
def _implied_dirs(names):
21952188
parents = itertools.chain.from_iterable(map(_parents, names))
2196-
# Deduplicate entries in original order
2197-
implied_dirs = OrderedDict.fromkeys(
2198-
p + posixpath.sep for p in parents
2199-
# Cast names to a set for O(1) lookups
2200-
if p + posixpath.sep not in set(names)
2201-
)
2202-
return implied_dirs
2189+
as_dirs = (p + posixpath.sep for p in parents)
2190+
return _dedupe(_difference(as_dirs, names))
22032191

22042192
def namelist(self):
22052193
names = super(CompleteDirs, self).namelist()
@@ -2328,20 +2316,31 @@ def __init__(self, root, at=""):
23282316
self.root = FastLookup.make(root)
23292317
self.at = at
23302318

2331-
@property
2332-
def open(self):
2333-
return functools.partial(self.root.open, self.at)
2319+
def open(self, mode='r', *args, **kwargs):
2320+
"""
2321+
Open this entry as text or binary following the semantics
2322+
of ``pathlib.Path.open()`` by passing arguments through
2323+
to io.TextIOWrapper().
2324+
"""
2325+
pwd = kwargs.pop('pwd', None)
2326+
zip_mode = mode[0]
2327+
stream = self.root.open(self.at, zip_mode, pwd=pwd)
2328+
if 'b' in mode:
2329+
if args or kwargs:
2330+
raise ValueError("encoding args invalid for binary operation")
2331+
return stream
2332+
return io.TextIOWrapper(stream, *args, **kwargs)
23342333

23352334
@property
23362335
def name(self):
23372336
return posixpath.basename(self.at.rstrip("/"))
23382337

23392338
def read_text(self, *args, **kwargs):
2340-
with self.open() as strm:
2341-
return io.TextIOWrapper(strm, *args, **kwargs).read()
2339+
with self.open('r', *args, **kwargs) as strm:
2340+
return strm.read()
23422341

23432342
def read_bytes(self):
2344-
with self.open() as strm:
2343+
with self.open('rb') as strm:
23452344
return strm.read()
23462345

23472346
def _is_child(self, path):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0.

0 commit comments

Comments (0)