Skip to content

Commit 310f6aa

Browse files
gvanrossum, brandtbucher, and cool-RR
authored
bpo-40636: PEP 618: add strict parameter to zip() (GH-20921)
zip() now supports PEP 618's strict parameter, which raises a ValueError if the arguments are exhausted at different lengths. Patch by Brandt Bucher. Co-authored-by: Brandt Bucher <[email protected]> Co-authored-by: Ram Rachum <[email protected]>
1 parent 37bb289 commit 310f6aa

File tree

3 files changed

+238
-8
lines changed

3 files changed

+238
-8
lines changed

Lib/test/test_builtin.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,14 @@ def test_vars(self):
15211521
self.assertRaises(TypeError, vars, 42)
15221522
self.assertEqual(vars(self.C_get_vars()), {'a':2})
15231523

1524+
def iter_error(self, iterable, error):
    """Collect `iterable` into a list, catching an expected `error`.

    The iteration runs inside ``self.assertRaises(error)``, so the calling
    test fails if `iterable` is exhausted without raising `error`.

    Returns the list of items that were produced before `error` was raised.
    """
    items = []
    with self.assertRaises(error):
        for item in iterable:
            items.append(item)
    # Reached only when `error` was raised and swallowed by assertRaises.
    return items
1531+
15241532
def test_zip(self):
15251533
a = (1, 2, 3)
15261534
b = (4, 5, 6)
@@ -1573,6 +1581,66 @@ def test_zip_pickle(self):
15731581
z1 = zip(a, b)
15741582
self.check_iter_pickle(z1, t, proto)
15751583

1584+
def test_zip_pickle_strict(self):
    """Check pickling of a strict zip over equal-length inputs.

    For every pickle protocol, a fresh ``zip(a, b, strict=True)`` and the
    expected list of tuples are handed to ``self.check_iter_pickle``.
    """
    a = (1, 2, 3)
    b = (4, 5, 6)
    t = [(1, 4), (2, 5), (3, 6)]
    for proto in range(pickle.HIGHEST_PROTOCOL + 1):
        z1 = zip(a, b, strict=True)
        self.check_iter_pickle(z1, t, proto)
1591+
1592+
def test_zip_pickle_strict_fail(self):
    """Check that the strict flag survives a pickle round trip.

    The inputs have different lengths, so both the original strict zip and
    its unpickled copy must yield the common prefix and then raise
    ValueError.
    """
    a = (1, 2, 3)
    b = (4, 5, 6, 7)
    t = [(1, 4), (2, 5), (3, 6)]
    for proto in range(pickle.HIGHEST_PROTOCOL + 1):
        z1 = zip(a, b, strict=True)
        # Round-trip before consuming z1 so both start from the same state.
        z2 = pickle.loads(pickle.dumps(z1, proto))
        self.assertEqual(self.iter_error(z1, ValueError), t)
        self.assertEqual(self.iter_error(z2, ValueError), t)
1601+
1602+
def test_zip_pickle_stability(self):
1603+
# Pickles of zip((1, 2, 3), (4, 5, 6)) dumped from 3.9:
1604+
pickles = [
1605+
b'citertools\nizip\np0\n(c__builtin__\niter\np1\n((I1\nI2\nI3\ntp2\ntp3\nRp4\nI0\nbg1\n((I4\nI5\nI6\ntp5\ntp6\nRp7\nI0\nbtp8\nRp9\n.',
1606+
b'citertools\nizip\nq\x00(c__builtin__\niter\nq\x01((K\x01K\x02K\x03tq\x02tq\x03Rq\x04K\x00bh\x01((K\x04K\x05K\x06tq\x05tq\x06Rq\x07K\x00btq\x08Rq\t.',
1607+
b'\x80\x02citertools\nizip\nq\x00c__builtin__\niter\nq\x01K\x01K\x02K\x03\x87q\x02\x85q\x03Rq\x04K\x00bh\x01K\x04K\x05K\x06\x87q\x05\x85q\x06Rq\x07K\x00b\x86q\x08Rq\t.',
1608+
b'\x80\x03cbuiltins\nzip\nq\x00cbuiltins\niter\nq\x01K\x01K\x02K\x03\x87q\x02\x85q\x03Rq\x04K\x00bh\x01K\x04K\x05K\x06\x87q\x05\x85q\x06Rq\x07K\x00b\x86q\x08Rq\t.',
1609+
b'\x80\x04\x95L\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x03zip\x94\x93\x94\x8c\x08builtins\x94\x8c\x04iter\x94\x93\x94K\x01K\x02K\x03\x87\x94\x85\x94R\x94K\x00bh\x05K\x04K\x05K\x06\x87\x94\x85\x94R\x94K\x00b\x86\x94R\x94.',
1610+
b'\x80\x05\x95L\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x03zip\x94\x93\x94\x8c\x08builtins\x94\x8c\x04iter\x94\x93\x94K\x01K\x02K\x03\x87\x94\x85\x94R\x94K\x00bh\x05K\x04K\x05K\x06\x87\x94\x85\x94R\x94K\x00b\x86\x94R\x94.',
1611+
]
1612+
for protocol, dump in enumerate(pickles):
1613+
z1 = zip((1, 2, 3), (4, 5, 6))
1614+
z2 = zip((1, 2, 3), (4, 5, 6), strict=False)
1615+
z3 = pickle.loads(dump)
1616+
l3 = list(z3)
1617+
self.assertEqual(type(z3), zip)
1618+
self.assertEqual(pickle.dumps(z1, protocol), dump)
1619+
self.assertEqual(pickle.dumps(z2, protocol), dump)
1620+
self.assertEqual(list(z1), l3)
1621+
self.assertEqual(list(z2), l3)
1622+
1623+
def test_zip_pickle_strict_stability(self):
    """Check the pickle format of a strict zip against recorded dumps.

    ``pickles`` holds one dump per protocol (the list index is the protocol
    number).  A fresh strict zip must dump to exactly those bytes, and the
    stored dump must load back into a ``zip`` object that yields the common
    prefix and then raises ValueError, just like the original.
    """
    # Pickles of zip((1, 2, 3), (4, 5), strict=True) dumped from 3.10:
    pickles = [
        b'citertools\nizip\np0\n(c__builtin__\niter\np1\n((I1\nI2\nI3\ntp2\ntp3\nRp4\nI0\nbg1\n((I4\nI5\ntp5\ntp6\nRp7\nI0\nbtp8\nRp9\nI01\nb.',
        b'citertools\nizip\nq\x00(c__builtin__\niter\nq\x01((K\x01K\x02K\x03tq\x02tq\x03Rq\x04K\x00bh\x01((K\x04K\x05tq\x05tq\x06Rq\x07K\x00btq\x08Rq\tI01\nb.',
        b'\x80\x02citertools\nizip\nq\x00c__builtin__\niter\nq\x01K\x01K\x02K\x03\x87q\x02\x85q\x03Rq\x04K\x00bh\x01K\x04K\x05\x86q\x05\x85q\x06Rq\x07K\x00b\x86q\x08Rq\t\x88b.',
        b'\x80\x03cbuiltins\nzip\nq\x00cbuiltins\niter\nq\x01K\x01K\x02K\x03\x87q\x02\x85q\x03Rq\x04K\x00bh\x01K\x04K\x05\x86q\x05\x85q\x06Rq\x07K\x00b\x86q\x08Rq\t\x88b.',
        b'\x80\x04\x95L\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x03zip\x94\x93\x94\x8c\x08builtins\x94\x8c\x04iter\x94\x93\x94K\x01K\x02K\x03\x87\x94\x85\x94R\x94K\x00bh\x05K\x04K\x05\x86\x94\x85\x94R\x94K\x00b\x86\x94R\x94\x88b.',
        b'\x80\x05\x95L\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x03zip\x94\x93\x94\x8c\x08builtins\x94\x8c\x04iter\x94\x93\x94K\x01K\x02K\x03\x87\x94\x85\x94R\x94K\x00bh\x05K\x04K\x05\x86\x94\x85\x94R\x94K\x00b\x86\x94R\x94\x88b.',
    ]
    a = (1, 2, 3)
    b = (4, 5)
    t = [(1, 4), (2, 5)]
    for protocol, dump in enumerate(pickles):
        z1 = zip(a, b, strict=True)
        z2 = pickle.loads(dump)
        # Dump before consuming z1: the pickle records iterator position.
        self.assertEqual(pickle.dumps(z1, protocol), dump)
        self.assertEqual(type(z2), zip)
        self.assertEqual(self.iter_error(z1, ValueError), t)
        self.assertEqual(self.iter_error(z2, ValueError), t)
1643+
15761644
def test_zip_bad_iterable(self):
15771645
exception = TypeError()
15781646

@@ -1585,6 +1653,88 @@ def __iter__(self):
15851653

15861654
self.assertIs(cm.exception, exception)
15871655

1656+
def test_zip_strict(self):
1657+
self.assertEqual(tuple(zip((1, 2, 3), 'abc', strict=True)),
1658+
((1, 'a'), (2, 'b'), (3, 'c')))
1659+
self.assertRaises(ValueError, tuple,
1660+
zip((1, 2, 3, 4), 'abc', strict=True))
1661+
self.assertRaises(ValueError, tuple,
1662+
zip((1, 2), 'abc', strict=True))
1663+
self.assertRaises(ValueError, tuple,
1664+
zip((1, 2), (1, 2), 'abc', strict=True))
1665+
1666+
def test_zip_strict_iterators(self):
1667+
x = iter(range(5))
1668+
y = [0]
1669+
z = iter(range(5))
1670+
self.assertRaises(ValueError, list,
1671+
(zip(x, y, z, strict=True)))
1672+
self.assertEqual(next(x), 2)
1673+
self.assertEqual(next(z), 1)
1674+
1675+
def test_zip_strict_error_handling(self):
    """Check strict zip when an argument raises a non-StopIteration error.

    ``Iter(n)`` yields n-1, ..., 0 and then raises ``Error``.  Each case
    checks which exception ends the iteration (the custom ``Error`` or the
    length-mismatch ValueError) and which tuples were produced before it.
    """

    class Error(Exception):
        pass

    class Iter:
        def __init__(self, size):
            self.size = size
        def __iter__(self):
            return self
        def __next__(self):
            self.size -= 1
            if self.size < 0:
                raise Error
            return self.size

    l1 = self.iter_error(zip("AB", Iter(1), strict=True), Error)
    self.assertEqual(l1, [("A", 0)])
    l2 = self.iter_error(zip("AB", Iter(2), "A", strict=True), ValueError)
    self.assertEqual(l2, [("A", 1, "A")])
    l3 = self.iter_error(zip("AB", Iter(2), "ABC", strict=True), Error)
    self.assertEqual(l3, [("A", 1, "A"), ("B", 0, "B")])
    l4 = self.iter_error(zip("AB", Iter(3), strict=True), ValueError)
    self.assertEqual(l4, [("A", 2), ("B", 1)])
    l5 = self.iter_error(zip(Iter(1), "AB", strict=True), Error)
    self.assertEqual(l5, [(0, "A")])
    l6 = self.iter_error(zip(Iter(2), "A", strict=True), ValueError)
    self.assertEqual(l6, [(1, "A")])
    l7 = self.iter_error(zip(Iter(2), "ABC", strict=True), Error)
    self.assertEqual(l7, [(1, "A"), (0, "B")])
    l8 = self.iter_error(zip(Iter(3), "AB", strict=True), ValueError)
    self.assertEqual(l8, [(2, "A"), (1, "B")])
1707+
1708+
def test_zip_strict_error_handling_stopiteration(self):
    """Check strict zip with a hand-written iterator raising StopIteration.

    ``Iter(n)`` yields n-1, ..., 0 and then raises StopIteration explicitly.
    Every case here has arguments of different lengths, so each must end
    with ValueError after producing the listed tuples.
    """

    class Iter:
        def __init__(self, size):
            self.size = size
        def __iter__(self):
            return self
        def __next__(self):
            self.size -= 1
            if self.size < 0:
                raise StopIteration
            return self.size

    l1 = self.iter_error(zip("AB", Iter(1), strict=True), ValueError)
    self.assertEqual(l1, [("A", 0)])
    l2 = self.iter_error(zip("AB", Iter(2), "A", strict=True), ValueError)
    self.assertEqual(l2, [("A", 1, "A")])
    l3 = self.iter_error(zip("AB", Iter(2), "ABC", strict=True), ValueError)
    self.assertEqual(l3, [("A", 1, "A"), ("B", 0, "B")])
    l4 = self.iter_error(zip("AB", Iter(3), strict=True), ValueError)
    self.assertEqual(l4, [("A", 2), ("B", 1)])
    l5 = self.iter_error(zip(Iter(1), "AB", strict=True), ValueError)
    self.assertEqual(l5, [(0, "A")])
    l6 = self.iter_error(zip(Iter(2), "A", strict=True), ValueError)
    self.assertEqual(l6, [(1, "A")])
    l7 = self.iter_error(zip(Iter(2), "ABC", strict=True), ValueError)
    self.assertEqual(l7, [(1, "A"), (0, "B")])
    l8 = self.iter_error(zip(Iter(3), "AB", strict=True), ValueError)
    self.assertEqual(l8, [(2, "A"), (1, "B")])
1737+
15881738
def test_format(self):
15891739
# Test the basic machinery of the format() builtin. Don't test
15901740
# the specifics of the various formatters
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`zip` now supports :pep:`618`'s ``strict`` parameter, which raises a
2+
:exc:`ValueError` if the arguments are exhausted at different lengths.
3+
Patch by Brandt Bucher.

Python/bltinmodule.c

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2517,9 +2517,10 @@ builtin_issubclass_impl(PyObject *module, PyObject *cls,
25172517

25182518
typedef struct {
25192519
PyObject_HEAD
2520-
Py_ssize_t tuplesize;
2521-
PyObject *ittuple; /* tuple of iterators */
2520+
Py_ssize_t tuplesize;
2521+
PyObject *ittuple; /* tuple of iterators */
25222522
PyObject *result;
2523+
int strict;
25232524
} zipobject;
25242525

25252526
static PyObject *
@@ -2530,9 +2531,21 @@ zip_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
25302531
PyObject *ittuple; /* tuple of iterators */
25312532
PyObject *result;
25322533
Py_ssize_t tuplesize;
2534+
int strict = 0;
25332535

2534-
if (type == &PyZip_Type && !_PyArg_NoKeywords("zip", kwds))
2535-
return NULL;
2536+
if (kwds) {
2537+
PyObject *empty = PyTuple_New(0);
2538+
if (empty == NULL) {
2539+
return NULL;
2540+
}
2541+
static char *kwlist[] = {"strict", NULL};
2542+
int parsed = PyArg_ParseTupleAndKeywords(
2543+
empty, kwds, "|$p:zip", kwlist, &strict);
2544+
Py_DECREF(empty);
2545+
if (!parsed) {
2546+
return NULL;
2547+
}
2548+
}
25362549

25372550
/* args must be a tuple */
25382551
assert(PyTuple_Check(args));
@@ -2573,6 +2586,7 @@ zip_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
25732586
lz->ittuple = ittuple;
25742587
lz->tuplesize = tuplesize;
25752588
lz->result = result;
2589+
lz->strict = strict;
25762590

25772591
return (PyObject *)lz;
25782592
}
@@ -2613,6 +2627,9 @@ zip_next(zipobject *lz)
26132627
item = (*Py_TYPE(it)->tp_iternext)(it);
26142628
if (item == NULL) {
26152629
Py_DECREF(result);
2630+
if (lz->strict) {
2631+
goto check;
2632+
}
26162633
return NULL;
26172634
}
26182635
olditem = PyTuple_GET_ITEM(result, i);
@@ -2628,36 +2645,96 @@ zip_next(zipobject *lz)
26282645
item = (*Py_TYPE(it)->tp_iternext)(it);
26292646
if (item == NULL) {
26302647
Py_DECREF(result);
2648+
if (lz->strict) {
2649+
goto check;
2650+
}
26312651
return NULL;
26322652
}
26332653
PyTuple_SET_ITEM(result, i, item);
26342654
}
26352655
}
26362656
return result;
2657+
check:
2658+
if (PyErr_Occurred()) {
2659+
if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2660+
// next() on argument i raised an exception (not StopIteration)
2661+
return NULL;
2662+
}
2663+
PyErr_Clear();
2664+
}
2665+
if (i) {
2666+
// ValueError: zip() argument 2 is shorter than argument 1
2667+
// ValueError: zip() argument 3 is shorter than arguments 1-2
2668+
const char* plural = i == 1 ? " " : "s 1-";
2669+
return PyErr_Format(PyExc_ValueError,
2670+
"zip() argument %d is shorter than argument%s%d",
2671+
i + 1, plural, i);
2672+
}
2673+
for (i = 1; i < tuplesize; i++) {
2674+
it = PyTuple_GET_ITEM(lz->ittuple, i);
2675+
item = (*Py_TYPE(it)->tp_iternext)(it);
2676+
if (item) {
2677+
Py_DECREF(item);
2678+
const char* plural = i == 1 ? " " : "s 1-";
2679+
return PyErr_Format(PyExc_ValueError,
2680+
"zip() argument %d is longer than argument%s%d",
2681+
i + 1, plural, i);
2682+
}
2683+
if (PyErr_Occurred()) {
2684+
if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2685+
// next() on argument i raised an exception (not StopIteration)
2686+
return NULL;
2687+
}
2688+
PyErr_Clear();
2689+
}
2690+
// Argument i is exhausted. So far so good...
2691+
}
2692+
// All arguments are exhausted. Success!
2693+
return NULL;
26372694
}
26382695

26392696
static PyObject *
26402697
zip_reduce(zipobject *lz, PyObject *Py_UNUSED(ignored))
26412698
{
26422699
/* Just recreate the zip with the internal iterator tuple */
2643-
return Py_BuildValue("OO", Py_TYPE(lz), lz->ittuple);
2700+
if (lz->strict) {
2701+
return PyTuple_Pack(3, Py_TYPE(lz), lz->ittuple, Py_True);
2702+
}
2703+
return PyTuple_Pack(2, Py_TYPE(lz), lz->ittuple);
2704+
}
2705+
2706+
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2707+
2708+
static PyObject *
2709+
zip_setstate(zipobject *lz, PyObject *state)
2710+
{
2711+
int strict = PyObject_IsTrue(state);
2712+
if (strict < 0) {
2713+
return NULL;
2714+
}
2715+
lz->strict = strict;
2716+
Py_RETURN_NONE;
26442717
}
26452718

26462719
static PyMethodDef zip_methods[] = {
26472720
{"__reduce__", (PyCFunction)zip_reduce, METH_NOARGS, reduce_doc},
2648-
{NULL, NULL} /* sentinel */
2721+
{"__setstate__", (PyCFunction)zip_setstate, METH_O, setstate_doc},
2722+
{NULL} /* sentinel */
26492723
};
26502724

26512725
PyDoc_STRVAR(zip_doc,
2652-
"zip(*iterables) --> A zip object yielding tuples until an input is exhausted.\n\
2726+
"zip(*iterables, strict=False) --> Yield tuples until an input is exhausted.\n\
26532727
\n\
26542728
>>> list(zip('abcdefg', range(3), range(4)))\n\
26552729
[('a', 0, 0), ('b', 1, 1), ('c', 2, 2)]\n\
26562730
\n\
26572731
The zip object yields n-length tuples, where n is the number of iterables\n\
26582732
passed as positional arguments to zip(). The i-th element in every tuple\n\
26592733
comes from the i-th iterable argument to zip(). This continues until the\n\
2660-
shortest argument is exhausted.");
2734+
shortest argument is exhausted.\n\
2735+
\n\
2736+
If strict is true and one of the arguments is exhausted before the others,\n\
2737+
raise a ValueError.");
26612738

26622739
PyTypeObject PyZip_Type = {
26632740
PyVarObject_HEAD_INIT(&PyType_Type, 0)

0 commit comments

Comments
 (0)