Skip to content

Commit 44130cd

Browse files
normanrzmpiannuccidstansby
authored
Add wrappers for zarr v3 (#524)
* import zarr-python#1839 * pep8 * remove try/catch * pep8 * update to latest zarr-python interfaces * flake * add zarr-python to ci * fix import * tests * fixes * skip zarr3 tests on older python versions * ruff * add zfpy and pcodec * remove zarr from dependencies * change prefix * fixes for ci * fix for tests * pr feedback * Sync with zarr 3 beta (#597) * Sync with zarr 3 beta * Update zarr version in ci * dont install zarr python 3 in workflows running 3.10 * Update numcodecs/tests/test_zarr3.py Co-authored-by: David Stansby <[email protected]> * moves zarr3 to private module, adds test for zarr-python2 installs * add typing_extensions as dep * tests * importorskip minversion * ci install * drop zarr 2 in ci * no zarr2 + make zarr3 a public module * pre-commit * fixes? * fix validate * fix pcodec test * fix pcodec test * codecov * codecov * fix error match * codecov * codecov * coverage * wip docs * docs and renames all codecs * docs * new zarr beta * no zfpy for macos-14 * xfail * rm dead code * Update .github/workflows/ci.yaml Co-authored-by: David Stansby <[email protected]> * debug rtd * debug ci * Filter warnings in zarr3 tests * Fix warning ignore * pr feedback --------- Co-authored-by: Matthew Iannucci <[email protected]> Co-authored-by: David Stansby <[email protected]>
1 parent d8a219f commit 44130cd

File tree

8 files changed

+764
-2
lines changed

8 files changed

+764
-2
lines changed

.github/workflows/ci.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
fail-fast: false
1414
matrix:
1515
python-version: ["3.11", "3.12", "3.13"]
16-
# macos-12 is an intel runner, macos-14 is a arm64 runner
16+
# macos-13 is an Intel runner, macos-14 is an arm64 runner
1717
platform: [ubuntu-latest, windows-latest, macos-13, macos-14]
1818

1919
steps:
@@ -70,6 +70,12 @@ jobs:
7070
conda activate env
7171
python -m pip install -v ".[pcodec]"
7272
73+
- name: Install zarr-python
74+
shell: "bash -l {0}"
75+
run: |
76+
conda activate env
77+
# TODO: remove --pre option when zarr v3 is out
78+
python -m pip install --pre zarr
7379
7480
# This is used to test with zfpy, which does not yet support numpy 2.0
7581
- name: Install older numpy and zfpy

.readthedocs.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ build:
77
os: ubuntu-20.04
88
tools:
99
python: "3.12"
10+
jobs:
11+
post_install:
12+
- python -m pip install --pre 'zarr'
1013

1114
sphinx:
1215
configuration: docs/conf.py
@@ -19,3 +22,4 @@ python:
1922
- docs
2023
- msgpack
2124
- zfpy
25+
- crc32c

docs/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ API reference
1010
checksum32
1111
abc
1212
registry
13+
zarr3

docs/zarr3.rst

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
Zarr 3 codecs
2+
=============
3+
.. automodule:: numcodecs.zarr3
4+
5+
6+
Bytes-to-bytes codecs
7+
---------------------
8+
.. autoclass:: Blosc()
9+
10+
.. autoattribute:: codec_name
11+
12+
.. autoclass:: LZ4()
13+
14+
.. autoattribute:: codec_name
15+
16+
.. autoclass:: Zstd()
17+
18+
.. autoattribute:: codec_name
19+
20+
.. autoclass:: Zlib()
21+
22+
.. autoattribute:: codec_name
23+
24+
.. autoclass:: GZip()
25+
26+
.. autoattribute:: codec_name
27+
28+
.. autoclass:: BZ2()
29+
30+
.. autoattribute:: codec_name
31+
32+
.. autoclass:: LZMA()
33+
34+
.. autoattribute:: codec_name
35+
36+
.. autoclass:: Shuffle()
37+
38+
.. autoattribute:: codec_name
39+
40+
41+
Array-to-array codecs
42+
---------------------
43+
.. autoclass:: Delta()
44+
45+
.. autoattribute:: codec_name
46+
47+
.. autoclass:: BitRound()
48+
49+
.. autoattribute:: codec_name
50+
51+
.. autoclass:: FixedScaleOffset()
52+
53+
.. autoattribute:: codec_name
54+
55+
.. autoclass:: Quantize()
56+
57+
.. autoattribute:: codec_name
58+
59+
.. autoclass:: PackBits()
60+
61+
.. autoattribute:: codec_name
62+
63+
.. autoclass:: AsType()
64+
65+
.. autoattribute:: codec_name
66+
67+
68+
Bytes-to-bytes checksum codecs
69+
------------------------------
70+
.. autoclass:: CRC32()
71+
72+
.. autoattribute:: codec_name
73+
74+
.. autoclass:: CRC32C()
75+
76+
.. autoattribute:: codec_name
77+
78+
.. autoclass:: Adler32()
79+
80+
.. autoattribute:: codec_name
81+
82+
.. autoclass:: Fletcher32()
83+
84+
.. autoattribute:: codec_name
85+
86+
.. autoclass:: JenkinsLookup3()
87+
88+
.. autoattribute:: codec_name
89+
90+
91+
Array-to-bytes codecs
92+
---------------------
93+
.. autoclass:: PCodec()
94+
95+
.. autoattribute:: codec_name
96+
97+
.. autoclass:: ZFPY()
98+
99+
.. autoattribute:: codec_name

numcodecs/tests/test_zarr3.py

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
from __future__ import annotations
2+
3+
import numpy as np
4+
import pytest
5+
6+
import numcodecs.zarr3
7+
8+
# Skip every test in this module when zarr-python cannot be imported.
zarr = pytest.importorskip("zarr")


def _zarr_major_version(version: str) -> int:
    """Return the leading numeric (major) component of a version string.

    Only the run of digits at the very start of ``version`` is used, so
    pre-release strings such as ``"3.0.0b1"`` yield ``3``.  A string that does
    not start with a digit yields ``0``.
    """
    digits = ""
    for char in version:
        if not char.isdigit():
            break
        digits += char
    return int(digits) if digits else 0


pytestmark = [
    # Compare the numeric major version rather than the raw strings: string
    # comparison is lexicographic, so "10.0.0" < "3.0.0" would be true and a
    # future zarr release would be skipped by mistake.
    pytest.mark.skipif(
        _zarr_major_version(zarr.__version__) < 3,
        reason="zarr 3.0.0 or later is required",
    ),
    pytest.mark.filterwarnings("ignore:Codec 'numcodecs.*' not configured in config.*:UserWarning"),
    pytest.mark.filterwarnings(
        "ignore:Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations."
    ),
]
17+
18+
get_codec_class = zarr.registry.get_codec_class
19+
Array = zarr.Array
20+
JSON = zarr.core.common.JSON
21+
BytesCodec = zarr.codecs.BytesCodec
22+
Store = zarr.abc.store.Store
23+
MemoryStore = zarr.storage.MemoryStore
24+
StorePath = zarr.storage.StorePath
25+
26+
27+
# Regex passed as ``match=`` to ``pytest.warns`` in the tests below.
EXPECTED_WARNING_STR = "Numcodecs codecs are not in the Zarr version 3.*"
28+
29+
30+
@pytest.fixture
def store() -> StorePath:
    """Provide a ``StorePath`` wrapping a new ``MemoryStore`` created with ``mode="w"``.

    The returned object is a ``StorePath`` (not a bare ``Store``); the tests
    build per-array locations from it with the ``/`` operator.
    """
    return StorePath(MemoryStore(mode="w"))
33+
34+
35+
# Every object exported through ``numcodecs.zarr3.__all__``, looked up by name.
ALL_CODECS = [getattr(numcodecs.zarr3, exported) for exported in numcodecs.zarr3.__all__]
36+
37+
38+
@pytest.mark.parametrize("codec_class", ALL_CODECS)
def test_entry_points(codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
    """Looking up a codec's ``codec_name`` with ``get_codec_class`` yields that codec class."""
    registered_class = get_codec_class(codec_class.codec_name)
    assert registered_class == codec_class
42+
43+
44+
@pytest.mark.parametrize("codec_class", ALL_CODECS)
def test_docstring(codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
    """Each codec class docstring carries a cross-reference starting with ``See :class:``."""
    docstring = codec_class.__doc__
    assert "See :class:`numcodecs." in docstring
47+
48+
49+
@pytest.mark.parametrize(
    "codec_class",
    [
        numcodecs.zarr3.Blosc,
        numcodecs.zarr3.LZ4,
        numcodecs.zarr3.Zstd,
        numcodecs.zarr3.Zlib,
        numcodecs.zarr3.GZip,
        numcodecs.zarr3.BZ2,
        numcodecs.zarr3.LZMA,
        numcodecs.zarr3.Shuffle,
    ],
)
def test_generic_codec_class(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
    """Write and read back a 16x16 uint16 array using ``BytesCodec`` plus the given codec.

    The codec is built with its default arguments, and array creation is
    expected to emit a ``UserWarning`` matching ``EXPECTED_WARNING_STR``.
    """
    grid = (16, 16)
    expected = np.arange(0, 256, dtype="uint16").reshape(grid)

    # The codec instances are constructed inside the ``pytest.warns`` block so
    # that any warning raised while building them is captured as well.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        arr = Array.create(
            store / "generic",
            shape=expected.shape,
            chunk_shape=grid,
            dtype=expected.dtype,
            fill_value=0,
            codecs=[BytesCodec(), codec_class()],
        )

    arr[:, :] = expected.copy()
    roundtripped = arr[:, :]
    np.testing.assert_array_equal(expected, roundtripped)
77+
78+
79+
@pytest.mark.parametrize(
    ("codec_class", "codec_config"),
    [
        (numcodecs.zarr3.Delta, {"dtype": "float32"}),
        (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 25.5}),
        (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 51, "astype": "uint16"}),
        (numcodecs.zarr3.AsType, {"encode_dtype": "float32", "decode_dtype": "float64"}),
    ],
    ids=[
        "delta",
        "fixedscaleoffset",
        "fixedscaleoffset2",
        "astype",
    ],
)
def test_generic_filter(
    store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec], codec_config: dict[str, JSON]
):
    """Write a float32 array through a configured filter codec, reopen it, and compare exactly.

    ``codec_config`` is expanded as keyword arguments to ``codec_class``; the
    filter is placed ahead of ``BytesCodec`` in the codec list.
    """
    grid = (16, 16)
    expected = np.linspace(0, 10, 256, dtype="float32").reshape(grid)

    # The filter is constructed inside the ``pytest.warns`` block so that any
    # warning raised while building it is captured as well.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        arr = Array.create(
            store / "generic",
            shape=expected.shape,
            chunk_shape=grid,
            dtype=expected.dtype,
            fill_value=0,
            codecs=[
                codec_class(**codec_config),
                BytesCodec(),
            ],
        )

    arr[:, :] = expected.copy()

    # Read through a freshly opened array rather than the handle used for writing.
    reopened = Array.open(store / "generic")
    np.testing.assert_array_equal(expected, reopened[:, :])
115+
116+
117+
def test_generic_filter_bitround(store: Store):
    """Round-trip float32 data through ``BitRound(keepbits=3)`` and compare within ``atol=0.1``."""
    grid = (16, 16)
    expected = np.linspace(0, 1, 256, dtype="float32").reshape(grid)

    # The codec is constructed inside the ``pytest.warns`` block so that any
    # warning raised while building it is captured as well.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        arr = Array.create(
            store / "generic_bitround",
            shape=expected.shape,
            chunk_shape=grid,
            dtype=expected.dtype,
            fill_value=0,
            codecs=[numcodecs.zarr3.BitRound(keepbits=3), BytesCodec()],
        )

    arr[:, :] = expected.copy()

    # Read through a freshly opened array rather than the handle used for writing.
    reopened = Array.open(store / "generic_bitround")
    is_close = np.allclose(expected, reopened[:, :], atol=0.1)
    assert is_close
133+
134+
135+
def test_generic_filter_quantize(store: Store):
    """Round-trip float32 data through ``Quantize(digits=3)`` and compare within ``atol=0.001``."""
    grid = (16, 16)
    expected = np.linspace(0, 10, 256, dtype="float32").reshape(grid)

    # The codec is constructed inside the ``pytest.warns`` block so that any
    # warning raised while building it is captured as well.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        arr = Array.create(
            store / "generic_quantize",
            shape=expected.shape,
            chunk_shape=grid,
            dtype=expected.dtype,
            fill_value=0,
            codecs=[numcodecs.zarr3.Quantize(digits=3), BytesCodec()],
        )

    arr[:, :] = expected.copy()

    # Read through a freshly opened array rather than the handle used for writing.
    reopened = Array.open(store / "generic_quantize")
    is_close = np.allclose(expected, reopened[:, :], atol=0.001)
    assert is_close
151+
152+
153+
def test_generic_filter_packbits(store: Store):
    """Exercise ``PackBits`` on a bool array and check it is rejected for ``uint32``.

    First a 16x16 bool array (first four rows ``True``) is written, reopened
    and compared exactly.  Then creating an array with dtype ``"uint32"`` and
    the same codec list must raise ``ValueError`` mentioning "requires bool dtype".
    """
    grid = (16, 16)
    expected = np.zeros(grid, dtype="bool")
    expected[0:4, :] = True

    # The codec is constructed inside the ``pytest.warns`` block so that any
    # warning raised while building it is captured as well.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        arr = Array.create(
            store / "generic_packbits",
            shape=expected.shape,
            chunk_shape=grid,
            dtype=expected.dtype,
            fill_value=0,
            codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
        )

    arr[:, :] = expected.copy()

    # Read through a freshly opened array rather than the handle used for writing.
    reopened = Array.open(store / "generic_packbits")
    np.testing.assert_array_equal(expected, reopened[:, :])

    # Same codec list, but with a non-bool dtype.
    with pytest.raises(ValueError, match=".*requires bool dtype.*"):
        Array.create(
            store / "generic_packbits_err",
            shape=expected.shape,
            chunk_shape=grid,
            dtype="uint32",
            fill_value=0,
            codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
        )
180+
181+
182+
@pytest.mark.parametrize(
    "codec_class",
    [
        numcodecs.zarr3.CRC32,
        numcodecs.zarr3.CRC32C,
        numcodecs.zarr3.Adler32,
        numcodecs.zarr3.Fletcher32,
        numcodecs.zarr3.JenkinsLookup3,
    ],
)
def test_generic_checksum(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
    """Write float32 data with ``BytesCodec`` plus a checksum codec, reopen, and compare exactly."""
    grid = (16, 16)
    expected = np.linspace(0, 10, 256, dtype="float32").reshape(grid)

    # The codec is constructed inside the ``pytest.warns`` block so that any
    # warning raised while building it is captured as well.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        arr = Array.create(
            store / "generic_checksum",
            shape=expected.shape,
            chunk_shape=grid,
            dtype=expected.dtype,
            fill_value=0,
            codecs=[BytesCodec(), codec_class()],
        )

    arr[:, :] = expected.copy()

    # Read through a freshly opened array rather than the handle used for writing.
    reopened = Array.open(store / "generic_checksum")
    np.testing.assert_array_equal(expected, reopened[:, :])
208+
209+
210+
@pytest.mark.parametrize("codec_class", [numcodecs.zarr3.PCodec, numcodecs.zarr3.ZFPY])
def test_generic_bytes_codec(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
    """Round-trip a float32 array using the given codec as the only entry in the codec list.

    The codec's ``_codec`` attribute is accessed first.  An ``ImportError``, or
    a ``ValueError`` whose message contains "codec not available", marks the
    test as an expected failure; any other ``ValueError`` propagates.
    """
    try:
        codec_class()._codec  # noqa: B018
    except ValueError as e:
        if "codec not available" not in str(e):
            raise  # pragma: no cover
        pytest.xfail(f"{codec_class.codec_name} is not available: {e}")
    except ImportError as e:
        pytest.xfail(f"{codec_class.codec_name} is not available: {e}")

    grid = (16, 16)
    expected = np.arange(0, 256, dtype="float32").reshape(grid)

    # The codec is constructed inside the ``pytest.warns`` block so that any
    # warning raised while building it is captured as well.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        arr = Array.create(
            store / "generic",
            shape=expected.shape,
            chunk_shape=grid,
            dtype=expected.dtype,
            fill_value=0,
            codecs=[
                codec_class(),
            ],
        )

    arr[:, :] = expected.copy()
    roundtripped = arr[:, :]
    np.testing.assert_array_equal(expected, roundtripped)

numcodecs/tests/test_zarr3_import.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from __future__ import annotations
2+
3+
import pytest
4+
5+
6+
def test_zarr3_import():
    """When ``zarr`` cannot be imported, importing ``numcodecs.zarr3`` must raise ``ImportError``.

    The error message has to match ``"zarr 3.0.0 or later.*"``.  If ``zarr``
    imports successfully, nothing is asserted.
    """
    expected_message = "zarr 3.0.0 or later.*"

    try:
        import zarr  # noqa: F401
    except ImportError:  # pragma: no cover
        zarr_missing = True
    else:
        zarr_missing = False

    if zarr_missing:  # pragma: no cover
        with pytest.raises(ImportError, match=expected_message):
            import numcodecs.zarr3  # noqa: F401

0 commit comments

Comments
 (0)