1
1
from __future__ import annotations
2
2
3
- from dataclasses import dataclass
3
+ from dataclasses import dataclass , replace
4
4
from functools import cached_property
5
5
import math
6
6
from typing_extensions import Self
9
9
import numpy as np
10
10
import numcodecs
11
11
12
- from zarr .abc .codec import ArrayArrayCodec , BytesBytesCodec , Codec
12
+ from zarr .abc .codec import ArrayArrayCodec , BytesBytesCodec
13
+ from zarr .buffer import NDBuffer , Buffer , as_numpy_array_wrapper
13
14
from zarr .common import (
14
15
JSON ,
15
16
ArraySpec ,
16
- BytesLike ,
17
17
parse_named_configuration ,
18
18
product ,
19
19
to_thread ,
20
20
)
21
- from zarr .config import RuntimeConfiguration
22
21
from zarr .metadata import ArrayMetadata
23
22
24
23
@@ -33,12 +32,12 @@ def parse_codec_configuration(
33
32
raise ValueError (
34
33
f"Expected name to start with '{ expected_name_prefix } '. Got { parsed_name } instead."
35
34
)
36
- id = parsed_name [len (expected_name_prefix ):]
35
+ id = parsed_name [len (expected_name_prefix ) :]
37
36
return {"id" : id , ** parsed_configuration }
38
37
39
38
40
39
@dataclass (frozen = True )
41
- class NumcodecsCodec ( Codec ) :
40
+ class NumcodecsCodec :
42
41
codec_config : dict [str , JSON ]
43
42
44
43
def __init__ (
@@ -65,6 +64,7 @@ def __init__(
65
64
66
65
@cached_property
67
66
def _codec (self ) -> numcodecs .abc .Codec :
67
+ print (self .codec_config )
68
68
return numcodecs .get_codec (self .codec_config )
69
69
70
70
@classmethod
@@ -91,49 +91,40 @@ class NumcodecsBytesBytesCodec(NumcodecsCodec, BytesBytesCodec):
91
91
def __init__ (self , * , codec_id : str , codec_config : dict [str , JSON ]) -> None :
92
92
super ().__init__ (codec_id = codec_id , codec_config = codec_config )
93
93
94
- async def decode (
95
- self ,
96
- chunk_bytes : BytesLike ,
97
- _chunk_spec : ArraySpec ,
98
- _runtime_configuration : RuntimeConfiguration ,
99
- ) -> BytesLike :
100
- return await to_thread (self ._codec .decode , chunk_bytes )
94
+ async def _decode_single (
95
+ self , chunk_bytes : Buffer , _chunk_spec : ArraySpec
96
+ ) -> Buffer :
97
+ return await to_thread (as_numpy_array_wrapper , self ._codec .decode , chunk_bytes )
101
98
102
- def _encode (self , chunk_bytes : BytesLike ) -> BytesLike :
103
- encoded = self ._codec .encode (chunk_bytes )
99
+ def _encode (self , chunk_bytes : Buffer ) -> Buffer :
100
+ encoded = self ._codec .encode (chunk_bytes . as_array_like () )
104
101
if isinstance (encoded , np .ndarray ): # Required for checksum codecs
105
102
return encoded .tobytes ()
106
- return encoded
107
-
108
- async def encode (
109
- self ,
110
- chunk_bytes : BytesLike ,
111
- _chunk_spec : ArraySpec ,
112
- _runtime_configuration : RuntimeConfiguration ,
113
- ) -> BytesLike :
103
+ return Buffer .from_bytes (encoded )
104
+
105
+ async def _encode_single (
106
+ self , chunk_bytes : Buffer , _chunk_spec : ArraySpec
107
+ ) -> Buffer :
114
108
return await to_thread (self ._encode , chunk_bytes )
115
109
116
110
117
111
class NumcodecsArrayArrayCodec (NumcodecsCodec , ArrayArrayCodec ):
118
112
def __init__ (self , * , codec_id : str , codec_config : dict [str , JSON ]) -> None :
119
113
super ().__init__ (codec_id = codec_id , codec_config = codec_config )
120
114
121
- async def decode (
122
- self ,
123
- chunk_array : np .ndarray ,
124
- chunk_spec : ArraySpec ,
125
- _runtime_configuration : RuntimeConfiguration ,
126
- ) -> np .ndarray :
127
- out = await to_thread (self ._codec .decode , chunk_array )
128
- return out .reshape (chunk_spec .shape )
115
+ async def _decode_single (
116
+ self , chunk_array : NDBuffer , chunk_spec : ArraySpec
117
+ ) -> NDBuffer :
118
+ chunk_ndarray = chunk_array .as_ndarray_like ()
119
+ out = await to_thread (self ._codec .decode , chunk_ndarray )
120
+ return NDBuffer .from_ndarray_like (out .reshape (chunk_spec .shape ))
129
121
130
- async def encode (
131
- self ,
132
- chunk_array : np .ndarray ,
133
- _chunk_spec : ArraySpec ,
134
- _runtime_configuration : RuntimeConfiguration ,
135
- ) -> np .ndarray :
136
- return await to_thread (self ._codec .encode , chunk_array )
122
+ async def _encode_single (
123
+ self , chunk_array : NDBuffer , _chunk_spec : ArraySpec
124
+ ) -> NDBuffer :
125
+ chunk_ndarray = chunk_array .as_ndarray_like ()
126
+ out = await to_thread (self ._codec .encode , chunk_ndarray )
127
+ return NDBuffer .from_ndarray_like (out )
137
128
138
129
139
130
def make_bytes_bytes_codec (
@@ -185,7 +176,7 @@ class ShuffleCodec(NumcodecsBytesBytesCodec):
185
176
def __init__ (self , codec_config : dict [str , JSON ] = {}) -> None :
186
177
super ().__init__ (codec_id = "shuffle" , codec_config = codec_config )
187
178
188
- def evolve (self , array_spec : ArraySpec ) -> Self :
179
+ def evolve_from_array_spec (self , array_spec : ArraySpec ) -> Self :
189
180
if array_spec .dtype .itemsize != self .codec_config .get ("elementsize" ):
190
181
return self .__class__ (
191
182
{** self .codec_config , "elementsize" : array_spec .dtype .itemsize }
@@ -199,14 +190,10 @@ def __init__(self, codec_config: dict[str, JSON] = {}) -> None:
199
190
200
191
def resolve_metadata (self , chunk_spec : ArraySpec ) -> ArraySpec :
201
192
if astype := self .codec_config .get ("astype" ):
202
- return ArraySpec (
203
- chunk_spec .shape ,
204
- np .dtype (astype ),
205
- chunk_spec .fill_value ,
206
- )
193
+ return replace (chunk_spec , dtype = np .dtype (astype ))
207
194
return chunk_spec
208
195
209
- def evolve (self , array_spec : ArraySpec ) -> Self :
196
+ def evolve_from_array_spec (self , array_spec : ArraySpec ) -> Self :
210
197
if str (array_spec .dtype ) != self .codec_config .get ("dtype" ):
211
198
return self .__class__ ({** self .codec_config , "dtype" : str (array_spec .dtype )})
212
199
return self
@@ -216,7 +203,7 @@ class QuantizeCodec(NumcodecsArrayArrayCodec):
216
203
def __init__ (self , codec_config : dict [str , JSON ] = {}) -> None :
217
204
super ().__init__ (codec_id = "quantize" , codec_config = codec_config )
218
205
219
- def evolve (self , array_spec : ArraySpec ) -> Self :
206
+ def evolve_from_array_spec (self , array_spec : ArraySpec ) -> Self :
220
207
if str (array_spec .dtype ) != self .codec_config .get ("dtype" ):
221
208
return self .__class__ ({** self .codec_config , "dtype" : str (array_spec .dtype )})
222
209
return self
@@ -227,13 +214,9 @@ def __init__(self, codec_config: dict[str, JSON] = {}) -> None:
227
214
super ().__init__ (codec_id = "astype" , codec_config = codec_config )
228
215
229
216
def resolve_metadata (self , chunk_spec : ArraySpec ) -> ArraySpec :
230
- return ArraySpec (
231
- chunk_spec .shape ,
232
- np .dtype (self .codec_config ["encode_dtype" ]),
233
- chunk_spec .fill_value ,
234
- )
217
+ return replace (chunk_spec , dtype = np .dtype (self .codec_config ["encode_dtype" ]))
235
218
236
- def evolve (self , array_spec : ArraySpec ) -> Self :
219
+ def evolve_from_array_spec (self , array_spec : ArraySpec ) -> Self :
237
220
decode_dtype = self .codec_config .get ("decode_dtype" )
238
221
if str (array_spec .dtype ) != decode_dtype :
239
222
return self .__class__ (
@@ -247,10 +230,10 @@ def __init__(self, codec_config: dict[str, JSON] = {}) -> None:
247
230
super ().__init__ (codec_id = "packbits" , codec_config = codec_config )
248
231
249
232
def resolve_metadata (self , chunk_spec : ArraySpec ) -> ArraySpec :
250
- return ArraySpec (
251
- ( 1 + math . ceil ( product ( chunk_spec . shape ) / 8 ),) ,
252
- np . dtype ( "uint8" ),
253
- chunk_spec . fill_value ,
233
+ return replace (
234
+ chunk_spec ,
235
+ shape = ( 1 + math . ceil ( product ( chunk_spec . shape ) / 8 ), ),
236
+ dtype = np . dtype ( "uint8" ) ,
254
237
)
255
238
256
239
def validate (self , array_metadata : ArrayMetadata ) -> None :
@@ -282,4 +265,4 @@ def validate(self, array_metadata: ArrayMetadata) -> None:
282
265
Crc32Codec = make_checksum_codec ("crc32" , "Crc32Codec" )
283
266
Adler32Codec = make_checksum_codec ("adler32" , "Adler32Codec" )
284
267
Fletcher32Codec = make_checksum_codec ("fletcher32" , "Fletcher32Codec" )
285
- JenkinsLookup3 = make_checksum_codec ("jenkinks_lookup3 " , "JenkinsLookup3" )
268
+ JenkinsLookup3 = make_checksum_codec ("jenkins_lookup3 " , "JenkinsLookup3" )
0 commit comments