Skip to content

Commit 3f972d9

Browse files
mwlon, normanrz, and rabernat
authored
Upgraded pcodec to 0.2 and used new ModeSpec configuration (#544)
* Upgraded pcodec to 0.2 and used new ModeSpec configuration
* test coverage fix
  Co-authored-by: Ryan Abernathey <[email protected]>
* actually test the line since it seems there's no way around it
* fix pre-commit
---------
Co-authored-by: Norman Rzepka <[email protected]>
Co-authored-by: Ryan Abernathey <[email protected]>
1 parent 4929b35 commit 3f972d9

File tree

9 files changed

+40
-39
lines changed

9 files changed

+40
-39
lines changed

fixture/pcodec/codec.00/config.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
{
22
"delta_encoding_order": null,
33
"equal_pages_up_to": 262144,
4-
"float_mult_spec": "enabled",
54
"id": "pcodec",
6-
"int_mult_spec": "enabled",
7-
"level": 8
5+
"level": 8,
6+
"mode_spec": "auto"
87
}

fixture/pcodec/codec.01/config.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
{
22
"delta_encoding_order": null,
33
"equal_pages_up_to": 262144,
4-
"float_mult_spec": "enabled",
54
"id": "pcodec",
6-
"int_mult_spec": "enabled",
7-
"level": 1
5+
"level": 1,
6+
"mode_spec": "auto"
87
}

fixture/pcodec/codec.02/config.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
{
22
"delta_encoding_order": null,
33
"equal_pages_up_to": 262144,
4-
"float_mult_spec": "enabled",
54
"id": "pcodec",
6-
"int_mult_spec": "enabled",
7-
"level": 5
5+
"level": 5,
6+
"mode_spec": "auto"
87
}

fixture/pcodec/codec.03/config.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
{
22
"delta_encoding_order": null,
33
"equal_pages_up_to": 262144,
4-
"float_mult_spec": "enabled",
54
"id": "pcodec",
6-
"int_mult_spec": "enabled",
7-
"level": 9
5+
"level": 9,
6+
"mode_spec": "auto"
87
}

fixture/pcodec/codec.04/config.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
{
22
"delta_encoding_order": null,
33
"equal_pages_up_to": 262144,
4-
"float_mult_spec": "disabled",
54
"id": "pcodec",
6-
"int_mult_spec": "disabled",
7-
"level": 8
5+
"level": 8,
6+
"mode_spec": "classic"
87
}

fixture/pcodec/codec.05/config.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
{
22
"delta_encoding_order": null,
33
"equal_pages_up_to": 300,
4-
"float_mult_spec": "enabled",
54
"id": "pcodec",
6-
"int_mult_spec": "enabled",
7-
"level": 8
5+
"level": 8,
6+
"mode_spec": "auto"
87
}

numcodecs/pcodec.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from numcodecs.compat import ensure_contiguous_ndarray
66

77
try:
8-
from pcodec import standalone, ChunkConfig, PagingSpec
8+
from pcodec import standalone, ChunkConfig, PagingSpec, ModeSpec
99
except ImportError: # pragma: no cover
1010
standalone = None
1111

@@ -20,8 +20,8 @@ class PCodec(numcodecs.abc.Codec):
2020
2121
See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information.
2222
23-
PCodec supports only the following numerical dtypes: uint32, unit64, int32,
24-
int64, float32, and float64.
23+
PCodec supports only the following numerical dtypes: uint16, uint32, uint64,
24+
int16, int32, int64, float16, float32, and float64.
2525
2626
Parameters
2727
----------
@@ -31,14 +31,11 @@ class PCodec(numcodecs.abc.Codec):
3131
delta_encoding_order : int or None
3232
Either a delta encoding level from 0-7 or None. If set to None, pcodec
3333
will try to infer the optimal delta encoding order.
34-
int_mult_spec : {'enabled', 'disabled'}
35-
If enabled, pcodec will consider using int mult mode, which can
36-
substantially improve compression ratio but decrease speed in some cases
37-
for integer types.
38-
float_mult_spec : {'enabled', 'disabled'}
39-
If enabled, pcodec will consider using float mult mode, which can
40-
substantially improve compression ratio but decrease speed in some cases
41-
for float types.
34+
mode_spec : {'auto', 'classic'}
35+
Configures whether Pcodec should try to infer the best "mode" or
36+
structure of the data (e.g. approximate multiples of 0.1) to improve
37+
compression ratio, or skip this step and just use the numbers as-is
38+
(Classic mode).
4239
equal_pages_up_to : int
4340
Divide the chunk into equal pages of up to this many numbers.
4441
"""
@@ -49,9 +46,9 @@ def __init__(
4946
self,
5047
level: int = 8,
5148
delta_encoding_order: Optional[int] = None,
52-
int_mult_spec: Literal["enabled", "disabled"] = "enabled",
53-
float_mult_spec: Literal["enabled", "disabled"] = "enabled",
5449
equal_pages_up_to: int = 262144,
50+
# TODO one day, add support for the Try* mode specs
51+
mode_spec: Literal['auto', 'classic'] = 'auto',
5552
):
5653
if standalone is None: # pragma: no cover
5754
raise ImportError("pcodec must be installed to use the PCodec codec.")
@@ -60,20 +57,25 @@ def __init__(
6057
# match other codecs
6158
self.level = level
6259
self.delta_encoding_order = delta_encoding_order
63-
self.int_mult_spec = int_mult_spec
64-
self.float_mult_spec = float_mult_spec
6560
self.equal_pages_up_to = equal_pages_up_to
61+
self.mode_spec = mode_spec
6662

6763
def encode(self, buf):
6864
buf = ensure_contiguous_ndarray(buf)
6965

66+
match self.mode_spec:
67+
case 'auto':
68+
mode_spec = ModeSpec.auto()
69+
case 'classic':
70+
mode_spec = ModeSpec.classic()
71+
case _:
72+
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
7073
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
7174

7275
config = ChunkConfig(
7376
compression_level=self.level,
7477
delta_encoding_order=self.delta_encoding_order,
75-
int_mult_spec=self.int_mult_spec,
76-
float_mult_spec=self.float_mult_spec,
78+
mode_spec=mode_spec,
7779
paging_spec=paging_spec,
7880
)
7981
return standalone.simple_compress(buf, config)

numcodecs/tests/test_pcodec.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
PCodec(level=1),
2525
PCodec(level=5),
2626
PCodec(level=9),
27-
PCodec(float_mult_spec="disabled", int_mult_spec="disabled"),
27+
PCodec(mode_spec='classic'),
2828
PCodec(equal_pages_up_to=300),
2929
]
3030

@@ -57,10 +57,15 @@ def test_config():
5757
check_config(codec)
5858

5959

60+
def test_invalid_config_error():
61+
with pytest.raises(ValueError):
62+
codec = PCodec(mode_spec='bogus')
63+
check_encode_decode_array_to_bytes(arrays[0], codec)
64+
65+
6066
def test_repr():
6167
check_repr(
62-
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, float_mult_spec='enabled', "
63-
"int_mult_spec='enabled', level=3)"
68+
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')"
6469
)
6570

6671

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ zfpy = [
6767
"numpy<2.0.0",
6868
]
6969
pcodec = [
70-
"pcodec>=0.1.0",
70+
"pcodec>=0.2.0",
7171
]
7272

7373
[tool.setuptools]

0 commit comments

Comments
 (0)