Skip to content

Commit 4abe4be

Browse files
authored
Add pcodec (#501)
* added PCodec * fix line length and print statements * docs * mock pcodec on rtd * fix typo * add dtype details * changed import style for pcodec * fix flake8 * revert import changes * fix errors due to changes in pcodec API * change import style * skip coverage of failed import path * skip pcodec tests if not installed
1 parent 0878717 commit 4abe4be

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+250
-5
lines changed

.github/workflows/ci-linux.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
run: |
5151
conda activate env
5252
export DISABLE_NUMCODECS_AVX2=""
53-
python -m pip install -v -e .[test,test_extras,msgpack,zfpy]
53+
python -m pip install -v -e .[test,test_extras,msgpack,zfpy,pcodec]
5454
5555
- name: List installed packages
5656
shell: "bash -l {0}"

.github/workflows/ci-osx.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
run: |
5151
conda activate env
5252
export DISABLE_NUMCODECS_AVX2=""
53-
python -m pip install -v -e .[test,test_extras,msgpack,zfpy]
53+
python -m pip install -v -e .[test,test_extras,msgpack,zfpy,pcodec]
5454
5555
- name: List installed packages
5656
shell: "bash -l {0}"

.github/workflows/ci-windows.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
shell: "bash -l {0}"
4343
run: |
4444
conda activate env
45-
python -m pip install -v -e .[test,test_extras,msgpack,zfpy]
45+
python -m pip install -v -e .[test,test_extras,msgpack,zfpy,pcodec]
4646
4747
- name: List installed packages
4848
shell: "bash -l {0}"

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __getattr__(cls, name):
2323
return Mock()
2424

2525

26-
MOCK_MODULES = ['msgpack']
26+
MOCK_MODULES = ['msgpack', 'pcodec']
2727
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
2828

2929

docs/index.rst

Lines changed: 1 addition & 0 deletions

docs/pcodec.rst

Lines changed: 10 additions & 0 deletions

docs/release.rst

Lines changed: 2 additions & 0 deletions

fixture/pcodec/array.00.npy

4.03 KB
Binary file not shown.

fixture/pcodec/array.01.npy

7.94 KB
Binary file not shown.

fixture/pcodec/array.02.npy

4.03 KB
Binary file not shown.

fixture/pcodec/array.03.npy

7.94 KB
Binary file not shown.

fixture/pcodec/array.04.npy

4.03 KB
Binary file not shown.

fixture/pcodec/array.05.npy

7.94 KB
Binary file not shown.

fixture/pcodec/array.06.npy

7.94 KB
Binary file not shown.

fixture/pcodec/array.07.npy

7.94 KB
Binary file not shown.

fixture/pcodec/array.08.npy

7.94 KB
Binary file not shown.

fixture/pcodec/array.09.npy

7.94 KB
Binary file not shown.

fixture/pcodec/codec.00/config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"delta_encoding_order": null,
3+
"equal_pages_up_to": 262144,
4+
"float_mult_spec": "enabled",
5+
"id": "pcodec",
6+
"int_mult_spec": "enabled",
7+
"level": 8
8+
}
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
150 Bytes
Binary file not shown.
117 Bytes
Binary file not shown.
5.57 KB
Binary file not shown.
5.57 KB
Binary file not shown.
7.35 KB
Binary file not shown.
579 Bytes
Binary file not shown.

fixture/pcodec/codec.01/config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"delta_encoding_order": null,
3+
"equal_pages_up_to": 262144,
4+
"float_mult_spec": "enabled",
5+
"id": "pcodec",
6+
"int_mult_spec": "enabled",
7+
"level": 1
8+
}
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
150 Bytes
Binary file not shown.
114 Bytes
Binary file not shown.
5.64 KB
Binary file not shown.
5.64 KB
Binary file not shown.
7.35 KB
Binary file not shown.
651 Bytes
Binary file not shown.

fixture/pcodec/codec.02/config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"delta_encoding_order": null,
3+
"equal_pages_up_to": 262144,
4+
"float_mult_spec": "enabled",
5+
"id": "pcodec",
6+
"int_mult_spec": "enabled",
7+
"level": 5
8+
}
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
150 Bytes
Binary file not shown.
115 Bytes
Binary file not shown.
5.57 KB
Binary file not shown.
5.58 KB
Binary file not shown.
7.35 KB
Binary file not shown.
579 Bytes
Binary file not shown.

fixture/pcodec/codec.03/config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"delta_encoding_order": null,
3+
"equal_pages_up_to": 262144,
4+
"float_mult_spec": "enabled",
5+
"id": "pcodec",
6+
"int_mult_spec": "enabled",
7+
"level": 9
8+
}
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
150 Bytes
Binary file not shown.
117 Bytes
Binary file not shown.
5.56 KB
Binary file not shown.
5.57 KB
Binary file not shown.
7.35 KB
Binary file not shown.
579 Bytes
Binary file not shown.

fixture/pcodec/codec.04/config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"delta_encoding_order": null,
3+
"equal_pages_up_to": 262144,
4+
"float_mult_spec": "disabled",
5+
"id": "pcodec",
6+
"int_mult_spec": "disabled",
7+
"level": 8
8+
}
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
25 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
150 Bytes
Binary file not shown.
117 Bytes
Binary file not shown.
5.57 KB
Binary file not shown.
5.57 KB
Binary file not shown.
7.35 KB
Binary file not shown.
579 Bytes
Binary file not shown.

fixture/pcodec/codec.05/config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"delta_encoding_order": null,
3+
"equal_pages_up_to": 300,
4+
"float_mult_spec": "enabled",
5+
"id": "pcodec",
6+
"int_mult_spec": "enabled",
7+
"level": 8
8+
}
73 Bytes
Binary file not shown.
109 Bytes
Binary file not shown.
73 Bytes
Binary file not shown.
109 Bytes
Binary file not shown.
201 Bytes
Binary file not shown.
221 Bytes
Binary file not shown.
5.67 KB
Binary file not shown.
5.67 KB
Binary file not shown.
7.4 KB
Binary file not shown.
671 Bytes
Binary file not shown.

numcodecs/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,6 @@
115115

116116
from numcodecs.fletcher32 import Fletcher32
117117
register_codec(Fletcher32)
118+
119+
from numcodecs.pcodec import PCodec
120+
register_codec(PCodec)

numcodecs/pcodec.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
from typing import Optional, Literal
2+
3+
import numcodecs
4+
import numcodecs.abc
5+
from numcodecs.compat import ensure_contiguous_ndarray
6+
7+
try:
8+
from pcodec import standalone, ChunkConfig, PagingSpec
9+
except ImportError: # pragma: no cover
10+
standalone = None
11+
12+
13+
DEFAULT_MAX_PAGE_N = 262144
14+
15+
16+
class PCodec(numcodecs.abc.Codec):
17+
"""
18+
PCodec (or pco, pronounced "pico") losslessly compresses and decompresses
19+
numerical sequences with high compression ratio and fast speed.
20+
21+
See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information.
22+
23+
PCodec supports only the following numerical dtypes: uint32, uint64, int32,
24+
int64, float32, and float64.
25+
26+
Parameters
27+
----------
28+
level : int
29+
A compression level from 0-12, where 12 takes the longest and compresses
30+
the most.
31+
delta_encoding_order : int or None
32+
Either a delta encoding level from 0-7 or None. If set to None, pcodec
33+
will try to infer the optimal delta encoding order.
34+
int_mult_spec : {'enabled', 'disabled'}
35+
If enabled, pcodec will consider using int mult mode, which can
36+
substantially improve compression ratio but decrease speed in some cases
37+
for integer types.
38+
float_mult_spec : {'enabled', 'disabled'}
39+
If enabled, pcodec will consider using float mult mode, which can
40+
substantially improve compression ratio but decrease speed in some cases
41+
for float types.
42+
equal_pages_up_to : int
43+
Divide the chunk into equal pages of up to this many numbers.
44+
"""
45+
46+
codec_id = "pcodec"
47+
48+
def __init__(
49+
self,
50+
level: int = 8,
51+
delta_encoding_order: Optional[int] = None,
52+
int_mult_spec: Literal["enabled", "disabled"] = "enabled",
53+
float_mult_spec: Literal["enabled", "disabled"] = "enabled",
54+
equal_pages_up_to: int = 262144
55+
):
56+
if standalone is None: # pragma: no cover
57+
raise ImportError(
58+
"pcodec must be installed to use the PCodec codec."
59+
)
60+
61+
# note that we use `level` instead of `compression_level` to
62+
# match other codecs
63+
self.level = level
64+
self.delta_encoding_order = delta_encoding_order
65+
self.int_mult_spec = int_mult_spec
66+
self.float_mult_spec = float_mult_spec
67+
self.equal_pages_up_to = equal_pages_up_to
68+
69+
def encode(self, buf):
70+
buf = ensure_contiguous_ndarray(buf)
71+
72+
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
73+
74+
config = ChunkConfig(
75+
compression_level=self.level,
76+
delta_encoding_order=self.delta_encoding_order,
77+
int_mult_spec=self.int_mult_spec,
78+
float_mult_spec=self.float_mult_spec,
79+
paging_spec=paging_spec,
80+
)
81+
return standalone.simple_compress(buf, config)
82+
83+
def decode(self, buf, out=None):
84+
if out is not None:
85+
out = ensure_contiguous_ndarray(out)
86+
standalone.simple_decompress_into(buf, out)
87+
return out
88+
else:
89+
return standalone.simple_decompress(buf)

numcodecs/tests/common.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,17 @@ def check_encode_decode_array(arr, codec):
217217
assert_array_items_equal(arr, dec)
218218

219219

220+
def check_encode_decode_array_to_bytes(arr, codec):
221+
222+
enc = codec.encode(arr)
223+
dec = codec.decode(enc)
224+
assert_array_items_equal(arr, dec)
225+
226+
out = np.empty_like(arr)
227+
codec.decode(enc, out=out)
228+
assert_array_items_equal(arr, out)
229+
230+
220231
def check_config(codec):
221232
config = codec.get_config()
222233
# round-trip through JSON to check serialization

numcodecs/tests/test_pcodec.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import pytest
2+
import numpy as np
3+
4+
from numcodecs.pcodec import PCodec
5+
6+
try:
7+
# initializing the codec raises ImportError if pcodec is not installed
8+
PCodec()
9+
except ImportError: # pragma: no cover
10+
pytest.skip(
11+
"pcodec not available", allow_module_level=True
12+
)
13+
14+
from numcodecs.tests.common import (
15+
check_encode_decode_array_to_bytes,
16+
check_config,
17+
check_repr,
18+
check_backwards_compatibility,
19+
check_err_decode_object_buffer,
20+
check_err_encode_object_buffer,
21+
)
22+
23+
24+
codecs = [
25+
PCodec(),
26+
PCodec(level=1),
27+
PCodec(level=5),
28+
PCodec(level=9),
29+
PCodec(float_mult_spec="disabled", int_mult_spec="disabled"),
30+
PCodec(equal_pages_up_to=300),
31+
]
32+
33+
34+
# mix of dtypes: integer, float
35+
# mix of shapes: 1D, 2D
36+
# mix of orders: C, F
37+
arrays = [
38+
np.arange(1000, dtype="u4"),
39+
np.arange(1000, dtype="u8"),
40+
np.arange(1000, dtype="i4"),
41+
np.arange(1000, dtype="i8"),
42+
np.linspace(1000, 1001, 1000, dtype="f4"),
43+
np.linspace(1000, 1001, 1000, dtype="f8"),
44+
np.random.normal(loc=1000, scale=1, size=(100, 10)),
45+
np.asfortranarray(np.random.normal(loc=1000, scale=1, size=(100, 10))),
46+
np.random.randint(0, 2**60, size=1000, dtype="u8"),
47+
np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8"),
48+
]
49+
50+
51+
@pytest.mark.parametrize("arr", arrays)
52+
@pytest.mark.parametrize("codec", codecs)
53+
def test_encode_decode(arr, codec):
54+
check_encode_decode_array_to_bytes(arr, codec)
55+
56+
57+
def test_config():
58+
codec = PCodec(level=3)
59+
check_config(codec)
60+
61+
62+
def test_repr():
63+
check_repr(
64+
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, float_mult_spec='enabled', "
65+
"int_mult_spec='enabled', level=3)"
66+
)
67+
68+
69+
def test_backwards_compatibility():
70+
check_backwards_compatibility(PCodec.codec_id, arrays, codecs)
71+
72+
73+
def test_err_decode_object_buffer():
74+
check_err_decode_object_buffer(PCodec())
75+
76+
77+
def test_err_encode_object_buffer():
78+
check_err_encode_object_buffer(PCodec())

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ msgpack = [
6666
zfpy = [
6767
"zfpy>=1.0.0",
6868
]
69+
pcodec = [
70+
"pcodec>=0.1.0",
71+
]
6972

7073
[tool.setuptools]
7174
license-files = ["LICENSE.txt"]

0 commit comments

Comments
 (0)