Skip to content

Commit f0cc3ea

Browse files
committed
Rudimentary column object tests
1 parent 6825c67 commit f0cc3ea

File tree

5 files changed

+184
-11
lines changed

5 files changed

+184
-11
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ it's a **work in progress**.
1616

1717
* **Signatures** ([`test_signatures.py`](./tests/test_signatures.py)): Assert methods have the correct signatures.
1818

19-
* **Basic functionality** ([`test_dataframe_object.py`](./tests/test_dataframe_object.py)): Smoke methods can take valid input, and assert they return valid output (where appropiate).
19+
* **Basic functionality** (e.g. [`test_dataframe_object.py`](./tests/test_dataframe_object.py)): Smoke methods can take valid input, and assert they return valid output (where appropriate).
2020

2121
### What the heck is `LibraryInfo`?
2222

tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,5 @@ def pytest_collection_modifyitems(config, items):
6262
for item in items:
6363
if any(id_ in item.nodeid for id_ in ci_failing_ids):
6464
item.add_marker(pytest.mark.xfail())
65+
elif "test_column_object" in item.nodeid:
66+
item.add_marker(pytest.mark.skip("TODO"))

tests/strategies.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ class NominalDtype(Enum):
1515
DATETIME64NS = "datetime64[ns]"
1616
CATEGORY = "category"
1717
# Numerics
18-
UINT8 = "uint8"
19-
UINT16 = "uint16"
20-
UINT32 = "uint32"
21-
UINT64 = "uint64"
2218
INT8 = "int8"
2319
INT16 = "int16"
2420
INT32 = "int32"
2521
INT64 = "int64"
22+
UINT8 = "uint8"
23+
UINT16 = "uint16"
24+
UINT32 = "uint32"
25+
UINT64 = "uint64"
2626
FLOAT32 = "float32"
2727
FLOAT64 = "float64"
2828

@@ -56,12 +56,12 @@ def __iter__(self):
5656
def __len__(self):
5757
return len(self._name_to_column)
5858

59-
def num_rows(self) -> int:
60-
return self._nrows
61-
6259
def num_columns(self) -> int:
6360
return self._ncols
6461

62+
def num_rows(self) -> int:
63+
return self._nrows
64+
6565
def __repr__(self) -> str:
6666
col_reprs = []
6767
for name, col in self.items():

tests/test_column_object.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
from enum import IntEnum
2+
from typing import Dict, Iterator, Tuple
3+
4+
import numpy as np
5+
import pytest
6+
from hypothesis import given
7+
from hypothesis import strategies as st
8+
9+
from tests.api import Column
10+
11+
from .strategies import MockColumn, NominalDtype, mock_dataframes
12+
from .wrappers import LibraryInfo
13+
14+
# TODO: helpful assertion messages
15+
# TODO: better column generation
16+
17+
18+
def columns_and_mock_columns(
    libinfo: LibraryInfo, data: st.DataObject
) -> Iterator[Tuple[Column, MockColumn]]:
    """Yield every interchange column alongside the mock column it came from.

    One mock dataframe is drawn from ``data`` (zero-column frames are
    disallowed), converted with ``libinfo.mock_to_interchange``, and then
    walked column by column.

    Yields:
        ``(column, mock_column)`` pairs, one per column name in the drawn
        mock dataframe.
    """
    # Start from the library's own strategy kwargs, then force the
    # zero-column override so it always wins over whatever the library set.
    strategy_kwargs = dict(libinfo.mock_dataframes_kwargs)
    strategy_kwargs["allow_zero_cols"] = False
    mock_df = data.draw(mock_dataframes(**strategy_kwargs), label="mock_df")
    df = libinfo.mock_to_interchange(mock_df)
    for col_name in mock_df.keys():
        yield df.get_column_by_name(col_name), mock_df[col_name]
28+
29+
30+
@given(data=st.data())
def test_size(libinfo: LibraryInfo, data: st.DataObject):
    """Check that a reported ``Column.size`` is an int equal to the mock's size.

    A ``size`` of ``None`` is accepted and skips the remaining checks for
    that column.
    """
    for col, mock_col in columns_and_mock_columns(libinfo, data):
        reported = col.size
        if reported is None:
            continue
        assert isinstance(reported, int)
        assert reported == mock_col.array.size
37+
38+
39+
@given(data=st.data())
def test_offset(libinfo: LibraryInfo, data: st.DataObject):
    """Check that ``Column.offset`` is an int for every generated column."""
    for col, _ in columns_and_mock_columns(libinfo, data):
        assert isinstance(col.offset, int)
44+
45+
46+
def _nominal_dtypes_with_prefix(prefix: str) -> Tuple[NominalDtype, ...]:
    """Return the ``NominalDtype`` members whose value starts with ``prefix``."""
    return tuple(nd for nd in NominalDtype if nd.value.startswith(prefix))


# Numeric dtype families, selected by the prefix of the enum's string value.
# "uint..." values do not start with "int", so the families stay disjoint.
INT_DTYPES = _nominal_dtypes_with_prefix("int")
UINT_DTYPES = _nominal_dtypes_with_prefix("uint")
FLOAT_DTYPES = _nominal_dtypes_with_prefix("float")
49+
50+
51+
class DtypeKind(IntEnum):
    """Integer codes for the kinds of dtype a column can report.

    ``test_dtype`` compares the ``value`` of the first element of
    ``Column.dtype`` against these codes.
    """

    # Numeric kinds
    INT = 0
    UINT = 1
    FLOAT = 2

    # Non-numeric kinds
    BOOL = 20
    STRING = 21
    DATETIME = 22
    CATEGORICAL = 23
59+
60+
61+
# Expected ``DtypeKind`` for every nominal dtype the mock dataframes can use.
# The numeric families are expanded member by member; the rest map one-to-one.
NOMINAL_TO_KIND: Dict[NominalDtype, DtypeKind] = {
    **dict.fromkeys(INT_DTYPES, DtypeKind.INT),
    **dict.fromkeys(UINT_DTYPES, DtypeKind.UINT),
    **dict.fromkeys(FLOAT_DTYPES, DtypeKind.FLOAT),
    NominalDtype.BOOL: DtypeKind.BOOL,
    NominalDtype.UTF8: DtypeKind.STRING,
    NominalDtype.DATETIME64NS: DtypeKind.DATETIME,
    NominalDtype.CATEGORY: DtypeKind.CATEGORICAL,
}
70+
71+
72+
@given(data=st.data())
def test_dtype(libinfo: LibraryInfo, data: st.DataObject):
    """Check ``Column.dtype`` is a well-formed 4-tuple matching the mock dtype.

    For every generated column the dtype must be a tuple of
    ``(kind, bitwidth, format string, endianness)`` where ``kind`` is an
    ``IntEnum`` whose value equals the ``DtypeKind`` expected for the mock
    column's nominal dtype.
    """
    for col, mock_col in columns_and_mock_columns(libinfo, data):
        dtype = col.dtype
        assert isinstance(dtype, tuple)
        assert len(dtype) == 4
        # Unpack the tuple that was just validated instead of reading
        # ``col.dtype`` a second time, so every assertion below is about the
        # same object even if the property builds a new tuple per access.
        kind, bitwidth, fstring, endianness = dtype
        assert isinstance(kind, IntEnum)
        assert kind.value == NOMINAL_TO_KIND[mock_col.nominal_dtype].value
        assert isinstance(bitwidth, int)
        # TODO: Test fstring and endianness have valid values
        assert isinstance(fstring, str)
        assert isinstance(endianness, str)
85+
86+
87+
@given(data=st.data())
def test_describe_categorical(libinfo: LibraryInfo, data: st.DataObject):
    """Check the shape of ``Column.describe_categorical``.

    Categorical columns must return a dict with boolean ``is_ordered`` and
    ``is_dictionary`` entries and a ``mapping`` that is either ``None`` or a
    dict. Any other column must raise ``RuntimeError`` on access.
    """
    # TODO: ensure generation for categorical columns
    for col, mock_col in columns_and_mock_columns(libinfo, data):
        if mock_col.nominal_dtype != NominalDtype.CATEGORY:
            # Non-categorical columns are expected to refuse the request.
            with pytest.raises(RuntimeError):
                col.describe_categorical
            continue

        catinfo = col.describe_categorical
        assert isinstance(catinfo, dict)
        for required_key in ("is_ordered", "is_dictionary", "mapping"):
            assert required_key in catinfo
        assert isinstance(catinfo["is_ordered"], bool)
        assert isinstance(catinfo["is_dictionary"], bool)
        category_mapping = catinfo["mapping"]
        if category_mapping is not None:
            assert isinstance(category_mapping, dict)
104+
105+
106+
@given(data=st.data())
def test_describe_null(libinfo: LibraryInfo, data: st.DataObject):
    """Check ``Column.describe_null`` is a valid ``(kind, value)`` pair.

    ``kind`` must be an int in 0..4. For kinds 0 and 1 the value must be
    ``None``; for kinds 3 and 4 it must be the int 0 or 1. Kind 2 places no
    constraint on the value here.
    """
    valueless_kinds = (0, 1)  # non-nullable or NaN/NaT
    mask_kinds = (3, 4)  # bit or byte mask
    for col, _ in columns_and_mock_columns(libinfo, data):
        nullinfo = col.describe_null
        assert isinstance(nullinfo, tuple)
        assert len(nullinfo) == 2
        null_kind, null_value = nullinfo
        assert isinstance(null_kind, int)
        assert null_kind in (0, 1, 2, 3, 4)
        if null_kind in valueless_kinds:
            assert null_value is None
        elif null_kind in mask_kinds:
            assert isinstance(null_value, int)
            assert null_value in (0, 1)
120+
121+
122+
@given(data=st.data())
def test_null_count(libinfo: LibraryInfo, data: st.DataObject):
    """Check a reported ``Column.null_count`` is an int matching the mock data.

    A ``null_count`` of ``None`` is accepted and skips the checks for that
    column. The expected count is the number of NaN/NaT entries in the mock
    column's array.
    """
    for col, mock_col in columns_and_mock_columns(libinfo, data):
        null_count = col.null_count
        if null_count is None:
            continue
        assert isinstance(null_count, int)
        try:
            # Vectorised count; avoids iterating the array in Python.
            expected = int(np.count_nonzero(np.isnan(mock_col.array)))
        except TypeError:
            # np.isnan is undefined for non-numeric arrays (e.g. object/str),
            # so there is nothing to compare the reported count against yet.
            # TODO: test null counts for dtypes np.isnan does not support
            continue
        assert null_count == expected
129+
130+
131+
@given(data=st.data())
def test_num_chunks(libinfo: LibraryInfo, data: st.DataObject):
    """Check that ``Column.num_chunks()`` returns an int for every column."""
    for col, _ in columns_and_mock_columns(libinfo, data):
        assert isinstance(col.num_chunks(), int)
136+
137+
138+
@given(data=st.data())
def test_get_chunks(libinfo: LibraryInfo, data: st.DataObject):
    """Smoke-test ``Column.get_chunks`` with valid ``n_chunks`` arguments.

    ``n_chunks`` is drawn as ``None`` or as one or two times the column's own
    chunk count. When it is ``None``, a second draw decides whether ``None``
    is passed explicitly or the argument is omitted altogether.
    """
    for col, _ in columns_and_mock_columns(libinfo, data):
        base_chunks = col.num_chunks()
        n_chunks = data.draw(
            st.none() | st.integers(1, 2).map(lambda n: n * base_chunks),
            label="n_chunks",
        )
        # The boolean is only drawn when n_chunks is None (short-circuit).
        if n_chunks is not None or data.draw(
            st.booleans(), label="pass n_chunks"
        ):
            call_args = [n_chunks]
        else:
            call_args = []
        # NOTE(review): the returned iterable is never consumed, so a lazy
        # get_chunks implementation is not exercised here; confirm intent.
        col.get_chunks(*call_args)
151+
152+
153+
@given(data=st.data())
def test_get_buffers(libinfo: LibraryInfo, data: st.DataObject):
    """Check the structure of the dict returned by ``Column.get_buffers()``.

    The result must be a dict with ``data``, ``validity`` and ``offsets``
    keys. ``data`` must be a 2-tuple; ``validity`` and ``offsets`` must each
    be either ``None`` or a 2-tuple.
    """
    for col, _ in columns_and_mock_columns(libinfo, data):
        bufinfo = col.get_buffers()
        assert isinstance(bufinfo, dict)
        for key in ("data", "validity", "offsets"):
            assert key in bufinfo
        # TODO: test returned dtypes (probably generalise it)
        # Named ``data_buffer`` so it does not shadow the hypothesis ``data``
        # object that this test receives as a parameter.
        data_buffer = bufinfo["data"]
        assert isinstance(data_buffer, tuple)
        assert len(data_buffer) == 2
        # The remaining buffers are optional but share the same shape check.
        for optional_key in ("validity", "offsets"):
            optional_buffer = bufinfo[optional_key]
            if optional_buffer is not None:
                assert isinstance(optional_buffer, tuple)
                assert len(optional_buffer) == 2

tests/test_meta.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
from hypothesis import given
77
from hypothesis import strategies as st
88

9-
from .strategies import mock_dataframes
9+
from .strategies import MockDataFrame, mock_dataframes
1010
from .wrappers import LibraryInfo
1111

1212

1313
@given(mock_dataframes())
14-
def test_mock_dataframes(_):
15-
pass
14+
def test_mock_dataframes(mock_df):
15+
assert isinstance(mock_df, MockDataFrame)
1616

1717

1818
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)