Skip to content

Commit 5a87d7e

Browse files
TST: add parquet test with tz datetime data for pyarrow
+ clean up the basic data type tests: build a common DataFrame from the types supported by both pyarrow and fastparquet
1 parent 6b6cfb8 commit 5a87d7e

File tree

1 file changed

+44
-36
lines changed

1 file changed

+44
-36
lines changed

pandas/tests/io/test_parquet.py

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,25 @@ def df_cross_compat():
9090
return df
9191

9292

93+
@pytest.fixture
94+
def df_full():
95+
return pd.DataFrame(
96+
{'string': list('abc'),
97+
'string_with_nan': ['a', np.nan, 'c'],
98+
'string_with_none': ['a', None, 'c'],
99+
'bytes': [b'foo', b'bar', b'baz'],
100+
'unicode': [u'foo', u'bar', u'baz'],
101+
'int': list(range(1, 4)),
102+
'uint': np.arange(3, 6).astype('u1'),
103+
'float': np.arange(4.0, 7.0, dtype='float64'),
104+
'float_with_nan': [2., np.nan, 3.],
105+
'bool': [True, False, True],
106+
'datetime': pd.date_range('20130101', periods=3),
107+
'datetime_with_nat': [pd.Timestamp('20130101'),
108+
pd.NaT,
109+
pd.Timestamp('20130103')]})
110+
111+
93112
def test_invalid_engine(df_compat):
94113

95114
with pytest.raises(ValueError):
@@ -300,27 +319,29 @@ def test_read_columns(self, engine):
300319

301320
class TestParquetPyArrow(Base):
302321

303-
def test_basic(self, pa):
322+
def test_basic(self, pa, df_full):
304323

305-
df = pd.DataFrame({'string': list('abc'),
306-
'string_with_nan': ['a', np.nan, 'c'],
307-
'string_with_none': ['a', None, 'c'],
308-
'bytes': [b'foo', b'bar', b'baz'],
309-
'unicode': [u'foo', u'bar', u'baz'],
310-
'int': list(range(1, 4)),
311-
'uint': np.arange(3, 6).astype('u1'),
312-
'float': np.arange(4.0, 7.0, dtype='float64'),
313-
'float_with_nan': [2., np.nan, 3.],
314-
'bool': [True, False, True],
315-
'bool_with_none': [True, None, True],
316-
'datetime_ns': pd.date_range('20130101', periods=3),
317-
'datetime_with_nat': [pd.Timestamp('20130101'),
318-
pd.NaT,
319-
pd.Timestamp('20130103')]
320-
})
324+
df = df_full
325+
326+
# additional supported types for pyarrow
327+
df['datetime_tz'] = pd.date_range('20130101', periods=3,
328+
tz='Europe/Brussels')
329+
df['bool_with_none'] = [True, None, True]
321330

322331
self.check_round_trip(df, pa)
323332

333+
@pytest.mark.xfail(reason="pyarrow fails on this (ARROW-1883)")
334+
def test_basic_subset_columns(self, pa, df_full):
335+
# GH18628
336+
337+
df = df_full
338+
# additional supported types for pyarrow
339+
df['datetime_tz'] = pd.date_range('20130101', periods=3,
340+
tz='Europe/Brussels')
341+
342+
self.check_round_trip(df, pa, expected=df[['string', 'int']],
343+
read_kwargs={'columns': ['string', 'int']})
344+
324345
def test_duplicate_columns(self, pa):
325346

326347
# not currently able to handle duplicate columns
@@ -363,25 +384,12 @@ def test_categorical_unsupported(self, pa_lt_070):
363384

364385
class TestParquetFastParquet(Base):
365386

366-
def test_basic(self, fp):
367-
368-
df = pd.DataFrame(
369-
{'string': list('abc'),
370-
'string_with_nan': ['a', np.nan, 'c'],
371-
'string_with_none': ['a', None, 'c'],
372-
'bytes': [b'foo', b'bar', b'baz'],
373-
'unicode': [u'foo', u'bar', u'baz'],
374-
'int': list(range(1, 4)),
375-
'uint': np.arange(3, 6).astype('u1'),
376-
'float': np.arange(4.0, 7.0, dtype='float64'),
377-
'float_with_nan': [2., np.nan, 3.],
378-
'bool': [True, False, True],
379-
'datetime': pd.date_range('20130101', periods=3),
380-
'datetime_with_nat': [pd.Timestamp('20130101'),
381-
pd.NaT,
382-
pd.Timestamp('20130103')],
383-
'timedelta': pd.timedelta_range('1 day', periods=3),
384-
})
387+
def test_basic(self, fp, df_full):
388+
389+
df = df_full
390+
391+
# additional supported types for fastparquet
392+
df['timedelta'] = pd.timedelta_range('1 day', periods=3)
385393

386394
self.check_round_trip(df, fp, write_kwargs={'compression': None})
387395

0 commit comments

Comments
 (0)