Skip to content

Commit ca3dca7

Browse files
committed
BUG: read_parquet does not respect index for arrow dtype backend
1 parent 723b8da commit ca3dca7

File tree

2 files changed

+14
-4
lines changed

2 files changed

+14
-4
lines changed

pandas/io/parquet.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,9 @@ def read(
259259
pa_table.column_names, pa_table.itercolumns()
260260
)
261261
}
262-
if len(index_columns) == 1 and isinstance(index_columns[0], dict):
262+
if len(index_columns) == 0:
263+
idx = None
264+
elif len(index_columns) == 1 and isinstance(index_columns[0], dict):
263265
params = index_columns[0]
264266
idx = RangeIndex(
265267
params.get("start"), params.get("stop"), params.get("step")
@@ -270,7 +272,10 @@ def read(
270272
result_dc.pop(index_col) for index_col in index_columns
271273
]
272274
if len(index_data) == 1:
273-
idx = Index(index_data[0], name=index_columns[0])
275+
name = index_columns[0]
276+
if isinstance(name, str) and name.startswith("__index_level_"):
277+
name = None
278+
idx = Index(index_data[0], name=name)
274279
else:
275280
idx = MultiIndex.from_arrays(index_data, names=index_columns)
276281
result = DataFrame(result_dc, index=idx)

pandas/tests/io/test_parquet.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1241,16 +1241,21 @@ def test_pyarrow_backed_df_index(self, index, pa):
12411241
expected = obj.astype("int64[pyarrow]").set_index(index)
12421242
tm.assert_frame_equal(result, expected)
12431243

1244-
def test_pyarrow_backed_df_range_index(self, pa):
1244+
@pytest.mark.parametrize("index", [True, False, None])
1245+
def test_pyarrow_backed_df_range_index(self, pa, index):
12451246
# GH#48944
12461247
df = pd.DataFrame(
12471248
data={"A": [0, 1], "B": [1, 0]}, index=RangeIndex(start=100, stop=102)
12481249
)
12491250
with tm.ensure_clean("test.parquet") as path:
12501251
with open(path.encode(), "wb") as f:
1251-
df.to_parquet(f)
1252+
df.to_parquet(f, index=index)
12521253

12531254
with pd.option_context("mode.dtype_backend", "pyarrow"):
12541255
result = read_parquet(path, engine="pyarrow")
12551256
expected = df.astype("int64[pyarrow]")
1257+
if index is False:
1258+
expected = expected.reset_index(drop=True)
1259+
elif index:
1260+
expected.index = pd.Index([100, 101], dtype="int64[pyarrow]")
12561261
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)