Skip to content

BUG: pivot_table losing tz #32558

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 14, 2020
1 change: 1 addition & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ def from_product(cls, iterables, sortorder=None, names=lib.no_default):
if names is lib.no_default:
names = [getattr(it, "name", None) for it in iterables]

# codes are all ndarrays, so cartesian_product is lossless
codes = cartesian_product(codes)
return MultiIndex(levels, codes, sortorder=sortorder, names=names)

Expand Down
64 changes: 57 additions & 7 deletions pandas/core/reshape/util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import numpy as np

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.generic import ABCCategorical

import pandas.core.common as com
from pandas.core.indexes.api import Index, IntervalIndex


def cartesian_product(X):
Expand Down Expand Up @@ -51,9 +52,58 @@ def cartesian_product(X):
# if any factor is empty, the cartesian product is empty
b = np.zeros_like(cumprodX)

return [
np.tile(
np.repeat(np.asarray(com.values_from_object(x)), b[i]), np.product(a[i])
)
for i, x in enumerate(X)
]
return [_tile_compat(np.repeat(x, b[i]), np.product(a[i])) for i, x in enumerate(X)]


def _broadcast_tile(arr: np.ndarray, num: int) -> np.ndarray:
    """
    Emulate np.tile but using views instead of copies.

    ``arr`` is viewed as a column of shape ``(len(arr), 1)`` and broadcast
    to ``(len(arr), num)``; the repetition is therefore expressed as a view
    and only the final flatten produces the output array. Avoiding the
    intermediate copies matters for large inputs.

    Parameters
    ----------
    arr : np.ndarray
        Array to repeat. It must be reshapeable to ``(len(arr), 1)``,
        i.e. effectively one-dimensional.
    num : int
        Number of end-to-end repetitions.

    Returns
    -------
    np.ndarray
        The elements of ``arr`` repeated ``num`` times end to end, in the
        same order ``np.tile(arr, num)`` would produce.
    """
    shape = (len(arr), num)
    middle = arr.reshape(len(arr), 1)
    new_arr = np.broadcast_to(middle, shape)

    # Note: doing `ravel` gives us the wrong order. Row ``i`` of ``new_arr``
    # is ``arr[i]`` repeated ``num`` times, so a C-order flatten would yield
    # each element ``num`` times in a row (np.repeat semantics). Walking the
    # columns first (Fortran order) yields the whole of ``arr`` again and
    # again, which is the np.tile order we want.
    return new_arr.reshape(-1, order="F")


def _tile_compat(arr, num: int):
    """
    Index compat for np.tile.

    Parameters
    ----------
    arr : np.ndarray or Index
        Values to repeat end to end.
    num : int
        Number of repetitions.

    Returns
    -------
    np.ndarray or Index
        An ndarray when ``arr`` is an ndarray; otherwise an Index carrying
        ``arr.name`` whose backing data has been tiled ``num`` times.

    Notes
    -----
    Does not support multi-dimensional `num`.
    """
    if isinstance(arr, np.ndarray):
        return _broadcast_tile(arr, num)

    # Otherwise we have an Index
    values = arr._data
    index_cls = type(arr)

    if isinstance(values, np.ndarray):
        result = _broadcast_tile(values, num)
        return index_cls._simple_new(result, name=arr.name)

    if isinstance(values, ABCCategorical):
        # Tile the integer codes and rebuild with the original dtype so the
        # categories (and anything they carry) are left untouched.
        codes = _broadcast_tile(values.codes, num)
        result = type(values).from_codes(codes, dtype=values.dtype)
        return index_cls._simple_new(result, name=arr.name)

    if isinstance(arr, IntervalIndex):
        # Tile each bound separately, recursing so that whatever type the
        # bounds have is handled by the same dispatch.
        new_left = _tile_compat(values.left, num)
        new_right = _tile_compat(values.right, num)
        result = type(values).from_arrays(new_left, new_right, closed=values.closed)
        return index_cls._simple_new(result, name=arr.name)

    # ``getattr`` rather than ``values._data``: a backing object without a
    # ``_data`` attribute must fall through to the generic fallback below
    # instead of raising AttributeError here.
    inner = getattr(values, "_data", None)
    if isinstance(inner, np.ndarray):
        # DatetimeIndex, TimedeltaIndex, PeriodIndex
        data = _broadcast_tile(inner, num)
        result = type(values)._simple_new(data, dtype=values.dtype)
        return index_cls._simple_new(result, name=arr.name)

    # As of now this just leaves RangeIndex, which cannot
    #  use type(self)._simple_new
    # ``np.asarray`` because _broadcast_tile needs ``.reshape`` on its input.
    result = _broadcast_tile(np.asarray(values), num)
    return Index(result, name=arr.name)
8 changes: 8 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,14 @@ def test_pivot_table_multiindex_only(self, cols):

tm.assert_frame_equal(result, expected)

def test_pivot_table_retains_tz(self):
dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam")
df = DataFrame({"A": np.random.randn(3), "B": np.random.randn(3), "C": dti})
result = df.pivot_table(index=["B", "C"], dropna=False)

# check tz retention
assert result.index.levels[1].equals(dti)

def test_pivot_integer_columns(self):
# caused by upstream bug in unstack

Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/reshape/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ def test_datetimeindex(self):
tm.assert_index_equal(result1, expected1)
tm.assert_index_equal(result2, expected2)

def test_tzaware_retained(self):
    # cartesian_product must hand back the tz-aware factor as an Index that
    # still carries its timezone.
    x = date_range("2000-01-01", periods=2, tz="US/Pacific")
    y = np.array([3, 4])
    result1, result2 = cartesian_product([x, y])

    # first factor: each element repeated once per element of ``y``
    expected = x.repeat(2)
    tm.assert_index_equal(result1, expected)

    # second factor: the whole of ``y`` once per element of ``x``
    # (previously unpacked but never checked)
    tm.assert_numpy_array_equal(result2, np.tile(y, 2))

def test_tzaware_retained_categorical(self):
    # Same as the plain tz-aware case, but with the dates wrapped in a
    # categorical index.
    x = date_range("2000-01-01", periods=2, tz="US/Pacific").astype("category")
    y = np.array([3, 4])
    result1, result2 = cartesian_product([x, y])

    # first factor: each element repeated once per element of ``y``
    expected = x.repeat(2)
    tm.assert_index_equal(result1, expected)

    # second factor: the whole of ``y`` once per element of ``x``
    # (previously unpacked but never checked)
    tm.assert_numpy_array_equal(result2, np.tile(y, 2))

def test_empty(self):
# product of empty factors
X = [[], [0, 1], []]
Expand Down