Commit d2842b5

move reset_index
1 parent 0ea2c59 commit d2842b5

2 files changed: +128, -3 lines

pandas/core/resample.py

Lines changed: 3 additions & 3 deletions
@@ -1597,6 +1597,9 @@ def _gotitem(self, key, ndim, subset=None):
             if isinstance(key, list) and self.key not in key and self.key is not None:
                 key.append(self.key)
             groupby = self._groupby[key]
+            # GH 59350: Index is ignored when using the on keyword argument to resample
+            if self.key is not None:
+                groupby.obj = groupby.obj.reset_index(drop=True)
         except IndexError:
             groupby = self._groupby

@@ -1927,9 +1930,6 @@ def get_resampler_for_grouping(
     """
     # .resample uses 'on' similar to how .groupby uses 'key'
     tg = TimeGrouper(freq=rule, key=on, **kwargs)
-    # GH 59350: Index should be ignored when using the on keyword argument to resample
-    if on is not None:
-        groupby.obj = groupby.obj.reset_index(drop=True)
     resampler = tg._get_resampler(groupby.obj)
     return resampler._get_resampler_for_grouping(
         groupby=groupby, include_groups=include_groups, key=tg.key
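
Read together, the two hunks move the GH 59350 fix: the reset_index call leaves get_resampler_for_grouping, where it reassigned the shared groupby.obj up front, and is now applied inside _gotitem only when a key (the resample "on" column) is set, so just the subsetted groupby is affected. A minimal sketch of the user-facing behaviour this preserves, assuming current pandas groupby/resample APIs; the variable names are illustrative and not part of the commit:

import pandas as pd

df = pd.DataFrame(
    {
        "datetime": pd.to_datetime(["2024-07-30T00:00Z", "2024-07-30T00:01Z"]),
        "group": ["A", "A"],
        "numbers": [100, 200],
    },
    index=[1, 0],  # non-default index that resample(on="datetime") should ignore
)

gb = df.groupby("group")

# Resampling on the "datetime" column ignores the frame's own index (GH 59350) ...
result = gb.resample("1min", on="datetime").aggregate({"numbers": "sum"})

# ... and, because the reset now happens on the subsetted groupby inside
# _gotitem rather than on the shared groupby.obj, the original gb remains
# usable afterwards with its [1, 0] index intact.
still_works = gb[["numbers"]].transform("sum")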

pandas/tests/resample/test_resampler_grouper.py

Lines changed: 125 additions & 0 deletions
@@ -691,6 +691,38 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column():
         rs[["val_not_in_dataframe"]]


+def test_groupby_resample_after_set_index_and_not_on_column():
+    # GH 59350
+    df = DataFrame(
+        data={
+            "datetime": [
+                pd.to_datetime("2024-07-30T00:00Z"),
+                pd.to_datetime("2024-07-30T00:01Z"),
+            ],
+            "group": ["A", "A"],
+            "numbers": [100, 200],
+        },
+        index=[1, 0],
+    ).set_index("datetime")
+    gb = df.groupby("group")
+    rs = gb.resample("1min")
+    result = rs.aggregate({"numbers": "sum"})
+
+    index = pd.MultiIndex.from_arrays(
+        [
+            ["A", "A"],
+            [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")],
+        ],
+        names=[
+            "group",
+            "datetime",
+        ],
+    )
+    expected = DataFrame({"numbers": [100, 200]}, index=index)
+
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "df",
     [

@@ -746,3 +778,96 @@ def test_groupby_resample_on_column_when_index_is_unusual(df):
     expected = DataFrame({"numbers": [100, 200]}, index=index)

     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_resample_then_groupby_is_reused_when_index_is_out_of_order():
+    df = DataFrame(
+        data={
+            "datetime": [
+                pd.to_datetime("2024-07-30T00:00Z"),
+                pd.to_datetime("2024-07-30T00:01Z"),
+            ],
+            "group": ["A", "A"],
+            "numbers": [100, 200],
+        },
+        index=[1, 0],
+    )
+
+    gb = df.groupby("group")
+
+    # use gb
+    result_1 = gb[["numbers"]].transform("sum")
+
+    index = Index([1, 0])
+    expected = DataFrame({"numbers": [300, 300]}, index=index)
+
+    tm.assert_frame_equal(result_1, expected)
+
+    # resample gb, unrelated to above
+    rs = gb.resample("1min", on="datetime")
+    result_2 = rs.aggregate({"numbers": "sum"})
+
+    index = pd.MultiIndex.from_arrays(
+        [
+            ["A", "A"],
+            [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")],
+        ],
+        names=[
+            "group",
+            "datetime",
+        ],
+    )
+    expected = DataFrame({"numbers": [100, 200]}, index=index)
+
+    tm.assert_frame_equal(result_2, expected)
+
+    # reuse gb, unrelated to above
+    result_3 = gb[["numbers"]].transform("sum")
+
+    tm.assert_frame_equal(result_1, result_3)
+
+
+def test_groupby_resample_then_groupby_is_reused_when_index_is_set_from_column():
+    df = DataFrame(
+        data={
+            "datetime": [
+                pd.to_datetime("2024-07-30T00:00Z"),
+                pd.to_datetime("2024-07-30T00:01Z"),
+            ],
+            "group": ["A", "A"],
+            "numbers": [100, 200],
+        },
+    ).set_index("group")
+
+    gb = df.groupby("group")
+
+    # use gb
+    result_1 = gb[["numbers"]].transform("sum")
+
+    index = Index(["A", "A"], name="group")
+    expected = DataFrame({"numbers": [300, 300]}, index=index)
+
+    tm.assert_frame_equal(result_1, expected)
+
+    # resample gb, unrelated to above
+    rs = gb.resample("1min", on="datetime")
+    result_2 = rs.aggregate({"numbers": "sum"})
+
+    index = pd.MultiIndex.from_arrays(
+        [
+            ["A", "A"],
+            [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")],
+        ],
+        names=[
+            "group",
+            "datetime",
+        ],
+    )
+    expected = DataFrame({"numbers": [100, 200]}, index=index)
+
+    tm.assert_frame_equal(result_2, expected)
+
+    # reuse gb, unrelated to above
+    result_3 = gb[["numbers"]].transform("sum")
+
+    tm.assert_frame_equal(result_1, result_3)
