Skip to content

Commit 390e16e

Browse files
winklerand authored and jreback committed
BUG: resampling PeriodIndex now returns PeriodIndex (GH 12884, 15944)
Exceptions:
- force conversion to DatetimeIndex by kind='timestamp' param
- if loffset is given, convert to timestamps in any case
1 parent c27f430 commit 390e16e

File tree

2 files changed

+108
-42
lines changed

2 files changed

+108
-42
lines changed

pandas/core/resample.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -849,16 +849,15 @@ def _convert_obj(self, obj):
849849
" use .set_index(...) to explicitly set index")
850850
raise NotImplementedError(msg)
851851

852-
offset = to_offset(self.freq)
853-
if offset.n > 1:
854-
if self.kind == 'period': # pragma: no cover
855-
print('Warning: multiple of frequency -> timestamps')
856-
857-
# Cannot have multiple of periods, convert to timestamp
852+
if self.loffset is not None:
853+
if self.kind == 'period':
854+
print('Warning: loffset -> convert PeriodIndex to timestamps')
855+
# Cannot apply loffset/timedelta to PeriodIndex -> convert to
856+
# timestamps
858857
self.kind = 'timestamp'
859858

860859
# convert to timestamp
861-
if not (self.kind is None or self.kind == 'period'):
860+
if self.kind == 'timestamp':
862861
obj = obj.to_timestamp(how=self.convention)
863862

864863
return obj
@@ -1278,8 +1277,10 @@ def _get_period_bins(self, ax):
12781277

12791278
memb = ax.asfreq(self.freq, how=self.convention)
12801279
i8 = memb.asi8
1281-
rng = np.arange(i8[0], i8[-1] + 1)
1282-
bins = memb.searchsorted(rng, side='right')
1280+
freq_mult = self.freq.n
1281+
rng = np.arange(i8[0], i8[-1] + 1, freq_mult)
1282+
rng += freq_mult
1283+
bins = memb.searchsorted(rng, side='left')
12831284

12841285
return binner, bins, labels
12851286

pandas/tests/test_resample.py

Lines changed: 98 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2221,49 +2221,75 @@ def create_series(self):
22212221
return Series(np.arange(len(i)), index=i, name='pi')
22222222

22232223
def test_asfreq_downsample(self):
2224+
# GH 12884, 15944
22242225

2225-
# series
22262226
s = self.create_series()
2227-
expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2)))
2228-
expected.index = expected.index.to_timestamp()
2229-
expected.index.freq = to_offset('2D')
2227+
start = s.index[0].to_timestamp(how='start')
2228+
end = (s.index[-1] + 1).to_timestamp(how='start')
22302229

2231-
# this is a bug, this *should* return a PeriodIndex
2232-
# directly
2233-
# GH 12884
2230+
new_index = date_range(start=start, end=end, freq='2D', closed='left')
2231+
# series
2232+
expected = s.to_timestamp().reindex(new_index).to_period('2D')
22342233
result = s.resample('2D').asfreq()
22352234
assert_series_equal(result, expected)
2235+
result_kind_period = s.resample('2D', kind='period').asfreq()
2236+
assert_series_equal(result_kind_period, expected)
22362237

22372238
# frame
22382239
frame = s.to_frame('value')
2239-
expected = frame.reindex(
2240-
frame.index.take(np.arange(0, len(frame.index), 2)))
2241-
expected.index = expected.index.to_timestamp()
2242-
expected.index.freq = to_offset('2D')
2240+
expected = frame.to_timestamp().reindex(new_index).to_period('2D')
22432241
result = frame.resample('2D').asfreq()
22442242
assert_frame_equal(result, expected)
2243+
result_kind_period = frame.resample('2D', kind='period').asfreq()
2244+
assert_frame_equal(result_kind_period, expected)
2245+
2246+
def test_asfreq_downsample_kind_timestamp(self):
2247+
# series
2248+
s = self.create_series()
2249+
expected = s.to_timestamp().resample('2D').asfreq()
2250+
result = s.resample('2D', kind='timestamp').asfreq()
2251+
assert_series_equal(result, expected)
2252+
2253+
# frame
2254+
frame = s.to_frame('value')
2255+
expected = frame.to_timestamp().resample('2D').asfreq()
2256+
result = frame.resample('2D', kind='timestamp').asfreq()
2257+
assert_frame_equal(result, expected)
22452258

22462259
def test_asfreq_upsample(self):
2260+
# GH 12884, 15944
22472261

2248-
# this is a bug, this *should* return a PeriodIndex
2249-
# directly
2250-
# GH 12884
22512262
s = self.create_series()
2252-
new_index = date_range(s.index[0].to_timestamp(how='start'),
2253-
(s.index[-1] + 1).to_timestamp(how='start'),
2254-
freq='1H',
2255-
closed='left')
2256-
expected = s.to_timestamp().reindex(new_index).to_period()
2257-
result = s.resample('1H').asfreq()
2263+
start = s.index[0].to_timestamp(how='start')
2264+
end = (s.index[-1] + 1).to_timestamp(how='start')
2265+
for freq in ['1H', '2H']:
2266+
# check base frequency and frequency multiple
2267+
new_index = date_range(start=start, end=end, freq=freq,
2268+
closed='left')
2269+
# series
2270+
expected = s.to_timestamp().reindex(new_index).to_period(freq)
2271+
result = s.resample(freq).asfreq()
2272+
assert_series_equal(result, expected)
2273+
result_kind_period = s.resample(freq, kind='period').asfreq()
2274+
assert_series_equal(result_kind_period, expected)
2275+
2276+
# frame
2277+
frame = s.to_frame('value')
2278+
expected = frame.to_timestamp().reindex(new_index).to_period(freq)
2279+
result = frame.resample(freq).asfreq()
2280+
assert_frame_equal(result, expected)
2281+
result_kind_period = frame.resample(freq, kind='period').asfreq()
2282+
assert_frame_equal(result_kind_period, expected)
2283+
2284+
def test_asfreq_upsample_kind_timestamp(self):
2285+
s = self.create_series()
2286+
expected = s.to_timestamp().resample('1H').asfreq()
2287+
result = s.resample('1H', kind='timestamp').asfreq()
22582288
assert_series_equal(result, expected)
22592289

22602290
frame = s.to_frame('value')
2261-
new_index = date_range(frame.index[0].to_timestamp(how='start'),
2262-
(frame.index[-1] + 1).to_timestamp(how='start'),
2263-
freq='1H',
2264-
closed='left')
2265-
expected = frame.to_timestamp().reindex(new_index).to_period()
2266-
result = frame.resample('1H').asfreq()
2291+
expected = frame.to_timestamp().resample('1H').asfreq()
2292+
result = frame.resample('1H', kind='timestamp').asfreq()
22672293
assert_frame_equal(result, expected)
22682294

22692295
def test_asfreq_fill_value(self):
@@ -2375,12 +2401,11 @@ def test_basic_upsample(self):
23752401
ts = _simple_pts('1/1/1990', '6/30/1995', freq='M')
23762402
result = ts.resample('a-dec').mean()
23772403

2378-
resampled = result.resample('D', convention='end').ffill()
2379-
2380-
expected = result.to_timestamp('D', how='end')
2381-
expected = expected.asfreq('D', 'ffill').to_period()
2382-
2383-
assert_series_equal(resampled, expected)
2404+
for freq in ['D', '2D']:
2405+
resampled = result.resample(freq, convention='end').ffill()
2406+
expected = result.to_timestamp(freq, how='end')
2407+
expected = expected.asfreq(freq, 'ffill').to_period(freq)
2408+
assert_series_equal(resampled, expected)
23842409

23852410
def test_upsample_with_limit(self):
23862411
rng = period_range('1/1/2000', periods=5, freq='A')
@@ -2451,10 +2476,13 @@ def test_resample_count(self):
24512476
series = pd.Series(1, index=pd.period_range(start='2000',
24522477
periods=100))
24532478
result = series.resample('M').count()
2454-
24552479
expected_index = pd.period_range(start='2000', freq='M', periods=4)
24562480
expected = pd.Series([31, 29, 31, 9], index=expected_index)
2481+
assert_series_equal(result, expected)
24572482

2483+
result = series.resample('2M').count()
2484+
expected_index = pd.period_range(start='2000', freq='2M', periods=2)
2485+
expected = pd.Series([31 + 29, 31 + 9], index=expected_index)
24582486
assert_series_equal(result, expected)
24592487

24602488
def test_resample_same_freq(self):
@@ -2596,7 +2624,17 @@ def test_resample_5minute(self):
25962624
rng = period_range('1/1/2000', '1/5/2000', freq='T')
25972625
ts = Series(np.random.randn(len(rng)), index=rng)
25982626

2627+
expected = ts.to_timestamp().resample('5min').mean().to_period('5min')
25992628
result = ts.resample('5min').mean()
2629+
assert_series_equal(result, expected)
2630+
result_kind_period = ts.resample('5min', kind='period').mean()
2631+
assert_series_equal(result_kind_period, expected)
2632+
2633+
def test_resample_5minute_kind_timestamp(self):
2634+
rng = period_range('1/1/2000', '1/5/2000', freq='T')
2635+
ts = Series(np.random.randn(len(rng)), index=rng)
2636+
2637+
result = ts.resample('5min', kind='timestamp').mean()
26002638
expected = ts.to_timestamp().resample('5min').mean()
26012639
assert_series_equal(result, expected)
26022640

@@ -2824,6 +2862,33 @@ def test_apply_to_empty_series(self):
28242862
for freq in ['M', 'D', 'H']:
28252863
with pytest.raises(TypeError):
28262864
series.resample(freq).apply(lambda x: 1)
2865+
def test_loffset_returns_datetimeindex(self):
2866+
# make sure passing loffset returns DatetimeIndex in all cases
2867+
# basic method taken from Base.test_resample_loffset_arg_type()
2868+
df = self.create_series().to_frame('value')
2869+
expected_means = [df.values[i:i + 2].mean()
2870+
for i in range(0, len(df.values), 2)]
2871+
expected_index = self.create_index(df.index[0], periods=len(df.index) /
2872+
2, freq='2D')
2873+
2874+
# loffset coerces PeriodIndex to DatetimeIndex
2875+
expected_index = expected_index.to_timestamp()
2876+
expected_index += timedelta(hours=2)
2877+
expected = DataFrame({'value': expected_means}, index=expected_index)
2878+
2879+
for arg in ['mean', {'value': 'mean'}, ['mean']]:
2880+
for kind_param in [None, 'period', 'timestamp']:
2881+
result_agg = (df.resample('2D', loffset='2H', kind=kind_param)
2882+
.agg(arg))
2883+
with tm.assert_produces_warning(FutureWarning,
2884+
check_stacklevel=False):
2885+
result_how = df.resample('2D', how=arg, loffset='2H',
2886+
kind=kind_param)
2887+
if isinstance(arg, list):
2888+
expected.columns = (pd.MultiIndex
2889+
.from_tuples([('value', 'mean')]))
2890+
assert_frame_equal(result_agg, expected)
2891+
assert_frame_equal(result_how, expected)
28272892

28282893

28292894
class TestTimedeltaIndex(Base):

0 commit comments

Comments
 (0)