Skip to content

Commit cde51cd

Browse files
committed
BUG: enable resampling with NaT in PeriodIndex (GH 13224)
1 parent 076cb6a commit cde51cd

File tree

2 files changed

+67
-4
lines changed

2 files changed

+67
-4
lines changed

pandas/core/resample.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,18 +1220,34 @@ def _get_period_bins(self, ax):
12201220
raise TypeError('axis must be a PeriodIndex, but got '
12211221
'an instance of %r' % type(ax).__name__)
12221222

1223-
if not len(ax):
1223+
memb = ax.asfreq(self.freq, how=self.convention)
1224+
# NaT handling as in pandas._libs.lib.generate_bins_dt64()
1225+
nat_count = 0
1226+
if memb.hasnans:
1227+
import warnings
1228+
with warnings.catch_warnings():
1229+
warnings.filterwarnings('ignore', 'numpy equal will not check '
1230+
'object identity')
1231+
nat_mask = memb.base == tslib.NaT
1232+
# raises "FutureWarning: numpy equal will not check object
1233+
# identity in the future. The comparison did not return the
1234+
# same result as suggested by the identity (`is`)) and will
1235+
# change."
1236+
nat_count = np.sum(nat_mask)
1237+
memb = memb[~nat_mask]
1238+
1239+
# if index contains no valid (non-NaT) values, return empty index
1240+
if not len(memb):
12241241
binner = labels = PeriodIndex(
12251242
data=[], freq=self.freq, name=ax.name)
12261243
return binner, [], labels
12271244

1228-
start = ax[0].asfreq(self.freq, how=self.convention)
1229-
end = ax[-1].asfreq(self.freq, how='end')
1245+
start = ax.min().asfreq(self.freq, how=self.convention)
1246+
end = ax.max().asfreq(self.freq, how='end')
12301247

12311248
labels = binner = PeriodIndex(start=start, end=end,
12321249
freq=self.freq, name=ax.name)
12331250

1234-
memb = ax.asfreq(self.freq, how=self.convention)
12351251
i8 = memb.asi8
12361252
freq_mult = self.freq.n
12371253
# when upsampling to subperiods, we need to generate enough bins
@@ -1241,6 +1257,14 @@ def _get_period_bins(self, ax):
12411257
rng += freq_mult
12421258
bins = memb.searchsorted(rng, side='left')
12431259

1260+
if nat_count > 0:
1261+
# NaT handling as in pandas._libs.lib.generate_bins_dt64()
1262+
# shift bins by the number of NaT
1263+
bins += nat_count
1264+
bins = np.insert(bins, 0, nat_count)
1265+
binner = binner.insert(0, tslib.NaT)
1266+
labels = labels.insert(0, tslib.NaT)
1267+
12441268
return binner, bins, labels
12451269

12461270

pandas/tests/test_resample.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2866,6 +2866,45 @@ def test_upsampling_ohlc_freq_multiples(self):
28662866
result = s.resample('12H', kind='period').ohlc()
28672867
assert_frame_equal(result, expected)
28682868

2869+
def test_resample_with_nat(self):
2870+
# GH 13224
2871+
index = PeriodIndex([pd.NaT, '1970-01-01 00:00:00', pd.NaT,
2872+
'1970-01-01 00:00:01', '1970-01-01 00:00:02'],
2873+
freq='S')
2874+
frame = DataFrame([2, 3, 5, 7, 11], index=index)
2875+
2876+
index_1s = PeriodIndex(['1970-01-01 00:00:00', '1970-01-01 00:00:01',
2877+
'1970-01-01 00:00:02'], freq='S')
2878+
frame_1s = DataFrame([3, 7, 11], index=index_1s)
2879+
result_1s = frame.resample('1s').mean()
2880+
assert_frame_equal(result_1s, frame_1s)
2881+
2882+
index_2s = PeriodIndex(['1970-01-01 00:00:00',
2883+
'1970-01-01 00:00:02'], freq='2S')
2884+
frame_2s = DataFrame([5, 11], index=index_2s)
2885+
result_2s = frame.resample('2s').mean()
2886+
assert_frame_equal(result_2s, frame_2s)
2887+
2888+
index_3s = PeriodIndex(['1970-01-01 00:00:00'], freq='3S')
2889+
frame_3s = DataFrame([7], index=index_3s)
2890+
result_3s = frame.resample('3s').mean()
2891+
assert_frame_equal(result_3s, frame_3s)
2892+
2893+
pi = PeriodIndex(['1970-01-01 00:00:00', pd.NaT,
2894+
'1970-01-01 00:00:02'], freq='S')
2895+
frame = DataFrame([2, 3, 5], index=pi)
2896+
expected_index = period_range(pi[0], periods=len(pi), freq=pi.freq)
2897+
expected = DataFrame([2, np.NaN, 5], index=expected_index)
2898+
result = frame.resample('1s').mean()
2899+
assert_frame_equal(result, expected)
2900+
2901+
pi = PeriodIndex([pd.NaT] * 3, freq='S')
2902+
frame = DataFrame([2, 3, 5], index=pi)
2903+
expected_index = PeriodIndex(data=[], freq=pi.freq)
2904+
expected = DataFrame([], index=expected_index)
2905+
result = frame.resample('1s').mean()
2906+
assert_frame_equal(result, expected)
2907+
28692908

28702909
class TestTimedeltaIndex(Base):
28712910
_index_factory = lambda x: timedelta_range

0 commit comments

Comments
 (0)