Skip to content

Commit a82879d

Browse files
winkler and jreback
authored and committed
BUG: enable resampling with NaT in PeriodIndex (GH 13224)
1 parent 23566c2 commit a82879d

File tree

2 files changed

+67
-4
lines changed

2 files changed

+67
-4
lines changed

pandas/core/resample.py

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1270,18 +1270,34 @@ def _get_period_bins(self, ax):
12701270
raise TypeError('axis must be a PeriodIndex, but got '
12711271
'an instance of %r' % type(ax).__name__)
12721272

1273-
if not len(ax):
1273+
memb = ax.asfreq(self.freq, how=self.convention)
1274+
# NaT handling as in pandas._lib.lib.generate_bins_dt64()
1275+
nat_count = 0
1276+
if memb.hasnans:
1277+
import warnings
1278+
with warnings.catch_warnings():
1279+
warnings.filterwarnings('ignore', 'numpy equal will not check '
1280+
'object identity')
1281+
nat_mask = memb.base == tslib.NaT
1282+
# raises "FutureWarning: numpy equal will not check object
1283+
# identity in the future. The comparison did not return the
1284+
# same result as suggested by the identity (`is`)) and will
1285+
# change."
1286+
nat_count = np.sum(nat_mask)
1287+
memb = memb[~nat_mask]
1288+
1289+
# if index contains no valid (non-NaT) values, return empty index
1290+
if not len(memb):
12741291
binner = labels = PeriodIndex(
12751292
data=[], freq=self.freq, name=ax.name)
12761293
return binner, [], labels
12771294

1278-
start = ax[0].asfreq(self.freq, how=self.convention)
1279-
end = ax[-1].asfreq(self.freq, how='end')
1295+
start = ax.min().asfreq(self.freq, how=self.convention)
1296+
end = ax.max().asfreq(self.freq, how='end')
12801297

12811298
labels = binner = PeriodIndex(start=start, end=end,
12821299
freq=self.freq, name=ax.name)
12831300

1284-
memb = ax.asfreq(self.freq, how=self.convention)
12851301
i8 = memb.asi8
12861302
freq_mult = self.freq.n
12871303
# when upsampling to subperiods, we need to generate enough bins
@@ -1291,6 +1307,14 @@ def _get_period_bins(self, ax):
12911307
rng += freq_mult
12921308
bins = memb.searchsorted(rng, side='left')
12931309

1310+
if nat_count > 0:
1311+
# NaT handling as in pandas._lib.lib.generate_bins_dt64()
1312+
# shift bins by the number of NaT
1313+
bins += nat_count
1314+
bins = np.insert(bins, 0, nat_count)
1315+
binner = binner.insert(0, tslib.NaT)
1316+
labels = labels.insert(0, tslib.NaT)
1317+
12941318
return binner, bins, labels
12951319

12961320

pandas/tests/test_resample.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2913,6 +2913,45 @@ def test_upsampling_ohlc_freq_multiples(self):
29132913
result = s.resample('12H', kind='period').ohlc()
29142914
assert_frame_equal(result, expected)
29152915

2916+
def test_resample_with_nat(self):
2917+
# GH 13224
2918+
index = PeriodIndex([pd.NaT, '1970-01-01 00:00:00', pd.NaT,
2919+
'1970-01-01 00:00:01', '1970-01-01 00:00:02'],
2920+
freq='S')
2921+
frame = DataFrame([2, 3, 5, 7, 11], index=index)
2922+
2923+
index_1s = PeriodIndex(['1970-01-01 00:00:00', '1970-01-01 00:00:01',
2924+
'1970-01-01 00:00:02'], freq='S')
2925+
frame_1s = DataFrame([3, 7, 11], index=index_1s)
2926+
result_1s = frame.resample('1s').mean()
2927+
assert_frame_equal(result_1s, frame_1s)
2928+
2929+
index_2s = PeriodIndex(['1970-01-01 00:00:00',
2930+
'1970-01-01 00:00:02'], freq='2S')
2931+
frame_2s = DataFrame([5, 11], index=index_2s)
2932+
result_2s = frame.resample('2s').mean()
2933+
assert_frame_equal(result_2s, frame_2s)
2934+
2935+
index_3s = PeriodIndex(['1970-01-01 00:00:00'], freq='3S')
2936+
frame_3s = DataFrame([7], index=index_3s)
2937+
result_3s = frame.resample('3s').mean()
2938+
assert_frame_equal(result_3s, frame_3s)
2939+
2940+
pi = PeriodIndex(['1970-01-01 00:00:00', pd.NaT,
2941+
'1970-01-01 00:00:02'], freq='S')
2942+
frame = DataFrame([2, 3, 5], index=pi)
2943+
expected_index = period_range(pi[0], periods=len(pi), freq=pi.freq)
2944+
expected = DataFrame([2, np.NaN, 5], index=expected_index)
2945+
result = frame.resample('1s').mean()
2946+
assert_frame_equal(result, expected)
2947+
2948+
pi = PeriodIndex([pd.NaT] * 3, freq='S')
2949+
frame = DataFrame([2, 3, 5], index=pi)
2950+
expected_index = PeriodIndex(data=[], freq=pi.freq)
2951+
expected = DataFrame([], index=expected_index)
2952+
result = frame.resample('1s').mean()
2953+
assert_frame_equal(result, expected)
2954+
29162955

29172956
class TestTimedeltaIndex(Base):
29182957
_index_factory = lambda x: timedelta_range

0 commit comments

Comments
 (0)