Skip to content

Commit 36a66d5

Browse files
committed
BUG: enable resampling with NaT in PeriodIndex (GH 13224)
1 parent 4211df2 commit 36a66d5

File tree

2 files changed

+67
-4
lines changed

2 files changed

+67
-4
lines changed

pandas/core/resample.py

+28-4
Original file line numberDiff line numberDiff line change
@@ -1246,18 +1246,34 @@ def _get_period_bins(self, ax):
12461246
raise TypeError('axis must be a PeriodIndex, but got '
12471247
'an instance of %r' % type(ax).__name__)
12481248

1249-
if not len(ax):
1249+
memb = ax.asfreq(self.freq, how=self.convention)
1250+
# NaT handling as in pandas._libs.lib.generate_bins_dt64()
1251+
nat_count = 0
1252+
if memb.hasnans:
1253+
import warnings
1254+
with warnings.catch_warnings():
1255+
warnings.filterwarnings('ignore', 'numpy equal will not check '
1256+
'object identity')
1257+
nat_mask = memb.base == tslib.NaT
1258+
# raises "FutureWarning: numpy equal will not check object
1259+
# identity in the future. The comparison did not return the
1260+
# same result as suggested by the identity (`is`)) and will
1261+
# change."
1262+
nat_count = np.sum(nat_mask)
1263+
memb = memb[~nat_mask]
1264+
1265+
# if index contains no valid (non-NaT) values, return empty index
1266+
if not len(memb):
12501267
binner = labels = PeriodIndex(
12511268
data=[], freq=self.freq, name=ax.name)
12521269
return binner, [], labels
12531270

1254-
start = ax[0].asfreq(self.freq, how=self.convention)
1255-
end = ax[-1].asfreq(self.freq, how='end')
1271+
start = ax.min().asfreq(self.freq, how=self.convention)
1272+
end = ax.max().asfreq(self.freq, how='end')
12561273

12571274
labels = binner = PeriodIndex(start=start, end=end,
12581275
freq=self.freq, name=ax.name)
12591276

1260-
memb = ax.asfreq(self.freq, how=self.convention)
12611277
i8 = memb.asi8
12621278
freq_mult = self.freq.n
12631279
# when upsampling to subperiods, we need to generate enough bins
@@ -1267,6 +1283,14 @@ def _get_period_bins(self, ax):
12671283
rng += freq_mult
12681284
bins = memb.searchsorted(rng, side='left')
12691285

1286+
if nat_count > 0:
1287+
# NaT handling as in pandas._libs.lib.generate_bins_dt64()
1288+
# shift bins by the number of NaT
1289+
bins += nat_count
1290+
bins = np.insert(bins, 0, nat_count)
1291+
binner = binner.insert(0, tslib.NaT)
1292+
labels = labels.insert(0, tslib.NaT)
1293+
12701294
return binner, bins, labels
12711295

12721296

pandas/tests/test_resample.py

+39
Original file line numberDiff line numberDiff line change
@@ -2870,6 +2870,45 @@ def test_upsampling_ohlc_freq_multiples(self):
28702870
result = s.resample('12H', kind='period').ohlc()
28712871
assert_frame_equal(result, expected)
28722872

2873+
def test_resample_with_nat(self):
2874+
# GH 13224
2875+
index = PeriodIndex([pd.NaT, '1970-01-01 00:00:00', pd.NaT,
2876+
'1970-01-01 00:00:01', '1970-01-01 00:00:02'],
2877+
freq='S')
2878+
frame = DataFrame([2, 3, 5, 7, 11], index=index)
2879+
2880+
index_1s = PeriodIndex(['1970-01-01 00:00:00', '1970-01-01 00:00:01',
2881+
'1970-01-01 00:00:02'], freq='S')
2882+
frame_1s = DataFrame([3, 7, 11], index=index_1s)
2883+
result_1s = frame.resample('1s').mean()
2884+
assert_frame_equal(result_1s, frame_1s)
2885+
2886+
index_2s = PeriodIndex(['1970-01-01 00:00:00',
2887+
'1970-01-01 00:00:02'], freq='2S')
2888+
frame_2s = DataFrame([5, 11], index=index_2s)
2889+
result_2s = frame.resample('2s').mean()
2890+
assert_frame_equal(result_2s, frame_2s)
2891+
2892+
index_3s = PeriodIndex(['1970-01-01 00:00:00'], freq='3S')
2893+
frame_3s = DataFrame([7], index=index_3s)
2894+
result_3s = frame.resample('3s').mean()
2895+
assert_frame_equal(result_3s, frame_3s)
2896+
2897+
pi = PeriodIndex(['1970-01-01 00:00:00', pd.NaT,
2898+
'1970-01-01 00:00:02'], freq='S')
2899+
frame = DataFrame([2, 3, 5], index=pi)
2900+
expected_index = period_range(pi[0], periods=len(pi), freq=pi.freq)
2901+
expected = DataFrame([2, np.NaN, 5], index=expected_index)
2902+
result = frame.resample('1s').mean()
2903+
assert_frame_equal(result, expected)
2904+
2905+
pi = PeriodIndex([pd.NaT] * 3, freq='S')
2906+
frame = DataFrame([2, 3, 5], index=pi)
2907+
expected_index = PeriodIndex(data=[], freq=pi.freq)
2908+
expected = DataFrame([], index=expected_index)
2909+
result = frame.resample('1s').mean()
2910+
assert_frame_equal(result, expected)
2911+
28732912

28742913
class TestTimedeltaIndex(Base, tm.TestCase):
28752914
_index_factory = lambda x: timedelta_range

0 commit comments

Comments
 (0)