Skip to content

Commit f8dd0cc

Browse files
committed
ENH: Introduces keyword in resample to handle open intervals. pandas-dev#10449
1 parent 0320e3b commit f8dd0cc

File tree

3 files changed

+88
-8
lines changed

3 files changed

+88
-8
lines changed

pandas/core/generic.py

+37-3
Original file line numberDiff line numberDiff line change
@@ -3557,7 +3557,7 @@ def between_time(self, start_time, end_time, include_start=True,
35573557

35583558
def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
35593559
label=None, convention='start', kind=None, loffset=None,
3560-
limit=None, base=0):
3560+
limit=None, base=0, was_closed=None):
35613561
"""
35623562
Convenience method for frequency conversion and resampling of regular
35633563
time-series data.
@@ -3578,6 +3578,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
35783578
For frequencies that evenly subdivide 1 day, the "origin" of the
35793579
aggregated intervals. For example, for '5min' frequency, base could
35803580
range from 0 through 4. Defaults to 0
3581+
was_closed : boolean, default None
3582+
If True, interpret the given time-series as sampling points of left-closed, right-open intervals (or
3583+
right-closed, left-open intervals if closed == 'right') and resample up to, but excluding, the assumed end-point on the open side of the interval. Requires an index with a fixed frequency.
3584+
3585+
.. versionadded:: 0.18.0
35813586
35823587
35833588
Examples
@@ -3676,15 +3681,44 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
36763681
2000-01-01 00:03:00 17
36773682
2000-01-01 00:06:00 26
36783683
Freq: 3T, dtype: int64
3679-
3684+
3685+
Use ``was_closed=True`` to resample up to (but excluding) the next sampling point
3686+
3687+
>>> series.resample("15s").pad()[24:]
3688+
2000-01-01 00:06:00 6
3689+
2000-01-01 00:06:15 6
3690+
2000-01-01 00:06:30 6
3691+
2000-01-01 00:06:45 6
3692+
2000-01-01 00:07:00 7
3693+
2000-01-01 00:07:15 7
3694+
2000-01-01 00:07:30 7
3695+
2000-01-01 00:07:45 7
3696+
2000-01-01 00:08:00 8
3697+
Freq: 15S, dtype: int64
3698+
3699+
>>> series.resample("15s", was_closed=True).pad()[24:]
3700+
2000-01-01 00:06:00 6
3701+
2000-01-01 00:06:15 6
3702+
2000-01-01 00:06:30 6
3703+
2000-01-01 00:06:45 6
3704+
2000-01-01 00:07:00 7
3705+
2000-01-01 00:07:15 7
3706+
2000-01-01 00:07:30 7
3707+
2000-01-01 00:07:45 7
3708+
2000-01-01 00:08:00 8
3709+
2000-01-01 00:08:15 8
3710+
2000-01-01 00:08:30 8
3711+
2000-01-01 00:08:45 8
3712+
Freq: 15S, dtype: int64
3713+
36803714
"""
36813715
from pandas.tseries.resample import resample
36823716

36833717
axis = self._get_axis_number(axis)
36843718
r = resample(self, freq=rule, label=label, closed=closed,
36853719
axis=axis, kind=kind, loffset=loffset,
36863720
fill_method=fill_method, convention=convention,
3687-
limit=limit, base=base)
3721+
limit=limit, base=base, was_closed=was_closed)
36883722

36893723
# deprecation warnings
36903724
# but call methods anyhow

pandas/tseries/resample.py

+33-5
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class Resampler(_GroupBy):
4848

4949
# to the groupby descriptor
5050
_attributes = ['freq', 'axis', 'closed', 'label', 'convention',
51-
'loffset', 'base', 'kind']
51+
'loffset', 'base', 'kind', 'was_closed']
5252

5353
# API compat of allowed attributes
5454
_deprecated_valids = _attributes + ['_ipython_display_', '__doc__',
@@ -721,7 +721,8 @@ class TimeGrouper(Grouper):
721721
def __init__(self, freq='Min', closed=None, label=None, how='mean',
722722
nperiods=None, axis=0,
723723
fill_method=None, limit=None, loffset=None, kind=None,
724-
convention=None, base=0, **kwargs):
724+
convention=None, base=0,
725+
was_closed=None, **kwargs):
725726
freq = to_offset(freq)
726727

727728
end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'])
@@ -752,6 +753,8 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean',
752753
self.limit = limit
753754
self.base = base
754755

756+
self.was_closed = was_closed
757+
755758
# always sort time groupers
756759
kwargs['sort'] = True
757760

@@ -852,7 +855,8 @@ def _get_time_bins(self, ax):
852855
first, last = ax.min(), ax.max()
853856
first, last = _get_range_edges(first, last, self.freq,
854857
closed=self.closed,
855-
base=self.base)
858+
base=self.base,
859+
was_closed=self.was_closed, was_freq=ax.freq)
856860
tz = ax.tz
857861
binner = labels = DatetimeIndex(freq=self.freq,
858862
start=first.replace(tzinfo=None),
@@ -978,7 +982,32 @@ def _take_new_index(obj, indexer, new_index, axis=0):
978982
raise ValueError("'obj' should be either a Series or a DataFrame")
979983

980984

981-
def _get_range_edges(first, last, offset, closed='left', base=0):
985+
def _get_range_edges(first, last, offset, closed='left', base=0,
986+
was_closed=False, was_freq=None):
987+
988+
if was_closed is True:
989+
if not was_freq:
990+
raise ValueError("was_closed requires equidstant index.")
991+
992+
virt_first = first - (closed == 'right') * was_freq
993+
virt_last = last + (closed == 'left') * was_freq
994+
995+
diff_nano = (virt_last - virt_first).value
996+
997+
if closed == "left":
998+
tmp_first = virt_first
999+
tmp_last = virt_last
1000+
if diff_nano % offset.nanos > 0:
1001+
tmp_last = tmp_last + offset
1002+
else:
1003+
periods = (virt_last.value - virt_first.value) // offset.nanos
1004+
if diff_nano % offset.nanos > 0:
1005+
periods += 1
1006+
tmp_first = virt_last - periods * offset
1007+
tmp_last = virt_last
1008+
1009+
return tmp_first, tmp_last
1010+
9821011
if isinstance(offset, compat.string_types):
9831012
offset = to_offset(offset)
9841013

@@ -1000,7 +1029,6 @@ def _get_range_edges(first, last, offset, closed='left', base=0):
10001029
first = Timestamp(offset.rollback(first))
10011030
else:
10021031
first = Timestamp(first - offset)
1003-
10041032
last = Timestamp(last + offset)
10051033

10061034
return first, last

pandas/tseries/tests/test_resample.py

+18
Original file line numberDiff line numberDiff line change
@@ -1548,6 +1548,24 @@ def test_resmaple_dst_anchor(self):
15481548
freq='D', tz='Europe/Paris')),
15491549
'D Frequency')
15501550

1551+
def test_wasclosed(self):
1552+
df = pd.Series(1, index=pd.date_range(
1553+
"2015-01-01 00:00:00", "2015-01-01 02:00:00", freq="1h"))
1554+
tests = [17, 30, 55, 60, 77, 120, 127, 243]
1555+
expected_periods = [(60 % i > 0) + 180 // i for i in tests]
1556+
expected_freqs = ["%imin" % i for i in tests]
1557+
1558+
for i in range(len(tests)):
1559+
goal_left = pd.date_range(
1560+
start="2015-01-01 00:00:00", freq=expected_freqs[i], periods=expected_periods[i])
1561+
goal_right = pd.DatetimeIndex(
1562+
end="2015-01-01 02:00:00", freq=expected_freqs[i], periods=expected_periods[i])
1563+
resampled_left = df.resample(
1564+
expected_freqs[i], closed="left", was_closed=True).ffill().index
1565+
resampled_right = df.resample(
1566+
expected_freqs[i], closed="right", was_closed=True).ffill().index
1567+
tm.assert_index_equal(resampled_left, goal_left)
1568+
tm.assert_index_equal(resampled_right, goal_right)
15511569

15521570
def _simple_ts(start, end, freq='D'):
15531571
rng = date_range(start, end, freq=freq)

0 commit comments

Comments
 (0)