Skip to content

Commit 31ca168

Browse files
committed
ENH: anchor tick resampling frequencies that evenly subdivide one day (e.g. 5 minutes) at midnight, so that resampling always yields regular intervals. A bit more testing is needed, but this closes #1165
1 parent 73661c1 commit 31ca168

File tree

8 files changed

+88
-13
lines changed

8 files changed

+88
-13
lines changed

pandas/core/frame.py

-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
from pandas.core.internals import BlockManager, make_block, form_blocks
3333
from pandas.core.series import Series, _radd_compat
3434
from pandas.compat.scipy import scoreatpercentile as _quantile
35-
from pandas.tseries.index import DatetimeIndex
3635
from pandas.tseries.period import PeriodIndex
3736
from pandas.util import py3compat
3837
from pandas.util.terminal import get_terminal_size

pandas/core/internals.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,8 @@ def form_blocks(data, axes):
10131013
blocks.append(int_block)
10141014

10151015
if len(datetime_dict):
1016-
datetime_block = _simple_blockify(datetime_dict, items, np.dtype('M8[us]'))
1016+
datetime_block = _simple_blockify(datetime_dict, items,
1017+
np.dtype('M8[us]'))
10171018
blocks.append(datetime_block)
10181019

10191020
if len(bool_dict):

pandas/tseries/index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ def _generate_regular_range(start, end, periods, offset):
11381138
raise ValueError('Must specify two of start, end, or periods')
11391139

11401140
if isinstance(offset, Tick):
1141-
stride = offset.us_stride()
1141+
stride = offset.micros
11421142
if periods is None:
11431143
b = Timestamp(start).value
11441144
e = Timestamp(end).value

pandas/tseries/offsets.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,8 @@ def delta(self):
962962

963963
return self._delta
964964

965-
def us_stride(self):
965+
@property
def micros(self):
    """
    Return the result of `_delta_to_microseconds` applied to this
    offset's `delta`.
    """
    span = self.delta
    return _delta_to_microseconds(span)
967968

968969
def apply(self, other):

pandas/tseries/resample.py

+49
Original file line numberDiff line numberDiff line change
@@ -233,12 +233,22 @@ def _make_time_bins(axis, freq, begin=None, end=None,
233233
return binner, bins, labels
234234

235235
def _get_range_edges(axis, begin, end, offset, closed='left'):
236+
from pandas.tseries.offsets import Tick, _delta_to_microseconds
236237
if isinstance(offset, basestring):
237238
offset = to_offset(offset)
238239

239240
if not isinstance(offset, DateOffset):
240241
raise ValueError("Rule not a recognized offset")
241242

243+
if isinstance(offset, Tick):
244+
day_micros = _delta_to_microseconds(timedelta(1))
245+
# #1165
246+
if ((day_micros % offset.micros) == 0 and begin is None
247+
and end is None):
248+
return _adjust_dates_anchored(axis[0], axis[-1], offset,
249+
closed=closed)
250+
251+
242252
if begin is None:
243253
if closed == 'left':
244254
first = Timestamp(offset.rollback(axis[0]))
@@ -255,6 +265,45 @@ def _get_range_edges(axis, begin, end, offset, closed='left'):
255265

256266
return first, last
257267

268+
269+
def _adjust_dates_anchored(first, last, offset, closed='right'):
    """
    Snap the range edges (first, last) onto the grid of `offset`-sized
    intervals that is anchored at the start of each endpoint's day.

    Parameters
    ----------
    first, last : Timestamp
        First and last observed timestamps; only their `.value` is used,
        together with `normalize_date` of each.
    offset : tick-like offset
        Must expose `micros`, the interval length in the same integer
        units as `Timestamp.value`.
        NOTE(review): the caller only takes this path when `micros`
        evenly divides one day -- this function does not check that.
    closed : {'right', 'left'}, default 'right'
        Which side of each interval is closed. Any value other than
        'right' is handled as 'left'.

    Returns
    -------
    (Timestamp, Timestamp)
        The adjusted lower and upper edges.
    """
    from pandas.tseries.tools import normalize_date

    first_day_start = Timestamp(normalize_date(first)).value
    last_day_start = Timestamp(normalize_date(last)).value

    step = offset.micros

    # How far each endpoint sits past the preceding grid point
    first_rem = (first.value - first_day_start) % step
    last_rem = (last.value - last_day_start) % step

    right_closed = closed == 'right'

    # Lower edge
    if first_rem > 0:
        # strictly inside an interval: roll back to its start
        lower = first.value - first_rem
    elif right_closed:
        # exactly on a grid point, which a right-closed interval treats as
        # its *end*, so the edge has to move back one full step
        lower = first.value - step
    else:
        # exactly on a grid point that opens a left-closed interval
        lower = first.value

    # Upper edge
    if last_rem > 0:
        # strictly inside an interval: roll forward to its end
        upper = last.value + (step - last_rem)
    elif right_closed:
        # exactly on a grid point that closes a right-closed interval
        upper = last.value
    else:
        # exactly on a grid point, which a left-closed interval treats as
        # its *start*, so the edge has to move forward one full step
        upper = last.value + step

    return Timestamp(lower), Timestamp(upper)
305+
306+
258307
def asfreq(obj, freq, method=None, how=None):
259308
"""
260309
Utility frequency conversion method for Series/DataFrame

pandas/tseries/tests/test_resample.py

+25
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,31 @@ def test_resample_panel_numpy(self):
347347
expected = panel.resample('M', how='mean', axis=1)
348348
tm.assert_panel_equal(result, expected)
349349

350+
def test_resample_anchored_ticks(self):
    """
    Resampling with a fixed delta that evenly divides a day (5 minute,
    4 hour, ...) should be "anchored" at midnight, giving regular
    intervals instead of ones that begin at the first timestamp, which
    may fall in the middle of a desired interval.
    """
    index = date_range('1/1/2000 04:00:00', periods=86400, freq='s')
    full = Series(np.random.randn(len(index)), index=index)
    # Blank out the first two points so that dropping them from the
    # input does not change the aggregated results.
    full[:2] = np.nan
    trimmed = full[2:]

    for rule in ['t', '5t', '15t', '30t', '4h', '12h']:
        result = trimmed.resample(rule, closed='left', label='left')
        expected = full.resample(rule, closed='left', label='left')
        assert_series_equal(result, expected)
365+
366+
def test_resample_daily_anchored(self):
    """
    Daily resampling gives the same result whether or not the first two
    (NaN) observations are present in the input.
    """
    index = date_range('1/1/2000 0:00:00', periods=10000, freq='T')
    full = Series(np.random.randn(len(index)), index=index)
    # Blank out the first two points so that dropping them from the
    # input does not change the aggregated results.
    full[:2] = np.nan
    trimmed = full[2:]

    expected = full.resample('D', closed='left', label='left')
    result = trimmed.resample('D', closed='left', label='left')
    assert_series_equal(result, expected)
374+
350375

351376
def _simple_ts(start, end, freq='D'):
352377
rng = date_range(start, end, freq=freq)

pandas/tseries/tests/test_util.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@
66
from pandas import Series, date_range
77
import pandas.util.testing as tm
88

9-
from pandas.tseries.util import convert_to_annual, isleapyear
9+
from pandas.tseries.util import pivot_annual, isleapyear
1010

11-
class TestConvertAnnual(unittest.TestCase):
11+
class TestPivotAnnual(unittest.TestCase):
1212
"""
13-
New pandas of scikits.timeseries convert_to_annual
13+
New pandas of scikits.timeseries pivot_annual
1414
"""
1515
def test_daily(self):
1616
rng = date_range('1/1/2000', '12/31/2004', freq='D')
1717
ts = Series(np.random.randn(len(rng)), index=rng)
1818

19-
annual = convert_to_annual(ts, 'D')
19+
annual = pivot_annual(ts, 'D')
2020

2121
doy = ts.index.dayofyear
2222
doy[(-isleapyear(ts.index.year)) & (doy >= 60)] += 1
@@ -40,7 +40,7 @@ def test_monthly(self):
4040
rng = date_range('1/1/2000', '12/31/2004', freq='M')
4141
ts = Series(np.random.randn(len(rng)), index=rng)
4242

43-
annual = convert_to_annual(ts, 'M')
43+
annual = pivot_annual(ts, 'M')
4444

4545
month = ts.index.month
4646

@@ -49,13 +49,13 @@ def test_monthly(self):
4949
subset.index = [x.year for x in subset.index]
5050
tm.assert_series_equal(annual[i].dropna(), subset)
5151

52-
def test_interval_monthly(self):
52+
def test_period_monthly(self):
5353
pass
5454

55-
def test_interval_daily(self):
55+
def test_period_daily(self):
5656
pass
5757

58-
def test_interval_weekly(self):
58+
def test_period_weekly(self):
5959
pass
6060

6161
if __name__ == '__main__':

pandas/tseries/util.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from pandas.core.frame import DataFrame
44
import pandas.core.nanops as nanops
55

6-
def convert_to_annual(series, freq=None):
6+
def pivot_annual(series, freq=None):
77
"""
88
Group a series by years, taking leap years into account.
99

0 commit comments

Comments
 (0)