Skip to content

ENH date_range accepts timedelta as freq #6318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ Enhancements



- ``pd.date_range`` accepts ``datetime.timedelta`` and ``numpy.timedelta64`` objects as ``freq`` (:issue:`6307`).



Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,23 @@ def test_constructor_from_series(self):
# GH 6274
# infer freq of same
result = pd.infer_freq(df['date'])
self.assertEqual(result,'MS')
self.assertEqual(result, 'MS')

def test_timedelta_np(self):
    # A numpy timedelta64 given as ``freq`` should come back as the
    # equivalent tick offset on the resulting index.
    from pandas import _np_version_under1p7

    # Guard clause: the skip message states this is unsupported on
    # numpy < 1.7, so bail out before building anything.
    if _np_version_under1p7:
        raise nose.SkipTest("to_offset with freq timedelta "
                            "not supported numpy < 1.7")

    one_second = np.timedelta64(1, 's')
    index = DatetimeIndex(start='2014-02-01', freq=one_second, periods=2)

    expected_freq = offsets.Second(1)
    self.assertEqual(index.freq, expected_freq)

def test_timedelta_dt(self):
    # A datetime.timedelta given as ``freq`` should compare equal to the
    # same span expressed as a Micro offset.
    one_day = timedelta(1)

    # Day(1).nanos is the length of a day in nanoseconds; dividing by 1000
    # gives the microsecond count handed to Micro below.
    micros_per_day = offsets.Day(1).nanos / 1000

    index = DatetimeIndex(start='2014-02-01', freq=one_day, periods=2)
    expected_freq = offsets.Micro(micros_per_day)
    self.assertEqual(index.freq, expected_freq)

def test_constructor_ndarray_like(self):
# GH 5460#issuecomment-44474502
Expand Down
56 changes: 50 additions & 6 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime
from datetime import datetime, timedelta
from pandas.compat import range, long, zip
from pandas import compat
from pandas import compat, _np_version_under1p7
import re

import numpy as np
Expand Down Expand Up @@ -241,7 +241,7 @@ def get_period_alias(offset_str):

def to_offset(freqstr):
"""
Return DateOffset object from string representation
Return DateOffset object from string representation, or timedelta

Examples
--------
Expand All @@ -251,16 +251,27 @@ def to_offset(freqstr):
if freqstr is None:
return None

if isinstance(freqstr, DateOffset):
return freqstr
elif isinstance(freqstr, DateOffset):
return _simplify_offset(freqstr)

if isinstance(freqstr, tuple):
elif isinstance(freqstr, tuple):
name = freqstr[0]
stride = freqstr[1]
if isinstance(stride, compat.string_types):
name, stride = stride, name
name, _ = _base_and_stride(name)
delta = get_offset(name) * stride

elif isinstance(freqstr, timedelta):
from pandas.tseries.offsets import _delta_to_tick
return _delta_to_tick(freqstr)

elif isinstance(freqstr, np.timedelta64):
# Note: numpy timedelta can deal with < ns
# however, pandas offsets do not
from pandas.tseries.offsets import _np_delta_to_tick
return _np_delta_to_tick(freqstr)

else:
delta = None
stride_sign = None
Expand Down Expand Up @@ -387,6 +398,39 @@ def get_legacy_offset_name(offset):
name = offset.name
return _legacy_reverse_map.get(name, name)


def _simplify_offset(offset):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is completely internal, yes? Is there any reason NOT to always simplify (e.g. in to_offset)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to_offset uses offsets._delta_to_tick, which also performs this simplification as part of a similar operation (delta to offset)...

...Aha, but to_offset(an_offset) currently just returns an_offset. Are you suggesting it should return _simplify_offset(an_offset)? That seems reasonable and in line with the rest of the function.

'''
Simplify representation if possible.

Example
-------
>>> _simplify_offset(Second(60))
<Minute>

'''
from pandas.tseries.offsets import (Nano, Micro, Milli, Second,
Minute, Hour, Day)
if isinstance(offset, (Nano, Micro, Milli, Second, Minute, Hour,)):
ns = offset.nanos

def _offset(ns, unit, unit_ns):
units, rem = divmod(ns, unit_ns)
if rem == 0:
return unit(units)

units_in_ns = [(Day, 86400000000000), (Hour, 3600000000000),
(Minute, 60000000000), (Second, 1000000000),
(Milli, 1000000), (Micro, 1000)]

for unit, unit_ns in units_in_ns:
new_offset = _offset(ns, unit, unit_ns)
if new_offset:
return new_offset

return offset


def get_standard_freq(freq):
"""
Return the standardized frequency string
Expand Down
4 changes: 2 additions & 2 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index):
Optional datetime-like data to construct index with
copy : bool
Make a copy of input ndarray
freq : string or pandas offset object, optional
freq : string, pandas offset object, timedelta, optional
One of pandas date offset strings or corresponding objects
start : starting value, datetime-like, optional
If data is None, start is used as the start point in generating regular
Expand Down Expand Up @@ -1897,7 +1897,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
Right bound for generating dates
periods : integer or None, default None
If None, must specify start and end
freq : string or DateOffset, default 'D' (calendar daily)
freq : string, DateOffset or timedelta, default 'D' (calendar daily)
Frequency strings can have multiples, e.g. '5H'
tz : string or None
Time zone name for returning localized DatetimeIndex, for example
Expand Down
30 changes: 26 additions & 4 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1924,8 +1924,20 @@ def onOffset(self, dt):


def _tick_comp(op):
def delta_with_ns_remainder(self):
if isinstance(self.delta, timedelta):
return (self.delta, 0)
else:
# it's a numpy.datetime64[ns]
ms, ns_rem = divmod(self.n, 10**3)
return timedelta(microseconds=ms), ns_rem

def f(self, other):
return op(self.delta, other.delta)
if type(self) == type(other):
return op(self.delta, other.delta)
else:
return op(delta_with_ns_remainder(self),
delta_with_ns_remainder(other))

return f

Expand Down Expand Up @@ -1958,7 +1970,7 @@ def __eq__(self, other):
other = to_offset(other)

if isinstance(other, Tick):
return self.delta == other.delta
return _tick_comp(operator.eq)(self, other)
else:
return DateOffset.__eq__(self, other)

Expand Down Expand Up @@ -2026,15 +2038,25 @@ def _delta_to_tick(delta):
def _delta_to_nanoseconds(delta):
if isinstance(delta, np.timedelta64):
return delta.astype('timedelta64[ns]').item()
elif isinstance(delta, Tick):
if isinstance(delta, Tick):
delta = delta.delta

if isinstance(delta, int):
return delta * 1000
return (delta.days * 24 * 60 * 60 * 1000000
+ delta.seconds * 1000000
+ delta.microseconds) * 1000


class Day(Tick):
def _np_delta_to_tick(npdelta):
    """
    Convert a ``numpy.timedelta64`` into a tick offset.

    Parameters
    ----------
    npdelta : numpy.timedelta64
        The span to convert.

    Returns
    -------
    The result of ``_delta_to_tick`` when the span is a whole number of
    microseconds, otherwise ``Nano(ns)`` with the span in nanoseconds.
    """
    if _np_version_under1p7:
        # Legacy path, kept as in the original: divide by a plain 1.
        # NOTE(review): timedelta64 behaviour on numpy < 1.7 is not
        # verified here -- callers in the tests skip on those versions.
        ns = npdelta / 1
    else:
        # Take the exact integer nanosecond count.  Dividing two
        # timedelta64 values yields a float64, which silently loses
        # precision for spans above 2**53 ns (roughly 104 days) and hands
        # floats to Nano/timedelta.  Same conversion as used by
        # _delta_to_nanoseconds for numpy timedeltas.
        ns = npdelta.astype('timedelta64[ns]').item()

    # pandas tick offsets bottom out at nanoseconds; anything that is a
    # whole number of microseconds can go through the timedelta route.
    whole_us, leftover_ns = divmod(ns, 1000)
    if leftover_ns == 0:
        return _delta_to_tick(timedelta(microseconds=whole_us))
    return Nano(ns)


class Day(CacheableOffset, Tick):
_inc = timedelta(1)
_prefix = 'D'

Expand Down
22 changes: 21 additions & 1 deletion pandas/tseries/tests/test_daterange.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import datetime, timedelta
from pandas.compat import range
import pickle
import nose
Expand Down Expand Up @@ -370,6 +370,26 @@ def test_range_bug(self):
exp_values = [start + i * offset for i in range(5)]
self.assert_numpy_array_equal(result, DatetimeIndex(exp_values))

def test_freq_timedelta_np(self):
from pandas import _np_version_under1p7
if _np_version_under1p7:
raise nose.SkipTest("date_range with freq timedelta "
"not supported numpy < 1.7")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this may not need to be skipped


from pandas.tseries.offsets import Nano, Micro, Second, Day

nptd = np.timedelta64(1, 's')
dti_n = date_range(start='2014-02-01', freq=nptd, periods=2)
self.assertEqual(dti_n.freq, Second(1))

def test_freq_timedelta_dt(self):
    # date_range should accept a datetime.timedelta as ``freq`` and expose
    # a frequency equal to the same span expressed as a Micro offset.
    from pandas.tseries.offsets import Nano, Micro, Second, Day

    one_day = timedelta(1)

    # Day(1).nanos is the length of a day in nanoseconds; dividing by 1000
    # gives the microsecond count handed to Micro below.
    micros_per_day = Day(1).nanos / 1000

    index = date_range(start='2014-02-01', freq=one_day, periods=2)
    expected_freq = Micro(micros_per_day)
    self.assertEqual(index.freq, expected_freq)

def test_range_tz_pytz(self):
# GH 2906
_skip_if_no_pytz()
Expand Down
Loading