Skip to content

ENH: rounding for datetimelike Indexes/Scalars #11690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 29, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ These can be accessed like ``Series.dt.<property>``.
Series.dt.tz_convert
Series.dt.normalize
Series.dt.strftime
Series.dt.round

**Timedelta Properties**

Expand Down Expand Up @@ -1507,7 +1508,7 @@ Time-specific operations
DatetimeIndex.snap
DatetimeIndex.tz_convert
DatetimeIndex.tz_localize

DatetimeIndex.round

Conversion
~~~~~~~~~~
Expand Down Expand Up @@ -1548,6 +1549,7 @@ Conversion

TimedeltaIndex.to_pytimedelta
TimedeltaIndex.to_series
TimedeltaIndex.round

GroupBy
-------
Expand Down
45 changes: 44 additions & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,53 @@ New features
Other enhancements
^^^^^^^^^^^^^^^^^^

.. _whatsnew_0180.enhancements.rounding:

Datetimelike rounding
^^^^^^^^^^^^^^^^^^^^^

``DatetimeIndex``, ``Timestamp``, ``TimedeltaIndex``, and ``Timedelta`` have gained a ``.round()`` method for datetimelike rounding. (:issue:`4314`)

Naive datetimes

.. ipython:: python

dr = pd.date_range('20130101 09:12:56.1234', periods=3)
dr
dr.round('s')

# Timestamp scalar
dr[0]
dr[0].round('10s')

Tz-aware datetimes are rounded in local time

.. ipython:: python

dr = dr.tz_localize('US/Eastern')
dr
dr.round('s')

Timedeltas

.. ipython:: python

t = timedelta_range('1 days 2 hr 13 min 45 us',periods=3,freq='d')
t
t.round('10min')

# Timedelta scalar
t[0]
t[0].round('2h')


In addition, ``.round()`` is available through the ``.dt`` accessor of ``Series``.

.. ipython:: python

s = Series(dr)
s
s.dt.round('D')

.. _whatsnew_0180.api:

Expand Down Expand Up @@ -65,6 +105,9 @@ Other API Changes






.. _whatsnew_0180.deprecations:

Deprecations
Expand Down Expand Up @@ -107,5 +150,5 @@ Bug Fixes
~~~~~~~~~



- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
1 change: 1 addition & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -3753,6 +3753,7 @@ def test_dt_accessor_api_for_categorical(self):
special_func_defs = [
('strftime', ("%Y-%m-%d",), {}),
('tz_convert', ("EST",), {}),
('round', ("D",), {}),
#('tz_localize', ("UTC",), {}),
]
_special_func_names = [f[0] for f in special_func_defs]
Expand Down
15 changes: 13 additions & 2 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ def test_dt_namespace_accessor(self):
ok_for_period_methods = ['strftime']
ok_for_dt = ok_for_base + ['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start',
'is_quarter_end', 'is_year_start', 'is_year_end', 'tz']
ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize', 'strftime']
ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize', 'strftime', 'round']
ok_for_td = ['days','seconds','microseconds','nanoseconds']
ok_for_td_methods = ['components','to_pytimedelta','total_seconds']
ok_for_td_methods = ['components','to_pytimedelta','total_seconds','round']

def get_expected(s, name):
result = getattr(Index(s._values),prop)
Expand Down Expand Up @@ -139,6 +139,17 @@ def compare(s, name):
expected = Series(DatetimeIndex(s.values).tz_localize('UTC').tz_convert('US/Eastern'),index=s.index)
tm.assert_series_equal(result, expected)

# round
s = Series(date_range('20130101 09:10:11',periods=5))
result = s.dt.round('D')
expected = Series(date_range('20130101',periods=5))
tm.assert_series_equal(result, expected)

# round with tz
result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D')
expected = Series(date_range('20130101',periods=5)).dt.tz_localize('US/Eastern')
tm.assert_series_equal(result, expected)

# datetimeindex with tz
s = Series(date_range('20130101',periods=5,tz='US/Eastern'))
for prop in ok_for_dt:
Expand Down
40 changes: 40 additions & 0 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,46 @@ def strftime(self, date_format):
"""
return np.asarray(self.format(date_format=date_format))

class TimelikeOps(object):
    """ common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex """

    def round(self, freq):
        """
        Round the index to the specified freq; this is a floor type of
        operation (values are rounded towards negative infinity)

        Parameters
        ----------
        freq : freq string/object

        Returns
        -------
        index of same type

        Raises
        ------
        ValueError if the freq cannot be converted
        """

        from pandas.tseries.frequencies import to_offset
        unit = to_offset(freq).nanos

        # round the local times: tz-aware data is made naive first so the
        # flooring happens on wall-clock values
        tz = getattr(self, 'tz', None)
        if tz is not None:
            values = self.tz_localize(None).asi8
        else:
            values = self.asi8

        # Use integer floor division rather than np.floor(values/unit):
        # true division goes through float64, which cannot represent
        # nanosecond values above 2**53 exactly and silently corrupts the
        # low-order digits. ``//`` floors towards negative infinity, so
        # negative values keep the same "round down" behaviour.
        # NOTE(review): missing values are not masked here -- confirm how
        # asi8 encodes them and whether they must be restored afterwards.
        result = ((values // unit) * unit).astype('i8')

        # the rounded values no longer conform to the original freq, and
        # they are naive at this point, so drop both attributes
        attribs = self._get_attributes_dict()
        if 'freq' in attribs:
            attribs['freq'] = None
        if 'tz' in attribs:
            attribs['tz'] = None
        result = self._shallow_copy(result, **attribs)

        # reconvert to local tz
        if tz is not None:
            result = result.tz_localize(tz)
        return result

class DatetimeIndexOpsMixin(object):
""" common ops mixin to support a unified inteface datetimelike Index """
Expand Down
5 changes: 3 additions & 2 deletions pandas/tseries/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def to_pydatetime(self):
accessors=DatetimeIndex._datetimelike_ops,
typ='property')
DatetimeProperties._add_delegate_accessors(delegate=DatetimeIndex,
accessors=["to_period","tz_localize","tz_convert","normalize","strftime"],
accessors=["to_period","tz_localize","tz_convert",
"normalize","strftime","round"],
typ='method')

class TimedeltaProperties(Properties):
Expand Down Expand Up @@ -181,7 +182,7 @@ def components(self):
accessors=TimedeltaIndex._datetimelike_ops,
typ='property')
TimedeltaProperties._add_delegate_accessors(delegate=TimedeltaIndex,
accessors=["to_pytimedelta", "total_seconds"],
accessors=["to_pytimedelta", "total_seconds", "round"],
typ='method')

class PeriodProperties(Properties):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pandas.tseries.frequencies import (
to_offset, get_period_alias,
Resolution)
from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin
from pandas.tseries.base import DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
from pandas.tseries.tools import parse_time_string, normalize_date
from pandas.tseries.timedeltas import to_timedelta
Expand Down Expand Up @@ -126,7 +126,7 @@ def _new_DatetimeIndex(cls, d):
result = result.tz_localize('UTC').tz_convert(tz)
return result

class DatetimeIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Int64Index):
"""
Immutable ndarray of datetime64 data, represented internally as int64, and
which can be boxed to Timestamp objects that are subclasses of datetime and
Expand Down
4 changes: 4 additions & 0 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,10 @@ def freqstr(self):

return fstr

@property
def nanos(self):
    """
    Always raises: this offset reports itself as a non-fixed frequency,
    so no nanosecond length is returned.

    Raises
    ------
    ValueError
    """
    message = "{0} is a non-fixed frequency".format(self)
    raise ValueError(message)

class SingleConstructorOffset(DateOffset):

@classmethod
Expand Down
19 changes: 4 additions & 15 deletions pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pandas.tseries.frequencies import to_offset
import pandas.core.common as com
from pandas.tseries import timedeltas
from pandas.tseries.base import DatetimeIndexOpsMixin
from pandas.tseries.base import TimelikeOps, DatetimeIndexOpsMixin
from pandas.tseries.timedeltas import to_timedelta, _coerce_scalar_to_timedelta_type
import pandas.tseries.offsets as offsets
from pandas.tseries.offsets import Tick, DateOffset
Expand All @@ -24,16 +24,6 @@

Timedelta = tslib.Timedelta

_resolution_map = {
'ns' : offsets.Nano,
'us' : offsets.Micro,
'ms' : offsets.Milli,
's' : offsets.Second,
'm' : offsets.Minute,
'h' : offsets.Hour,
'D' : offsets.Day,
}

def _td_index_cmp(opname, nat_result=False):
"""
Wrap comparison operations to convert timedelta-like to timedelta64
Expand Down Expand Up @@ -73,7 +63,7 @@ def wrapper(self, other):
return wrapper


class TimedeltaIndex(DatetimeIndexOpsMixin, Int64Index):
class TimedeltaIndex(DatetimeIndexOpsMixin, TimelikeOps, Int64Index):
"""
Immutable ndarray of timedelta64 data, represented internally as int64, and
which can be boxed to timedelta objects
Expand Down Expand Up @@ -706,7 +696,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
if side == 'left':
return lbound
else:
return (lbound + _resolution_map[parsed.resolution]() -
return (lbound + to_offset(parsed.resolution) -
Timedelta(1, 'ns'))
elif is_integer(label) or is_float(label):
self._invalid_indexer('slice',label)
Expand Down Expand Up @@ -734,9 +724,8 @@ def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True):

# figure out the resolution of the passed td
# and round to it
reso = parsed.resolution
t1 = parsed.round(reso)
t2 = t1 + _resolution_map[reso]() - Timedelta(1,'ns')
t2 = t1 + to_offset(parsed.resolution) - Timedelta(1,'ns')

stamps = self.asi8

Expand Down
61 changes: 60 additions & 1 deletion pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@ def test_construction(self):
self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1))
self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1))

# more strings
# more strings & abbrevs
# GH 8190
self.assertEqual(Timedelta('1 h'), timedelta(hours=1))
self.assertEqual(Timedelta('1 hour'), timedelta(hours=1))
self.assertEqual(Timedelta('1 hr'), timedelta(hours=1))
self.assertEqual(Timedelta('1 hours'), timedelta(hours=1))
self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1))
self.assertEqual(Timedelta('1 m'), timedelta(minutes=1))
Expand Down Expand Up @@ -164,6 +165,64 @@ def test_construction(self):
self.assertEqual(Timedelta(pd.offsets.Hour(2)),Timedelta('0 days, 02:00:00'))
self.assertEqual(Timedelta(pd.offsets.Second(2)),Timedelta('0 days, 00:00:02'))

def test_round(self):
    """
    Exercise ``round`` on Timedelta scalars and on TimedeltaIndex, for
    positive and negative values, plus frequencies that must be rejected.
    """

    # ---- scalars ----
    pos = Timedelta('1 days 02:34:56.789123456')
    neg = Timedelta('-1 days 02:34:56.789123456')

    # (freq, expected for pos, expected for neg)
    scalar_cases = [
        ('N', pos, neg),
        ('U', Timedelta('1 days 02:34:56.789123000'),
         Timedelta('-1 days 02:34:56.789123000')),
        ('L', Timedelta('1 days 02:34:56.789000000'),
         Timedelta('-1 days 02:34:56.789000000')),
        ('S', Timedelta('1 days 02:34:56'),
         Timedelta('-1 days 02:34:56')),
        ('2S', Timedelta('1 days 02:34:56'),
         Timedelta('-1 days 02:34:56')),
        ('5S', Timedelta('1 days 02:34:55'),
         Timedelta('-1 days 02:34:55')),
        ('T', Timedelta('1 days 02:34:00'),
         Timedelta('-1 days 02:34:00')),
        ('12T', Timedelta('1 days 02:24:00'),
         Timedelta('-1 days 02:24:00')),
        ('H', Timedelta('1 days 02:00:00'),
         Timedelta('-1 days 02:00:00')),
        ('d', Timedelta('1 days'), Timedelta('-1 days')),
    ]
    for rule, want_pos, want_neg in scalar_cases:
        got_pos = pos.round(rule)
        self.assertEqual(got_pos, want_pos)
        got_neg = neg.round(rule)
        self.assertEqual(got_neg, want_neg)

    # invalid
    for rule in ['Y', 'M', 'foobar']:
        self.assertRaises(ValueError, lambda : pos.round(rule))

    # ---- indexes ----
    pos_idx = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us')
    neg_idx = -1*pos_idx
    to_second = timedelta_range('1 days', periods=3, freq='1 min 2 s')
    to_minute = timedelta_range('1 days', periods=3, freq='1 min')
    whole_days = pd.TimedeltaIndex([1, 1, 1], unit='D')

    def expected_neg(*stamps):
        # expected index for the negated input, built from string stamps
        return TimedeltaIndex(list(stamps),
                              dtype='timedelta64[ns]', freq=None)

    # note that negative times round DOWN! so don't give whole numbers
    index_cases = [
        ('N', pos_idx, neg_idx),
        ('U', pos_idx, neg_idx),
        ('L', to_second, expected_neg('-1 days +00:00:00',
                                      '-2 days +23:58:57.999000',
                                      '-2 days +23:57:55.999000')),
        ('S', to_second, expected_neg('-1 days +00:00:00',
                                      '-2 days +23:58:57',
                                      '-2 days +23:57:55')),
        ('2S', to_second, expected_neg('-1 days +00:00:00',
                                       '-2 days +23:58:56',
                                       '-2 days +23:57:54')),
        ('5S', to_minute, expected_neg('-1 days +00:00:00',
                                       '-2 days +23:58:55',
                                       '-2 days +23:57:55')),
        ('T', to_minute, expected_neg('-1 days +00:00:00',
                                      '-2 days +23:58:00',
                                      '-2 days +23:57:00')),
        ('12T', whole_days, expected_neg('-1 days +00:00:00',
                                         '-2 days +23:48:00',
                                         '-2 days +23:48:00')),
        ('H', whole_days, expected_neg('-1 days +00:00:00',
                                       '-2 days +23:00:00',
                                       '-2 days +23:00:00')),
        ('d', whole_days, pd.TimedeltaIndex([-1, -2, -2], unit='D')),
    ]
    for rule, want_pos, want_neg in index_cases:
        got_pos = pos_idx.round(rule)
        tm.assert_index_equal(got_pos, want_pos)
        got_neg = neg_idx.round(rule)
        tm.assert_index_equal(got_neg, want_neg)

    # invalid
    for rule in ['Y', 'M', 'foobar']:
        self.assertRaises(ValueError, lambda : pos_idx.round(rule))

def test_repr(self):

self.assertEqual(repr(Timedelta(10,unit='d')),"Timedelta('10 days 00:00:00')")
Expand Down
Loading