Skip to content

Commit 0dfe3a6

Browse files
committed
ENH: rounding for DatetimeIndex,TimedeltaIndex,Timedelta,Timestamp, pandas-dev#4314
1 parent e42a196 commit 0dfe3a6

File tree

12 files changed

+220
-16
lines changed

12 files changed

+220
-16
lines changed

doc/source/api.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ These can be accessed like ``Series.dt.<property>``.
526526
Series.dt.tz_convert
527527
Series.dt.normalize
528528
Series.dt.strftime
529+
Series.dt.round
529530

530531
**Timedelta Properties**
531532

@@ -1507,7 +1508,7 @@ Time-specific operations
15071508
DatetimeIndex.snap
15081509
DatetimeIndex.tz_convert
15091510
DatetimeIndex.tz_localize
1510-
1511+
DatetimeIndex.round
15111512

15121513
Conversion
15131514
~~~~~~~~~~
@@ -1548,6 +1549,7 @@ Conversion
15481549

15491550
TimedeltaIndex.to_pytimedelta
15501551
TimedeltaIndex.to_series
1552+
TimedeltaIndex.round
15511553

15521554
GroupBy
15531555
-------

doc/source/whatsnew/v0.18.0.txt

+45-2
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,53 @@ New features
3131
Other enhancements
3232
^^^^^^^^^^^^^^^^^^
3333

34+
.. _whatsnew_0180.enhancements.rounding:
3435

36+
Datetimelike rounding
37+
^^^^^^^^^^^^^^^^^^^^^
3538

39+
``DatetimeIndex``, ``Timestamp``, ``TimedeltaIndex``, ``Timedelta`` have gained the ``.round()`` method for datetimelike rounding. (:issue:`4314`)
3640

41+
Naive datetimes
3742

43+
.. ipython:: python
3844

45+
dr = pd.date_range('20130101 09:12:56.1234', periods=3)
46+
dr
47+
dr.round('s')
3948

49+
# Timestamp scalar
50+
dr[0]
51+
dr[0].round('10s')
4052

53+
Tz-aware datetimes are rounded in local time
54+
55+
.. ipython:: python
56+
57+
dr = dr.tz_localize('US/Eastern')
58+
dr
59+
dr.round('s')
60+
61+
Timedeltas
62+
63+
.. ipython:: python
64+
65+
t = timedelta_range('1 days 2 hr 13 min 45 us',periods=3,freq='d')
66+
t
67+
t.round('10min')
68+
69+
# Timedelta scalar
70+
t[0]
71+
t[0].round('2h')
72+
73+
74+
In addition, ``.round()`` will be available through the ``.dt`` accessor of ``Series``.
75+
76+
.. ipython:: python
77+
78+
s = Series(dr)
79+
s
80+
s.dt.round('D')
4181

4282
.. _whatsnew_0180.api:
4383

@@ -57,7 +97,10 @@ Backwards incompatible API changes
5797
Other API Changes
5898
^^^^^^^^^^^^^^^^^
5999

60-
- ``Timedelta.resolution`` will now return proper offset frequency strings
100+
101+
102+
103+
61104

62105

63106

@@ -107,5 +150,5 @@ Bug Fixes
107150
~~~~~~~~~
108151

109152

110-
- Bug in ``Timedelta.round`` with negative values (:issue:``)
153+
- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
111154
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)

pandas/tests/test_categorical.py

+1
Original file line numberDiff line numberDiff line change
@@ -3753,6 +3753,7 @@ def test_dt_accessor_api_for_categorical(self):
37533753
special_func_defs = [
37543754
('strftime', ("%Y-%m-%d",), {}),
37553755
('tz_convert', ("EST",), {}),
3756+
('round', ("D",), {}),
37563757
#('tz_localize', ("UTC",), {}),
37573758
]
37583759
_special_func_names = [f[0] for f in special_func_defs]

pandas/tests/test_series.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ def test_dt_namespace_accessor(self):
8888
ok_for_period_methods = ['strftime']
8989
ok_for_dt = ok_for_base + ['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start',
9090
'is_quarter_end', 'is_year_start', 'is_year_end', 'tz']
91-
ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize', 'strftime']
91+
ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize', 'strftime', 'round']
9292
ok_for_td = ['days','seconds','microseconds','nanoseconds']
93-
ok_for_td_methods = ['components','to_pytimedelta','total_seconds']
93+
ok_for_td_methods = ['components','to_pytimedelta','total_seconds','round']
9494

9595
def get_expected(s, name):
9696
result = getattr(Index(s._values),prop)
@@ -139,6 +139,17 @@ def compare(s, name):
139139
expected = Series(DatetimeIndex(s.values).tz_localize('UTC').tz_convert('US/Eastern'),index=s.index)
140140
tm.assert_series_equal(result, expected)
141141

142+
# round
143+
s = Series(date_range('20130101 09:10:11',periods=5))
144+
result = s.dt.round('D')
145+
expected = Series(date_range('20130101',periods=5))
146+
tm.assert_series_equal(result, expected)
147+
148+
# round with tz
149+
result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D')
150+
expected = Series(date_range('20130101',periods=5)).dt.tz_localize('US/Eastern')
151+
tm.assert_series_equal(result, expected)
152+
142153
# datetimeindex with tz
143154
s = Series(date_range('20130101',periods=5,tz='US/Eastern'))
144155
for prop in ok_for_dt:

pandas/tseries/base.py

+40
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,46 @@ def strftime(self, date_format):
4141
"""
4242
return np.asarray(self.format(date_format=date_format))
4343

44+
class TimelikeOps(object):
45+
""" common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex """
46+
47+
def round(self, freq):
48+
"""
49+
Round the index to the specified freq; this is a floor type of operation
50+
51+
Parameters
52+
----------
53+
freq : freq string/object
54+
55+
Returns
56+
-------
57+
index of same type
58+
59+
Raises
60+
------
61+
ValueError if the freq cannot be converted
62+
"""
63+
64+
from pandas.tseries.frequencies import to_offset
65+
unit = to_offset(freq).nanos
66+
67+
# round the local times
68+
if getattr(self,'tz',None) is not None:
69+
values = self.tz_localize(None).asi8
70+
else:
71+
values = self.asi8
72+
result = (unit*np.floor(values/unit)).astype('i8')
73+
attribs = self._get_attributes_dict()
74+
if 'freq' in attribs:
75+
attribs['freq'] = None
76+
if 'tz' in attribs:
77+
attribs['tz'] = None
78+
result = self._shallow_copy(result, **attribs)
79+
80+
# reconvert to local tz
81+
if getattr(self,'tz',None) is not None:
82+
result = result.tz_localize(self.tz)
83+
return result
4484

4585
class DatetimeIndexOpsMixin(object):
4686
""" common ops mixin to support a unified interface datetimelike Index """

pandas/tseries/common.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ def to_pydatetime(self):
145145
accessors=DatetimeIndex._datetimelike_ops,
146146
typ='property')
147147
DatetimeProperties._add_delegate_accessors(delegate=DatetimeIndex,
148-
accessors=["to_period","tz_localize","tz_convert","normalize","strftime"],
148+
accessors=["to_period","tz_localize","tz_convert",
149+
"normalize","strftime","round"],
149150
typ='method')
150151

151152
class TimedeltaProperties(Properties):
@@ -181,7 +182,7 @@ def components(self):
181182
accessors=TimedeltaIndex._datetimelike_ops,
182183
typ='property')
183184
TimedeltaProperties._add_delegate_accessors(delegate=TimedeltaIndex,
184-
accessors=["to_pytimedelta", "total_seconds"],
185+
accessors=["to_pytimedelta", "total_seconds", "round"],
185186
typ='method')
186187

187188
class PeriodProperties(Properties):

pandas/tseries/index.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from pandas.tseries.frequencies import (
2020
to_offset, get_period_alias,
2121
Resolution)
22-
from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin
22+
from pandas.tseries.base import DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin
2323
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
2424
from pandas.tseries.tools import parse_time_string, normalize_date
2525
from pandas.tseries.timedeltas import to_timedelta
@@ -126,7 +126,7 @@ def _new_DatetimeIndex(cls, d):
126126
result = result.tz_localize('UTC').tz_convert(tz)
127127
return result
128128

129-
class DatetimeIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
129+
class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Int64Index):
130130
"""
131131
Immutable ndarray of datetime64 data, represented internally as int64, and
132132
which can be boxed to Timestamp objects that are subclasses of datetime and

pandas/tseries/offsets.py

+4
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,10 @@ def freqstr(self):
496496

497497
return fstr
498498

499+
@property
500+
def nanos(self):
501+
raise ValueError("{0} is a non-fixed frequency".format(self))
502+
499503
class SingleConstructorOffset(DateOffset):
500504

501505
@classmethod

pandas/tseries/tdi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from pandas.tseries.frequencies import to_offset
1313
import pandas.core.common as com
1414
from pandas.tseries import timedeltas
15-
from pandas.tseries.base import DatetimeIndexOpsMixin
15+
from pandas.tseries.base import TimelikeOps, DatetimeIndexOpsMixin
1616
from pandas.tseries.timedeltas import to_timedelta, _coerce_scalar_to_timedelta_type
1717
import pandas.tseries.offsets as offsets
1818
from pandas.tseries.offsets import Tick, DateOffset
@@ -63,7 +63,7 @@ def wrapper(self, other):
6363
return wrapper
6464

6565

66-
class TimedeltaIndex(DatetimeIndexOpsMixin, Int64Index):
66+
class TimedeltaIndex(DatetimeIndexOpsMixin, TimelikeOps, Int64Index):
6767
"""
6868
Immutable ndarray of timedelta64 data, represented internally as int64, and
6969
which can be boxed to timedelta objects

pandas/tseries/tests/test_timedeltas.py

+46-4
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,11 @@ def test_construction(self):
7474
self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1))
7575
self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1))
7676

77-
# more strings
77+
# more strings & abbrevs
7878
# GH 8190
7979
self.assertEqual(Timedelta('1 h'), timedelta(hours=1))
8080
self.assertEqual(Timedelta('1 hour'), timedelta(hours=1))
81+
self.assertEqual(Timedelta('1 hr'), timedelta(hours=1))
8182
self.assertEqual(Timedelta('1 hours'), timedelta(hours=1))
8283
self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1))
8384
self.assertEqual(Timedelta('1 m'), timedelta(minutes=1))
@@ -169,18 +170,59 @@ def test_round(self):
169170
t1 = Timedelta('1 days 02:34:56.789123456')
170171
t2 = Timedelta('-1 days 02:34:56.789123456')
171172

172-
for (reso, s1, s2) in [('N', t1, t2),
173+
for (freq, s1, s2) in [('N', t1, t2),
173174
('U', Timedelta('1 days 02:34:56.789123000'),Timedelta('-1 days 02:34:56.789123000')),
174175
('L', Timedelta('1 days 02:34:56.789000000'),Timedelta('-1 days 02:34:56.789000000')),
175176
('S', Timedelta('1 days 02:34:56'),Timedelta('-1 days 02:34:56')),
177+
('2S', Timedelta('1 days 02:34:56'),Timedelta('-1 days 02:34:56')),
178+
('5S', Timedelta('1 days 02:34:55'),Timedelta('-1 days 02:34:55')),
176179
('T', Timedelta('1 days 02:34:00'),Timedelta('-1 days 02:34:00')),
180+
('12T', Timedelta('1 days 02:24:00'),Timedelta('-1 days 02:24:00')),
177181
('H', Timedelta('1 days 02:00:00'),Timedelta('-1 days 02:00:00')),
178182
('d', Timedelta('1 days'),Timedelta('-1 days'))]:
179-
r1 = t1.round(reso)
183+
r1 = t1.round(freq)
180184
self.assertEqual(r1, s1)
181-
r2 = t2.round(reso)
185+
r2 = t2.round(freq)
182186
self.assertEqual(r2, s2)
183187

188+
# invalid
189+
for freq in ['Y','M','foobar']:
190+
self.assertRaises(ValueError, lambda : t1.round(freq))
191+
192+
t1 = timedelta_range('1 days',periods=3,freq='1 min 2 s 3 us')
193+
t2 = -1*t1
194+
t1a = timedelta_range('1 days',periods=3,freq='1 min 2 s')
195+
t1b = timedelta_range('1 days',periods=3,freq='1 min')
196+
t1c = pd.TimedeltaIndex([1,1,1],unit='D')
197+
198+
# note that negative times round DOWN! so don't give whole numbers
199+
for (freq, s1, s2) in [('N', t1, t2),
200+
('U', t1, t2),
201+
('L', t1a, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:57.999000',
202+
'-2 days +23:57:55.999000'],
203+
dtype='timedelta64[ns]', freq=None)),
204+
('S', t1a, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:57', '-2 days +23:57:55'],
205+
dtype='timedelta64[ns]', freq=None)),
206+
('2S', t1a, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:56', '-2 days +23:57:54'],
207+
dtype='timedelta64[ns]', freq=None)),
208+
('5S', t1b, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:55', '-2 days +23:57:55'],
209+
dtype='timedelta64[ns]', freq=None)),
210+
('T', t1b, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:00', '-2 days +23:57:00'],
211+
dtype='timedelta64[ns]', freq=None)),
212+
('12T', t1c, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:48:00', '-2 days +23:48:00'],
213+
dtype='timedelta64[ns]', freq=None)),
214+
('H', t1c, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:00:00', '-2 days +23:00:00'],
215+
dtype='timedelta64[ns]', freq=None)),
216+
('d', t1c, pd.TimedeltaIndex([-1,-2,-2],unit='D'))]:
217+
r1 = t1.round(freq)
218+
tm.assert_index_equal(r1, s1)
219+
r2 = t2.round(freq)
220+
tm.assert_index_equal(r2, s2)
221+
222+
# invalid
223+
for freq in ['Y','M','foobar']:
224+
self.assertRaises(ValueError, lambda : t1.round(freq))
225+
184226
def test_repr(self):
185227

186228
self.assertEqual(repr(Timedelta(10,unit='d')),"Timedelta('10 days 00:00:00')")

pandas/tseries/tests/test_timeseries.py

+35
Original file line numberDiff line numberDiff line change
@@ -2702,6 +2702,41 @@ def test_sort_values(self):
27022702
self.assertTrue(ordered[::-1].is_monotonic)
27032703
self.assert_numpy_array_equal(dexer, [0, 2, 1])
27042704

2705+
def test_round(self):
2706+
2707+
# round
2708+
dt = Timestamp('20130101 09:10:11')
2709+
result = dt.round('D')
2710+
expected = Timestamp('20130101')
2711+
self.assertEqual(result, expected)
2712+
2713+
dti = date_range('20130101 09:10:11',periods=5)
2714+
result = dti.round('D')
2715+
expected = date_range('20130101',periods=5)
2716+
tm.assert_index_equal(result, expected)
2717+
2718+
# round with tz
2719+
dt = Timestamp('20130101 09:10:11',tz='US/Eastern')
2720+
result = dt.round('D')
2721+
expected = Timestamp('20130101',tz='US/Eastern')
2722+
self.assertEqual(result, expected)
2723+
2724+
dt = Timestamp('20130101 09:10:11',tz='US/Eastern')
2725+
result = dt.round('s')
2726+
self.assertEqual(result, dt)
2727+
2728+
dti = date_range('20130101 09:10:11',periods=5).tz_localize('UTC').tz_convert('US/Eastern')
2729+
result = dti.round('D')
2730+
expected = date_range('20130101',periods=5).tz_localize('US/Eastern')
2731+
tm.assert_index_equal(result, expected)
2732+
2733+
result = dti.round('s')
2734+
tm.assert_index_equal(result, dti)
2735+
2736+
# invalid
2737+
for freq in ['Y','M','foobar']:
2738+
self.assertRaises(ValueError, lambda : dti.round(freq))
2739+
27052740
def test_insert(self):
27062741
idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'], name='idx')
27072742

0 commit comments

Comments
 (0)