diff --git a/doc/source/api.rst b/doc/source/api.rst index ac79528f31e04..c7f815914358b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -526,6 +526,7 @@ These can be accessed like ``Series.dt.``. Series.dt.tz_convert Series.dt.normalize Series.dt.strftime + Series.dt.round **Timedelta Properties** @@ -1507,7 +1508,7 @@ Time-specific operations DatetimeIndex.snap DatetimeIndex.tz_convert DatetimeIndex.tz_localize - + DatetimeIndex.round Conversion ~~~~~~~~~~ @@ -1548,6 +1549,7 @@ Conversion TimedeltaIndex.to_pytimedelta TimedeltaIndex.to_series + TimedeltaIndex.round GroupBy ------- diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 926e191c96754..d233edd9c88f3 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -31,13 +31,53 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ +.. _whatsnew_0180.enhancements.rounding: +Datetimelike rounding +^^^^^^^^^^^^^^^^^^^^^ +``DatetimeIndex``, ``Timestamp``, ``TimedeltaIndex``, ``Timedelta`` have gained the ``.round()`` method for datetimelike rounding. (:issue:`4314`) +Naive datetimes +.. ipython:: python + dr = pd.date_range('20130101 09:12:56.1234', periods=3) + dr + dr.round('s') + # Timestamp scalar + dr[0] + dr[0].round('10s') +Tz-aware datetimes are rounded in local time + +.. ipython:: python + + dr = dr.tz_localize('US/Eastern') + dr + dr.round('s') + +Timedeltas + +.. ipython:: python + + t = timedelta_range('1 days 2 hr 13 min 45 us',periods=3,freq='d') + t + t.round('10min') + + # Timedelta scalar + t[0] + t[0].round('2h') + + +In addition, ``.round()`` will be available through the ``.dt`` accessor of ``Series``. + +.. ipython:: python + + s = Series(dr) + s + s.dt.round('D') .. _whatsnew_0180.api: @@ -65,6 +105,9 @@ Other API Changes + + + ..
_whatsnew_0180.deprecations: Deprecations @@ -107,5 +150,5 @@ Bug Fixes ~~~~~~~~~ - +- Bug in ``Timedelta.round`` with negative values (:issue:`11690`) - Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index e98c98fdec8b3..64908f96bfdd8 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3753,6 +3753,7 @@ def test_dt_accessor_api_for_categorical(self): special_func_defs = [ ('strftime', ("%Y-%m-%d",), {}), ('tz_convert', ("EST",), {}), + ('round', ("D",), {}), #('tz_localize', ("UTC",), {}), ] _special_func_names = [f[0] for f in special_func_defs] diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 4b0f9a9f633b4..0fb66ee2dfa7c 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -88,9 +88,9 @@ def test_dt_namespace_accessor(self): ok_for_period_methods = ['strftime'] ok_for_dt = ok_for_base + ['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 'tz'] - ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize', 'strftime'] + ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize', 'strftime', 'round'] ok_for_td = ['days','seconds','microseconds','nanoseconds'] - ok_for_td_methods = ['components','to_pytimedelta','total_seconds'] + ok_for_td_methods = ['components','to_pytimedelta','total_seconds','round'] def get_expected(s, name): result = getattr(Index(s._values),prop) @@ -139,6 +139,17 @@ def compare(s, name): expected = Series(DatetimeIndex(s.values).tz_localize('UTC').tz_convert('US/Eastern'),index=s.index) tm.assert_series_equal(result, expected) + # round + s = Series(date_range('20130101 09:10:11',periods=5)) + result = s.dt.round('D') + expected = Series(date_range('20130101',periods=5)) + 
tm.assert_series_equal(result, expected) + + # round with tz + result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D') + expected = Series(date_range('20130101',periods=5)).dt.tz_localize('US/Eastern') + tm.assert_series_equal(result, expected) + # datetimeindex with tz s = Series(date_range('20130101',periods=5,tz='US/Eastern')) for prop in ok_for_dt: diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index b063360b91280..4f0780ef2d660 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -41,6 +41,46 @@ def strftime(self, date_format): """ return np.asarray(self.format(date_format=date_format)) +class TimelikeOps(object): + """ common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex """ + + def round(self, freq): + """ + Round the index to the specified freq; this is a floor type of operation + + Parameters + ---------- + freq : freq string/object + + Returns + ------- + index of same type + + Raises + ------ + ValueError if the freq cannot be converted + """ + + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + + # round the local times + if getattr(self,'tz',None) is not None: + values = self.tz_localize(None).asi8 + else: + values = self.asi8 + result = (unit*np.floor(values/unit)).astype('i8') + attribs = self._get_attributes_dict() + if 'freq' in attribs: + attribs['freq'] = None + if 'tz' in attribs: + attribs['tz'] = None + result = self._shallow_copy(result, **attribs) + + # reconvert to local tz + if getattr(self,'tz',None) is not None: + result = result.tz_localize(self.tz) + return result class DatetimeIndexOpsMixin(object): """ common ops mixin to support a unified inteface datetimelike Index """ diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 31b5281aa86a6..c033706a4d82f 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -145,7 +145,8 @@ def to_pydatetime(self): accessors=DatetimeIndex._datetimelike_ops, typ='property')
DatetimeProperties._add_delegate_accessors(delegate=DatetimeIndex, - accessors=["to_period","tz_localize","tz_convert","normalize","strftime"], + accessors=["to_period","tz_localize","tz_convert", + "normalize","strftime","round"], typ='method') class TimedeltaProperties(Properties): @@ -181,7 +182,7 @@ def components(self): accessors=TimedeltaIndex._datetimelike_ops, typ='property') TimedeltaProperties._add_delegate_accessors(delegate=TimedeltaIndex, - accessors=["to_pytimedelta", "total_seconds"], + accessors=["to_pytimedelta", "total_seconds", "round"], typ='method') class PeriodProperties(Properties): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 4fd61e28233a6..007e4381c47d4 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -19,7 +19,7 @@ from pandas.tseries.frequencies import ( to_offset, get_period_alias, Resolution) -from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin +from pandas.tseries.base import DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date from pandas.tseries.timedeltas import to_timedelta @@ -126,7 +126,7 @@ def _new_DatetimeIndex(cls, d): result = result.tz_localize('UTC').tz_convert(tz) return result -class DatetimeIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): +class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray of datetime64 data, represented internally as int64, and which can be boxed to Timestamp objects that are subclasses of datetime and diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 0dac09a243d36..69c1c60c354dc 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -496,6 +496,10 @@ def freqstr(self): return fstr + @property + def nanos(self): + raise ValueError("{0} is a non-fixed frequency".format(self)) + class 
SingleConstructorOffset(DateOffset): @classmethod diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 89229fc48bcb2..d2f53b165f557 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -12,7 +12,7 @@ from pandas.tseries.frequencies import to_offset import pandas.core.common as com from pandas.tseries import timedeltas -from pandas.tseries.base import DatetimeIndexOpsMixin +from pandas.tseries.base import TimelikeOps, DatetimeIndexOpsMixin from pandas.tseries.timedeltas import to_timedelta, _coerce_scalar_to_timedelta_type import pandas.tseries.offsets as offsets from pandas.tseries.offsets import Tick, DateOffset @@ -24,16 +24,6 @@ Timedelta = tslib.Timedelta -_resolution_map = { - 'ns' : offsets.Nano, - 'us' : offsets.Micro, - 'ms' : offsets.Milli, - 's' : offsets.Second, - 'm' : offsets.Minute, - 'h' : offsets.Hour, - 'D' : offsets.Day, - } - def _td_index_cmp(opname, nat_result=False): """ Wrap comparison operations to convert timedelta-like to timedelta64 @@ -73,7 +63,7 @@ def wrapper(self, other): return wrapper -class TimedeltaIndex(DatetimeIndexOpsMixin, Int64Index): +class TimedeltaIndex(DatetimeIndexOpsMixin, TimelikeOps, Int64Index): """ Immutable ndarray of timedelta64 data, represented internally as int64, and which can be boxed to timedelta objects @@ -706,7 +696,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): if side == 'left': return lbound else: - return (lbound + _resolution_map[parsed.resolution]() - + return (lbound + to_offset(parsed.resolution) - Timedelta(1, 'ns')) elif is_integer(label) or is_float(label): self._invalid_indexer('slice',label) @@ -734,9 +724,9 @@ def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True): # figure out the resolution of the passed td # and round to it reso = parsed.resolution t1 = parsed.round(reso) - t2 = t1 + _resolution_map[reso]() - Timedelta(1,'ns') + t2 = t1 + to_offset(reso) - Timedelta(1,'ns') stamps = self.asi8 diff --git
a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 45b98b0f85b1c..4bff2e3c5c2cd 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -74,10 +74,11 @@ def test_construction(self): self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1)) self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1)) - # more strings + # more strings & abbrevs # GH 8190 self.assertEqual(Timedelta('1 h'), timedelta(hours=1)) self.assertEqual(Timedelta('1 hour'), timedelta(hours=1)) + self.assertEqual(Timedelta('1 hr'), timedelta(hours=1)) self.assertEqual(Timedelta('1 hours'), timedelta(hours=1)) self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1)) self.assertEqual(Timedelta('1 m'), timedelta(minutes=1)) @@ -164,6 +165,64 @@ def test_construction(self): self.assertEqual(Timedelta(pd.offsets.Hour(2)),Timedelta('0 days, 02:00:00')) self.assertEqual(Timedelta(pd.offsets.Second(2)),Timedelta('0 days, 00:00:02')) + def test_round(self): + + t1 = Timedelta('1 days 02:34:56.789123456') + t2 = Timedelta('-1 days 02:34:56.789123456') + + for (freq, s1, s2) in [('N', t1, t2), + ('U', Timedelta('1 days 02:34:56.789123000'),Timedelta('-1 days 02:34:56.789123000')), + ('L', Timedelta('1 days 02:34:56.789000000'),Timedelta('-1 days 02:34:56.789000000')), + ('S', Timedelta('1 days 02:34:56'),Timedelta('-1 days 02:34:56')), + ('2S', Timedelta('1 days 02:34:56'),Timedelta('-1 days 02:34:56')), + ('5S', Timedelta('1 days 02:34:55'),Timedelta('-1 days 02:34:55')), + ('T', Timedelta('1 days 02:34:00'),Timedelta('-1 days 02:34:00')), + ('12T', Timedelta('1 days 02:24:00'),Timedelta('-1 days 02:24:00')), + ('H', Timedelta('1 days 02:00:00'),Timedelta('-1 days 02:00:00')), + ('d', Timedelta('1 days'),Timedelta('-1 days'))]: + r1 = t1.round(freq) + self.assertEqual(r1, s1) + r2 = t2.round(freq) + self.assertEqual(r2, s2) + + # invalid + for freq in ['Y','M','foobar']: + self.assertRaises(ValueError, lambda : 
t1.round(freq)) + + t1 = timedelta_range('1 days',periods=3,freq='1 min 2 s 3 us') + t2 = -1*t1 + t1a = timedelta_range('1 days',periods=3,freq='1 min 2 s') + t1b = timedelta_range('1 days',periods=3,freq='1 min') + t1c = pd.TimedeltaIndex([1,1,1],unit='D') + + # note that negative times round DOWN! so don't give whole numbers + for (freq, s1, s2) in [('N', t1, t2), + ('U', t1, t2), + ('L', t1a, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:57.999000', + '-2 days +23:57:55.999000'], + dtype='timedelta64[ns]', freq=None)), + ('S', t1a, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:57', '-2 days +23:57:55'], + dtype='timedelta64[ns]', freq=None)), + ('2S', t1a, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:56', '-2 days +23:57:54'], + dtype='timedelta64[ns]', freq=None)), + ('5S', t1b, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:55', '-2 days +23:57:55'], + dtype='timedelta64[ns]', freq=None)), + ('T', t1b, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:58:00', '-2 days +23:57:00'], + dtype='timedelta64[ns]', freq=None)), + ('12T', t1c, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:48:00', '-2 days +23:48:00'], + dtype='timedelta64[ns]', freq=None)), + ('H', t1c, TimedeltaIndex(['-1 days +00:00:00', '-2 days +23:00:00', '-2 days +23:00:00'], + dtype='timedelta64[ns]', freq=None)), + ('d', t1c, pd.TimedeltaIndex([-1,-2,-2],unit='D'))]: + r1 = t1.round(freq) + tm.assert_index_equal(r1, s1) + r2 = t2.round(freq) + tm.assert_index_equal(r2, s2) + + # invalid + for freq in ['Y','M','foobar']: + self.assertRaises(ValueError, lambda : t1.round(freq)) + def test_repr(self): self.assertEqual(repr(Timedelta(10,unit='d')),"Timedelta('10 days 00:00:00')") diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index b5556804b3548..de264f5559fd0 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -2702,6 +2702,41 @@ def test_sort_values(self): 
self.assertTrue(ordered[::-1].is_monotonic) self.assert_numpy_array_equal(dexer, [0, 2, 1]) + def test_round(self): + + # round + dt = Timestamp('20130101 09:10:11') + result = dt.round('D') + expected = Timestamp('20130101') + self.assertEqual(result, expected) + + dti = date_range('20130101 09:10:11',periods=5) + result = dti.round('D') + expected = date_range('20130101',periods=5) + tm.assert_index_equal(result, expected) + + # round with tz + dt = Timestamp('20130101 09:10:11',tz='US/Eastern') + result = dt.round('D') + expected = Timestamp('20130101',tz='US/Eastern') + self.assertEqual(result, expected) + + dt = Timestamp('20130101 09:10:11',tz='US/Eastern') + result = dt.round('s') + self.assertEqual(result, dt) + + dti = date_range('20130101 09:10:11',periods=5).tz_localize('UTC').tz_convert('US/Eastern') + result = dti.round('D') + expected = date_range('20130101',periods=5).tz_localize('US/Eastern') + tm.assert_index_equal(result, expected) + + result = dti.round('s') + tm.assert_index_equal(result, dti) + + # invalid + for freq in ['Y','M','foobar']: + self.assertRaises(ValueError, lambda : dti.round(freq)) + def test_insert(self): idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'], name='idx') diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 713cf08bfc3e2..fa263b458c1e9 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -362,6 +362,28 @@ class Timestamp(_Timestamp): def _repr_base(self): return '%s %s' % (self._date_repr, self._time_repr) + def round(self, freq): + """ + return a new Timestamp rounded to this resolution + + Parameters + ---------- + freq : a freq string indicating the rounding resolution + """ + cdef int64_t unit + cdef object result, value + + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + if self.tz is not None: + value = self.tz_localize(None).value + else: + value = self.value + result = Timestamp(unit*np.floor(value/unit),unit='ns') + if self.tz is not None: + result =
result.tz_localize(self.tz) + return result + @property def tz(self): """ @@ -2301,52 +2323,34 @@ class Timedelta(_Timedelta): self._ensure_components() if self._ns: - return "ns" + return "N" elif self._us: - return "us" + return "U" elif self._ms: - return "ms" + return "L" elif self._s: - return "s" + return "S" elif self._m: - return "m" + return "T" elif self._h: - return "h" + return "H" else: return "D" - def round(self, reso): + def round(self, freq): """ return a new Timedelta rounded to this resolution Parameters ---------- - reso : a string indicating the rouding resolution, accepting values - d,h,m,s,ms,us - + freq : a freq string indicating the rounding resolution """ - cdef int64_t frac, value = np.abs(self.value) - - self._ensure_components() - frac = int(self._ms*1e6 + self._us*1e3+ self._ns) - if reso == 'us': - value -= self._ns - elif reso == 'ms': - value -= self._us*1000 + self._ns - elif reso == 's': - value -= frac - elif reso == 'm': - value -= int(self._s*1e9) + frac - elif reso == 'h': - value -= int((60*self._m + self._s)*1e9) + frac - elif reso == 'd' or reso == 'D': - value -= int((3600*self._h + 60*self._m + self._s)*1e9) + frac - else: - raise ValueError("invalid resolution") + cdef int64_t result, unit - if self._sign < 0: - value *= -1 - return Timedelta(value,unit='ns') + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + result = unit*np.floor(self.value/unit) + return Timedelta(result,unit='ns') def _repr_base(self, format=None): """ @@ -2636,11 +2640,13 @@ def convert_to_timedelta(object ts, object unit='ns', errors='raise'): assert is_raise or is_ignore or is_coerce return convert_to_timedelta64(ts, unit, is_coerce) -cdef dict timedelta_abbrevs = { 'd' : 'd', +cdef dict timedelta_abbrevs = { 'D' : 'd', + 'd' : 'd', 'days' : 'd', 'day' : 'd', 'hours' : 'h', 'hour' : 'h', + 'hr' : 'h', 'h' : 'h', 'm' : 'm', 'minute' : 'm', @@ -2666,6 +2672,7 @@ cdef dict timedelta_abbrevs = { 'd' : 'd', 'nanos' :
'ns', 'nanosecond' : 'ns', } +timedelta_abbrevs_map = timedelta_abbrevs cdef inline int64_t timedelta_as_neg(int64_t value, bint neg): """