From 6a042f76ea1646faef8d9d11b506ddb705c2918d Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Mon, 10 Feb 2014 13:04:33 -0800 Subject: [PATCH 1/2] ENH date_range accepts timedelta as freq FIX compare ns Ticker with others FIX remove reference to no_simple_ctr (nowhere else) DOC move to 0.14.1 --- doc/source/v0.14.1.txt | 1 + pandas/tests/test_index.py | 18 ++- pandas/tseries/frequencies.py | 53 ++++++++- pandas/tseries/index.py | 4 +- pandas/tseries/offsets.py | 26 ++++- pandas/tseries/tests/test_daterange.py | 22 +++- pandas/tseries/tests/test_frequencies.py | 138 ++++++++++++----------- pandas/tseries/tests/test_offsets.py | 3 +- 8 files changed, 188 insertions(+), 77 deletions(-) diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index cfdef3adb1f34..c8682abc8c20b 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -97,6 +97,7 @@ Enhancements +- ``pd.date_range`` accepts datetime and numpy timedeltas (:issue:`6307`). diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 0752ec52c9a1e..c28e64fb2683d 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -171,7 +171,23 @@ def test_constructor_from_series(self): # GH 6274 # infer freq of same result = pd.infer_freq(df['date']) - self.assertEqual(result,'MS') + self.assertEqual(result, 'MS') + + def test_timedelta_np(self): + from pandas import _np_version_under1p7 + if _np_version_under1p7: + raise nose.SkipTest("to_offset with freq timedelta " + "not supported numpy < 1.7") + + nptd = np.timedelta64(1, 's') + dti_n = DatetimeIndex(start='2014-02-01', freq=nptd, periods=2) + self.assertEqual(dti_n.freq, offsets.Second(1)) + + def test_timedelta_dt(self): + dttd = timedelta(1) + us = offsets.Day(1).nanos / 1000 + dti_d = DatetimeIndex(start='2014-02-01', freq=dttd, periods=2) + self.assertEqual(dti_d.freq, offsets.Micro(us)) def test_constructor_ndarray_like(self): # GH 5460#issuecomment-44474502 diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 09ff6578160f8..2a3d68ade3f28 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,6 +1,6 @@ -from datetime import datetime +from datetime import datetime, timedelta from pandas.compat import range, long, zip -from pandas import compat +from pandas import compat, _np_version_under1p7 import re import numpy as np @@ -241,7 +241,7 @@ def get_period_alias(offset_str): def to_offset(freqstr): """ - Return DateOffset object from string representation + Return DateOffset object from string representation, or timedelta Examples -------- @@ -251,16 +251,27 @@ def to_offset(freqstr): if freqstr is None: return None - if isinstance(freqstr, DateOffset): + elif isinstance(freqstr, DateOffset): return freqstr - if isinstance(freqstr, tuple): + elif isinstance(freqstr, tuple): name = freqstr[0] stride = freqstr[1] if isinstance(stride, compat.string_types): name, stride = stride, name name, _ = _base_and_stride(name) delta = get_offset(name) * stride + + elif isinstance(freqstr, timedelta): + from pandas.tseries.offsets import _delta_to_tick + return _delta_to_tick(freqstr) + + elif isinstance(freqstr, np.timedelta64): + # Note: numpy timedelta can deal with < ns + # however, pandas offsets do not + from pandas.tseries.offsets import _np_delta_to_tick + return _np_delta_to_tick(freqstr) + else: delta = None stride_sign = None @@ -387,6 +398,38 @@ def get_legacy_offset_name(offset): name = offset.name return _legacy_reverse_map.get(name, name) + +def _simplify_offset(offset): + ''' + Simplify representation if possible. + + Example + ------- + >>> _simplify_offset(Second(60)) + + + ''' + from pandas.tseries.offsets import (Nano, Micro, Milli, Second, + Minute, Hour, Day) + if isinstance((Nano, Micro, Milli, Second, Hour,)): + ns = offset.nanos + + def higher_offset(ns, unit, unit_ns): + units, rem = divmod(ns, unit_ns) + if not rem: + return unit(units) + + units_in_ns = [(Day, 86400000000000), (Hour, 3600000000000), + (Minute, 60000000000), (Second, 1000000000) + (Milli, 1000000), (Micro, 1000)] + + # None if can't simplify + simplified = any(higher_offset(ns, unit, unit_ns) + for unit, unit_ns in units_in_ns) + + return simplified or offset + + def get_standard_freq(freq): """ Return the standardized frequency string diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 7f0e00105bba5..df207d20ab103 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -115,7 +115,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index): Optional datetime-like data to construct index with copy : bool Make a copy of input ndarray - freq : string or pandas offset object, optional + freq : string, pandas offset object, timedelta, optional One of pandas date offset strings or corresponding objects start : starting value, datetime-like, optional If data is None, start is used as the start point in generating regular @@ -1897,7 +1897,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Right bound for generating dates periods : integer or None, default None If None, must specify start and end - freq : string or DateOffset, default 'D' (calendar daily) + freq : string, DateOffset or timedelta, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' tz : string or None Time zone name for returning localized DatetimeIndex, for example diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 9cbef50f2d82f..20c4a773bf9a6 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1924,8 +1924,20 @@ def onOffset(self, dt): def _tick_comp(op): + def delta_with_ns_remainder(self): + if isinstance(self.delta, timedelta): + return (self.delta, 0) + else: + # it's a numpy.datetime64[ns] + ms, ns_rem = divmod(self.n, 10**3) + return timedelta(microseconds=ms), ns_rem + def f(self, other): - return op(self.delta, other.delta) + if type(self) == type(other): + return op(self.delta, other.delta) + else: + return op(delta_with_ns_remainder(self), + delta_with_ns_remainder(other)) return f @@ -1958,7 +1970,7 @@ def __eq__(self, other): other = to_offset(other) if isinstance(other, Tick): - return self.delta == other.delta + return _tick_comp(operator.eq)(self, other) else: return DateOffset.__eq__(self, other) @@ -2034,7 +2046,15 @@ def _delta_to_nanoseconds(delta): + delta.microseconds) * 1000 -class Day(Tick): +def _np_delta_to_tick(npdelta): + one_ns = np.timedelta64(1, 'ns') if not _np_version_under1p7 else 1 + ns = npdelta / one_ns + if ns % 1000 == 0: + return _delta_to_tick(timedelta(microseconds=ns / 1000)) + return Nano(ns) + + +class Day(CacheableOffset, Tick): _inc = timedelta(1) _prefix = 'D' diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 0a732ac7bc7e8..f842a02bb36bc 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timedelta from pandas.compat import range import pickle import nose @@ -370,6 +370,26 @@ def test_range_bug(self): exp_values = [start + i * offset for i in range(5)] self.assert_numpy_array_equal(result, DatetimeIndex(exp_values)) + def test_freq_timedelta_np(self): + from pandas import _np_version_under1p7 + if _np_version_under1p7: + raise nose.SkipTest("date_range with freq timedelta " + "not supported numpy < 1.7") + + from pandas.tseries.offsets import Nano, Micro, Second, Day + + nptd = np.timedelta64(1, 's') + dti_n = date_range(start='2014-02-01', freq=nptd, periods=2) + self.assertEqual(dti_n.freq, Second(1)) + + def test_freq_timedelta_dt(self): + from pandas.tseries.offsets import Nano, Micro, Second, Day + + dttd = timedelta(1) + us = Day(1).nanos / 1000 + dti_d = date_range(start='2014-02-01', freq=dttd, periods=2) + self.assertEqual(dti_d.freq, Micro(us)) + def test_range_tz_pytz(self): # GH 2906 _skip_if_no_pytz() diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index 9089ca85ac3bb..d58329369632c 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -19,81 +19,92 @@ from pandas import _np_version_under1p7 import pandas.util.testing as tm -def test_to_offset_multiple(): - freqstr = '2h30min' - freqstr2 = '2h 30min' - - result = to_offset(freqstr) - assert(result == to_offset(freqstr2)) - expected = offsets.Minute(150) - assert(result == expected) - - freqstr = '2h30min15s' - result = to_offset(freqstr) - expected = offsets.Second(150 * 60 + 15) - assert(result == expected) - - freqstr = '2h 60min' - result = to_offset(freqstr) - expected = offsets.Hour(3) - assert(result == expected) - - freqstr = '15l500u' - result = to_offset(freqstr) - expected = offsets.Micro(15500) - assert(result == expected) - - freqstr = '10s75L' - result = to_offset(freqstr) - expected = offsets.Milli(10075) - assert(result == expected) - - if not _np_version_under1p7: - freqstr = '2800N' + +_dti = DatetimeIndex + + +class TestToOffset(tm.TestCase): + + def test_to_offset_multiple(self): + freqstr = '2h30min' + freqstr2 = '2h 30min' + result = to_offset(freqstr) - expected = offsets.Nano(2800) - assert(result == expected) + self.assertEqual(result, to_offset(freqstr2)) + expected = offsets.Minute(150) + self.assertEqual(result, expected) - # malformed - try: - to_offset('2h20m') - except ValueError: - pass - else: - assert(False) + freqstr = '2h30min15s' + result = to_offset(freqstr) + expected = offsets.Second(150 * 60 + 15) + self.assertEqual(result, expected) + freqstr = '2h 60min' + result = to_offset(freqstr) + expected = offsets.Hour(3) + self.assertEqual(result, expected) -def test_to_offset_negative(): - freqstr = '-1S' - result = to_offset(freqstr) - assert(result.n == -1) + freqstr = '15l500u' + result = to_offset(freqstr) + expected = offsets.Micro(15500) + self.assertEqual(result, expected) - freqstr = '-5min10s' - result = to_offset(freqstr) - assert(result.n == -310) + freqstr = '10s75L' + result = to_offset(freqstr) + expected = offsets.Milli(10075) + self.assertEqual(result, expected) + if not _np_version_under1p7: + freqstr = '2800N' + result = to_offset(freqstr) + expected = offsets.Nano(2800) + self.assertEqual(result, expected) -def test_to_offset_leading_zero(): - freqstr = '00H 00T 01S' - result = to_offset(freqstr) - assert(result.n == 1) + # malformed + self.assertRaises(ValueError, to_offset, '2h20m') - freqstr = '-00H 03T 14S' - result = to_offset(freqstr) - assert(result.n == -194) + def test_to_offset_negative(self): + freqstr = '-1S' + result = to_offset(freqstr) + self.assertEqual(result.n, -1) + freqstr = '-5min10s' + result = to_offset(freqstr) + self.assertEqual(result.n, -310) -def test_anchored_shortcuts(): - result = to_offset('W') - expected = to_offset('W-SUN') - assert(result == expected) + def test_to_offset_leading_zero(self): + freqstr = '00H 00T 01S' + result = to_offset(freqstr) + self.assertEqual(result.n, 1) - result = to_offset('Q') - expected = to_offset('Q-DEC') - assert(result == expected) + freqstr = '-00H 03T 14S' + result = to_offset(freqstr) + self.assertEqual(result.n, -194) + def test_anchored_shortcuts(self): + result = to_offset('W') + expected = to_offset('W-SUN') + self.assertEqual(result, expected) -_dti = DatetimeIndex + result = to_offset('Q') + expected = to_offset('Q-DEC') + self.assertEqual(result, expected) + + def test_offset_timedelta_np(self): + from pandas import _np_version_under1p7 + if _np_version_under1p7: + raise nose.SkipTest("to_offset with freq timedelta " + "not supported numpy < 1.7") + + nptd = to_offset(np.timedelta64(100, 'ns')) + self.assertEqual(nptd, offsets.Nano(100)) + nptd = to_offset(np.timedelta64(1, 'ms')) + self.assertEqual(nptd, offsets.Nano(10 ** 6)) + + def test_offset_timedelta_dt(self): + dttd = to_offset(timedelta(1)) + ms = offsets.Day(1).nanos / 1000 + self.assertEqual(dttd, offsets.Micro(ms)) class TestFrequencyInference(tm.TestCase): @@ -175,7 +186,8 @@ def test_week_of_month(self): def test_week_of_month_fake(self): #All of these dates are on same day of week and are 4 or 5 weeks apart - index = DatetimeIndex(["2013-08-27","2013-10-01","2013-10-29","2013-11-26"]) + index = DatetimeIndex(["2013-08-27", "2013-10-01", + "2013-10-29", "2013-11-26"]) assert infer_freq(index) != 'WOM-4TUE' def test_monthly(self): diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 4fc7d281bc473..c8cc2c8cf1975 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -2986,8 +2986,7 @@ def test_should_cache_week_month(self): def test_all_cacheableoffsets(self): for subclass in get_all_subclasses(CacheableOffset): - if subclass.__name__[0] == "_" \ - or subclass in TestCaching.no_simple_ctr: + if subclass.__name__[0] == "_" : continue self.run_X_index_creation(subclass) From b454af42fb183188ada8017439ee6895804ceeec Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Fri, 30 May 2014 13:30:47 -0700 Subject: [PATCH 2/2] ENH/FIX to_offset simplifies offset --- pandas/tseries/frequencies.py | 19 ++++++++++--------- pandas/tseries/offsets.py | 4 +++- pandas/tseries/tests/test_frequencies.py | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 2a3d68ade3f28..004ecf250a311 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -252,7 +252,7 @@ def to_offset(freqstr): return None elif isinstance(freqstr, DateOffset): - return freqstr + return _simplify_offset(freqstr) elif isinstance(freqstr, tuple): name = freqstr[0] @@ -411,23 +411,24 @@ def _simplify_offset(offset): ''' from pandas.tseries.offsets import (Nano, Micro, Milli, Second, Minute, Hour, Day) - if isinstance((Nano, Micro, Milli, Second, Hour,)): + if isinstance(offset, (Nano, Micro, Milli, Second, Minute, Hour,)): ns = offset.nanos - def higher_offset(ns, unit, unit_ns): + def _offset(ns, unit, unit_ns): units, rem = divmod(ns, unit_ns) - if not rem: + if rem == 0: return unit(units) units_in_ns = [(Day, 86400000000000), (Hour, 3600000000000), - (Minute, 60000000000), (Second, 1000000000) + (Minute, 60000000000), (Second, 1000000000), (Milli, 1000000), (Micro, 1000)] - # None if can't simplify - simplified = any(higher_offset(ns, unit, unit_ns) - for unit, unit_ns in units_in_ns) + for unit, unit_ns in units_in_ns: + new_offset = _offset(ns, unit, unit_ns) + if new_offset: + return new_offset - return simplified or offset + return offset def get_standard_freq(freq): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 20c4a773bf9a6..d547b243d8b8b 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2038,9 +2038,11 @@ def _delta_to_tick(delta): def _delta_to_nanoseconds(delta): if isinstance(delta, np.timedelta64): return delta.astype('timedelta64[ns]').item() - elif isinstance(delta, Tick): + if isinstance(delta, Tick): delta = delta.delta + if isinstance(delta, int): + return delta * 1000 return (delta.days * 24 * 60 * 60 * 1000000 + delta.seconds * 1000000 + delta.microseconds) * 1000 diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index d58329369632c..3b265863b8863 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -13,6 +13,7 @@ from pandas.tseries.tools import to_datetime import pandas.tseries.frequencies as fmod import pandas.tseries.offsets as offsets +from pandas.tseries.offsets import Nano, Micro, Second, Minute, Hour, Day from pandas.tseries.period import PeriodIndex import pandas.compat as compat @@ -106,6 +107,24 @@ def test_offset_timedelta_dt(self): ms = offsets.Day(1).nanos / 1000 self.assertEqual(dttd, offsets.Micro(ms)) + def test_to_offset_offset(self): + off = Nano(1000) + offed = to_offset(off) + exp = Micro(1) + assert isinstance(offed, Micro) + self.assertEqual(exp, offed) + + off = Minute(60) + offed = to_offset(off) + exp = Hour(1) + assert isinstance(offed, Hour) + self.assertEqual(exp, offed) + + off = Minute(90) + offed = to_offset(off) + exp = off + assert isinstance(offed, Minute) + self.assertEqual(exp, offed) class TestFrequencyInference(tm.TestCase):