diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index db5bd22393e64..545b4380d9b75 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -58,4 +58,4 @@ Bug Fixes - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`) - Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`) - Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` - is not scalar and ``values`` is not specified (:issue:`14380`) \ No newline at end of file + is not scalar and ``values`` is not specified (:issue:`14380`) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d0009efd2d994..5cc9d575521f3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -52,6 +52,9 @@ Other enhancements - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) + +- Multiple offset aliases with decimal points are now supported (e.g. '0.5min' is parsed as '30s') (:issue:`8419`) + - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack of sorting or an incorrect key. 
See :ref:`here ` diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index 5565f25937394..2d92b9f192328 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -45,12 +45,12 @@ cdef bint PY2 = version_info[0] == 2 cdef int64_t NPY_NAT = util.get_nat() -cdef int US_RESO = frequencies.US_RESO -cdef int MS_RESO = frequencies.MS_RESO -cdef int S_RESO = frequencies.S_RESO -cdef int T_RESO = frequencies.T_RESO -cdef int H_RESO = frequencies.H_RESO -cdef int D_RESO = frequencies.D_RESO +cdef int RESO_US = frequencies.RESO_US +cdef int RESO_MS = frequencies.RESO_MS +cdef int RESO_SEC = frequencies.RESO_SEC +cdef int RESO_MIN = frequencies.RESO_MIN +cdef int RESO_HR = frequencies.RESO_HR +cdef int RESO_DAY = frequencies.RESO_DAY cdef extern from "period_helper.h": ctypedef struct date_info: @@ -516,7 +516,7 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) pandas_datetimestruct dts - int reso = D_RESO, curr_reso + int reso = RESO_DAY, curr_reso if tz is not None: tz = maybe_get_tz(tz) @@ -535,20 +535,20 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None): cdef inline int _reso_stamp(pandas_datetimestruct *dts): if dts.us != 0: if dts.us % 1000 == 0: - return MS_RESO - return US_RESO + return RESO_MS + return RESO_US elif dts.sec != 0: - return S_RESO + return RESO_SEC elif dts.min != 0: - return T_RESO + return RESO_MIN elif dts.hour != 0: - return H_RESO - return D_RESO + return RESO_HR + return RESO_DAY cdef _reso_local(ndarray[int64_t] stamps, object tz): cdef: Py_ssize_t n = len(stamps) - int reso = D_RESO, curr_reso + int reso = RESO_DAY, curr_reso ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index ac094c1f545f3..e0c602bf5a037 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -38,32 +38,55 @@ class FreqGroup(object): FR_NS = 12000 -US_RESO = 0 -MS_RESO = 1 -S_RESO = 2 -T_RESO = 3 -H_RESO 
= 4 -D_RESO = 5 +RESO_NS = 0 +RESO_US = 1 +RESO_MS = 2 +RESO_SEC = 3 +RESO_MIN = 4 +RESO_HR = 5 +RESO_DAY = 6 class Resolution(object): - # defined in period.pyx - # note that these are different from freq codes - RESO_US = US_RESO - RESO_MS = MS_RESO - RESO_SEC = S_RESO - RESO_MIN = T_RESO - RESO_HR = H_RESO - RESO_DAY = D_RESO + RESO_US = RESO_US + RESO_MS = RESO_MS + RESO_SEC = RESO_SEC + RESO_MIN = RESO_MIN + RESO_HR = RESO_HR + RESO_DAY = RESO_DAY _reso_str_map = { + RESO_NS: 'nanosecond', RESO_US: 'microsecond', RESO_MS: 'millisecond', RESO_SEC: 'second', RESO_MIN: 'minute', RESO_HR: 'hour', - RESO_DAY: 'day'} + RESO_DAY: 'day' + } + + # factor to multiply a value by to convert it to the next finer grained + # resolution + _reso_mult_map = { + RESO_NS: None, + RESO_US: 1000, + RESO_MS: 1000, + RESO_SEC: 1000, + RESO_MIN: 60, + RESO_HR: 60, + RESO_DAY: 24 + } + + _reso_str_bump_map = { + 'D': 'H', + 'H': 'T', + 'T': 'S', + 'S': 'L', + 'L': 'U', + 'U': 'N', + 'N': None + } _str_reso_map = dict([(v, k) for k, v in compat.iteritems(_reso_str_map)]) @@ -160,6 +183,47 @@ def get_reso_from_freq(cls, freq): """ return cls.get_reso(cls.get_str_from_freq(freq)) + @classmethod + def get_stride_from_decimal(cls, value, freq): + """ + Convert freq with decimal stride into a higher freq with integer stride + + Parameters + ---------- + value : integer or float + freq : string + Frequency string + + Raises + ------ + ValueError + If the float cannot be converted to an integer at any resolution. 
+ + Example + ------- + >>> Resolution.get_stride_from_decimal(1.5, 'T') + (90, 'S') + + >>> Resolution.get_stride_from_decimal(1.04, 'H') + (3744, 'S') + + >>> Resolution.get_stride_from_decimal(1, 'D') + (1, 'D') + """ + + if np.isclose(value % 1, 0): + return int(value), freq + else: + start_reso = cls.get_reso_from_freq(freq) + if start_reso == 0: + raise ValueError( + "Could not convert to integer offset at any resolution" + ) + + next_value = cls._reso_mult_map[start_reso] * value + next_name = cls._reso_str_bump_map[freq] + return cls.get_stride_from_decimal(next_value, next_name) + def get_to_timestamp_base(base): """ @@ -472,12 +536,17 @@ def to_offset(freq): splitted[2::4]): if sep != '' and not sep.isspace(): raise ValueError('separator must be spaces') - offset = get_offset(name) + prefix = _lite_rule_alias.get(name) or name if stride_sign is None: stride_sign = -1 if stride.startswith('-') else 1 if not stride: stride = 1 + if prefix in Resolution._reso_str_bump_map.keys(): + stride, name = Resolution.get_stride_from_decimal( + float(stride), prefix + ) stride = int(stride) + offset = get_offset(name) offset = offset * int(np.fabs(stride) * stride_sign) if delta is None: delta = offset @@ -493,7 +562,9 @@ def to_offset(freq): # hack to handle WOM-1MON -opattern = re.compile(r'([\-]?\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)') +opattern = re.compile( + r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' +) def _base_and_stride(freqstr): diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index 5ba98f15aed8d..dfb7b26371d7a 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -39,6 +39,21 @@ def test_to_offset_multiple(self): expected = offsets.Hour(3) assert (result == expected) + freqstr = '2h 20.5min' + result = frequencies.to_offset(freqstr) + expected = offsets.Second(8430) + assert (result == expected) + + freqstr = '1.5min' + result = 
frequencies.to_offset(freqstr) + expected = offsets.Second(90) + assert (result == expected) + + freqstr = '0.5S' + result = frequencies.to_offset(freqstr) + expected = offsets.Milli(500) + assert (result == expected) + freqstr = '15l500u' result = frequencies.to_offset(freqstr) expected = offsets.Micro(15500) @@ -49,6 +64,16 @@ def test_to_offset_multiple(self): expected = offsets.Milli(10075) assert (result == expected) + freqstr = '1s0.25ms' + result = frequencies.to_offset(freqstr) + expected = offsets.Micro(1000250) + assert (result == expected) + + freqstr = '1s0.25L' + result = frequencies.to_offset(freqstr) + expected = offsets.Micro(1000250) + assert (result == expected) + freqstr = '2800N' result = frequencies.to_offset(freqstr) expected = offsets.Nano(2800) @@ -107,10 +132,8 @@ def test_to_offset_invalid(self): frequencies.to_offset('-2-3U') with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -2D:3H'): frequencies.to_offset('-2D:3H') - - # ToDo: Must be fixed in #8419 - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: .5S'): - frequencies.to_offset('.5S') + with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 1.5.0S'): + frequencies.to_offset('1.5.0S') # split offsets with spaces are valid assert frequencies.to_offset('2D 3H') == offsets.Hour(51) @@ -379,6 +402,26 @@ def test_freq_to_reso(self): result = Reso.get_freq(Reso.get_str(Reso.get_reso_from_freq(freq))) self.assertEqual(freq, result) + def test_resolution_bumping(self): + # GH 14378 + Reso = frequencies.Resolution + + self.assertEqual(Reso.get_stride_from_decimal(1.5, 'T'), (90, 'S')) + self.assertEqual(Reso.get_stride_from_decimal(62.4, 'T'), (3744, 'S')) + self.assertEqual(Reso.get_stride_from_decimal(1.04, 'H'), (3744, 'S')) + self.assertEqual(Reso.get_stride_from_decimal(1, 'D'), (1, 'D')) + self.assertEqual(Reso.get_stride_from_decimal(0.342931, 'H'), + (1234551600, 'U')) + self.assertEqual(Reso.get_stride_from_decimal(1.2345, 'D'), + (106660800, 'L')) + + with 
self.assertRaises(ValueError): + Reso.get_stride_from_decimal(0.5, 'N') + + # too much precision in the input can prevent conversion to an integer stride + with self.assertRaises(ValueError): + Reso.get_stride_from_decimal(0.3429324798798269273987982, 'H') + def test_get_freq_code(self): # freqstr self.assertEqual(frequencies.get_freq_code('A'), diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index b45f867be65dd..58ec1561b2535 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -14,7 +14,7 @@ from pandas.tseries.index import date_range, DatetimeIndex from pandas.tseries.frequencies import ( get_freq, - US_RESO, MS_RESO, S_RESO, H_RESO, D_RESO, T_RESO + RESO_US, RESO_MS, RESO_SEC, RESO_HR, RESO_DAY, RESO_MIN ) import pandas.tseries.tools as tools import pandas.tseries.offsets as offsets @@ -1528,11 +1528,11 @@ def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], - [D_RESO, D_RESO, - D_RESO, D_RESO, - H_RESO, T_RESO, - S_RESO, MS_RESO, - US_RESO]): + [RESO_DAY, RESO_DAY, + RESO_DAY, RESO_DAY, + RESO_HR, RESO_MIN, + RESO_SEC, RESO_MS, + RESO_US]): for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Eastern']: idx = date_range(start='2013-04-01', periods=30, freq=freq,