From 789e9aac213bccd594a937d73c74158f626221f5 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Tue, 30 Oct 2018 18:32:32 +0900 Subject: [PATCH 1/2] BUG: Cleanup timedelta offset --- doc/source/whatsnew/v0.24.0.txt | 2 + pandas/_libs/tslibs/timedeltas.pyx | 66 +++++++++++++--- pandas/core/tools/timedeltas.py | 54 +++---------- .../tests/scalar/timedelta/test_timedelta.py | 76 +++++++++++++------ 4 files changed, 122 insertions(+), 76 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fa748cccc0d65..567f84d612445 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1127,6 +1127,8 @@ Timedelta - Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) - Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`) - Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`) +- Bug in :class:`Timedelta` and :func:`to_timedelta()` have inconsistencies in supported unit string (:issue:`21762`) + Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f0a57c49a98fc..c09a8e5b395ee 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -45,10 +45,16 @@ Components = collections.namedtuple('Components', [ 'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds', 'nanoseconds']) -cdef dict timedelta_abbrevs = { 'D': 'd', - 'd': 'd', - 'days': 'd', - 'day': 'd', + +cdef dict timedelta_abbrevs = { 'Y': 'Y', + 'y': 'Y', + 'M': 'M', + 'W': 'W', + 'w': 'W', + 'D': 'D', + 'd': 'D', + 'days': 'D', + 'day': 'D', 'hours': 'h', 'hour': 'h', 'hr': 'h', @@ -57,6 +63,7 @@ cdef dict timedelta_abbrevs = { 'D': 'd', 'minute': 'm', 'min': 'm', 'minutes': 'm', + 't': 'm', 's': 's', 'seconds': 's', 'sec': 's', @@ -66,16 +73,19 @@ cdef dict timedelta_abbrevs = { 'D': 'd', 'millisecond': 'ms', 'milli': 'ms', 'millis': 'ms', + 'l': 'ms', 'us': 'us', 'microseconds': 'us', 'microsecond': 'us', 'micro': 'us', 'micros': 'us', + 'u': 'us', 'ns': 'ns', 'nanoseconds': 'ns', 'nano': 'ns', 'nanos': 'ns', - 'nanosecond': 'ns'} + 'nanosecond': 'ns', + 'n': 'ns'} _no_input = object() @@ -140,7 +150,8 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: cpdef convert_to_timedelta64(object ts, object unit): """ - Convert an incoming object to a timedelta64 if possible + Convert an incoming object to a timedelta64 if possible. + Before calling, unit must be standardized to avoid repeated unit conversion Handle these types of objects: - timedelta/Timedelta @@ -228,6 +239,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): for i in range(n): result[i] = parse_timedelta_string(values[i]) except: + unit = parse_timedelta_unit(unit) for i in range(n): try: result[i] = convert_to_timedelta64(values[i], unit) @@ -247,7 +259,16 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: int64_t m int p - if unit == 'D' or unit == 'd': + if unit == 'Y': + m = 1000000000L * 31556952 + p = 9 + elif unit == 'M': + m = 1000000000L * 2629746 + p = 9 + elif unit == 'W': + m = 1000000000L * 86400 * 7 + p = 9 + elif unit == 'D' or unit == 'd': m = 1000000000L * 86400 p = 9 elif unit == 'h': @@ -485,7 +506,11 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): try: unit = ''.join(unit) - unit = timedelta_abbrevs[unit.lower()] + if unit == 'M': + # To parse ISO 8601 string, 'M' should be treated as minute, + # not month + unit = 'm' + unit = parse_timedelta_unit(unit) except KeyError: raise ValueError("invalid abbreviation: {unit}".format(unit=unit)) @@ -493,6 +518,22 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): return cast_from_unit(float(n), unit) +cpdef inline object parse_timedelta_unit(object unit): + """ + Parameters + ---------- + unit : an unit string + """ + if unit is None: + return 'ns' + elif unit == 'M': + return unit + try: + return timedelta_abbrevs[unit.lower()] + except (KeyError, AttributeError): + raise ValueError("invalid unit abbreviation: {unit}" + .format(unit=unit)) + # ---------------------------------------------------------------------- # Timedelta ops utilities @@ -1070,7 +1111,13 @@ class Timedelta(_Timedelta): Parameters ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, {'ns', 'us', 'ms', 's', 'm', 'h', 'D'}, optional + unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', + 'hours', hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', + 'T', 'S', 'seconds', 'sec', 'second', 'ms', + 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', + 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond' + 'N'}, optional Denote the unit of the input, if input is an integer. Default 'ns'. days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional @@ -1121,6 +1168,7 @@ class Timedelta(_Timedelta): value = np.timedelta64(delta_to_nanoseconds(value.delta), 'ns') elif is_integer_object(value) or is_float_object(value): # unit=None is de-facto 'ns' + unit = parse_timedelta_unit(unit) value = convert_to_timedelta64(value, unit) elif checknull_with_nat(value): return NaT diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 4dc4fcb00d84d..220b14a9cb7c6 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -6,7 +6,8 @@ import pandas as pd from pandas._libs import tslibs from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64, - array_to_timedelta64) + array_to_timedelta64, + parse_timedelta_unit) from pandas.core.dtypes.common import ( ensure_object, @@ -23,8 +24,14 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): Parameters ---------- arg : string, timedelta, list, tuple, 1-d array, or Series - unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an - integer/float number + unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', + 'hours', hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', + 'T', 'S', 'seconds', 'sec', 'second', 'ms', + 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', + 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond' + 'N'}, optional + Denote the unit of the input, if input is an integer. Default 'ns'. box : boolean, default True - If True returns a Timedelta/TimedeltaIndex of the results - if False returns a np.timedelta64 or ndarray of values of dtype @@ -69,7 +76,7 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. """ - unit = _validate_timedelta_unit(unit) + unit = parse_timedelta_unit(unit) if errors not in ('ignore', 'raise', 'coerce'): raise ValueError("errors must be one of 'ignore', " @@ -99,45 +106,6 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): box=box, errors=errors) -_unit_map = { - 'Y': 'Y', - 'y': 'Y', - 'W': 'W', - 'w': 'W', - 'D': 'D', - 'd': 'D', - 'days': 'D', - 'Days': 'D', - 'day': 'D', - 'Day': 'D', - 'M': 'M', - 'H': 'h', - 'h': 'h', - 'm': 'm', - 'T': 'm', - 'S': 's', - 's': 's', - 'L': 'ms', - 'MS': 'ms', - 'ms': 'ms', - 'US': 'us', - 'us': 'us', - 'NS': 'ns', - 'ns': 'ns', -} - - -def _validate_timedelta_unit(arg): - """ provide validation / translation for timedelta short units """ - try: - return _unit_map[arg] - except (KeyError, TypeError): - if arg is None: - return 'ns' - raise ValueError("invalid timedelta unit {arg} provided" - .format(arg=arg)) - - def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): """Convert string 'r' to a timedelta object.""" diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 0cac1119f76b5..959586e53f630 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -293,37 +293,65 @@ def test_nat_converters(self): assert to_timedelta('nat', box=False).astype('int64') == iNaT assert to_timedelta('nan', box=False).astype('int64') == iNaT - def testit(unit, transform): - - # array - result = to_timedelta(np.arange(5), unit=unit) - expected = TimedeltaIndex([np.timedelta64(i, transform(unit)) + @pytest.mark.parametrize('units, np_unit', + [(['Y', 'y'], 'Y'), + (['M'], 'M'), + (['W', 'w'], 'W'), + (['D', 'd', 'days', 'day', 'Days', 'Day'], 'D'), + (['m', 'minute', 'min', 'minutes', 't', + 'Minute', 'Min', 'Minutes', 'T'], 'm'), + (['s', 'seconds', 'sec', 'second', + 'S', 'Seconds', 'Sec', 'Second'], 's'), + (['ms', 'milliseconds', 'millisecond', 'milli', + 'millis', 'l', 'MS', 'Milliseconds', + 'Millisecond', 'Milli', 'Millis', 'L'], 'ms'), + (['us', 'microseconds', 'microsecond', 'micro', + 'micros', 'u', 'US', 'Microseconds', + 'Microsecond', 'Micro', 'Micros', 'U'], 'us'), + (['ns', 'nanoseconds', 'nanosecond', 'nano', + 'nanos', 'n', 'NS', 'Nanoseconds', + 'Nanosecond', 'Nano', 'Nanos', 'N'], 'ns')]) + def test_unit_parser(self, units, np_unit): + # validate all units, GH 6855, GH 21762 + for unit in units: + # array-likes + expected = TimedeltaIndex([np.timedelta64(i, np_unit) for i in np.arange(5).tolist()]) - tm.assert_index_equal(result, expected) + for wrapper in [np.array, list, pd.Index]: + result = to_timedelta(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + + if unit == 'M': + # M is treated as minutes in string repr + expected = TimedeltaIndex([np.timedelta64(i, 'm') + for i in np.arange(5).tolist()]) + + for wrapper in [np.array, list, pd.Index]: + str_repr = ['{}{}'.format(x, unit) for x in np.arange(5)] + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(str_repr)) + tm.assert_index_equal(result, expected) # scalar - result = to_timedelta(2, unit=unit) - expected = Timedelta(np.timedelta64(2, transform(unit)).astype( + expected = Timedelta(np.timedelta64(2, np_unit).astype( 'timedelta64[ns]')) - assert result == expected - # validate all units - # GH 6855 - for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']: - testit(unit, lambda x: x.upper()) - for unit in ['days', 'day', 'Day', 'Days']: - testit(unit, lambda x: 'D') - for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US', - 'NS']: - testit(unit, lambda x: x.lower()) - - # offsets + result = to_timedelta(2, unit=unit) + assert result == expected + result = Timedelta(2, unit=unit) + assert result == expected - # m - testit('T', lambda x: 'm') + if unit == 'M': + expected = Timedelta(np.timedelta64(2, 'm').astype( + 'timedelta64[ns]')) - # ms - testit('L', lambda x: 'ms') + result = to_timedelta('2{}'.format(unit)) + assert result == expected + result = Timedelta('2{}'.format(unit)) + assert result == expected def test_numeric_conversions(self): assert ct(0) == np.timedelta64(0, 'ns') From 04f3cebba2d8bdce367b289ba28cb0c41432c336 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Mon, 5 Nov 2018 12:50:48 +0900 Subject: [PATCH 2/2] fix tests --- .../tests/scalar/timedelta/test_timedelta.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 959586e53f630..58064213d9b3b 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -311,29 +311,28 @@ def test_nat_converters(self): (['ns', 'nanoseconds', 'nanosecond', 'nano', 'nanos', 'n', 'NS', 'Nanoseconds', 'Nanosecond', 'Nano', 'Nanos', 'N'], 'ns')]) - def test_unit_parser(self, units, np_unit): + @pytest.mark.parametrize('wrapper', [np.array, list, pd.Index]) + def test_unit_parser(self, units, np_unit, wrapper): # validate all units, GH 6855, GH 21762 for unit in units: # array-likes expected = TimedeltaIndex([np.timedelta64(i, np_unit) for i in np.arange(5).tolist()]) - for wrapper in [np.array, list, pd.Index]: - result = to_timedelta(wrapper(range(5)), unit=unit) - tm.assert_index_equal(result, expected) - result = TimedeltaIndex(wrapper(range(5)), unit=unit) - tm.assert_index_equal(result, expected) + result = to_timedelta(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) if unit == 'M': # M is treated as minutes in string repr expected = TimedeltaIndex([np.timedelta64(i, 'm') for i in np.arange(5).tolist()]) - for wrapper in [np.array, list, pd.Index]: - str_repr = ['{}{}'.format(x, unit) for x in np.arange(5)] - result = to_timedelta(wrapper(str_repr)) - tm.assert_index_equal(result, expected) - result = TimedeltaIndex(wrapper(str_repr)) - tm.assert_index_equal(result, expected) + str_repr = ['{}{}'.format(x, unit) for x in np.arange(5)] + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(str_repr)) + tm.assert_index_equal(result, expected) # scalar expected = Timedelta(np.timedelta64(2, np_unit).astype(