Skip to content

Commit 3ade6a9

Browse files
committed
BUG: Cleanup timedelta offset
1 parent 93aba79 commit 3ade6a9

File tree

4 files changed

+122
-76
lines changed

4 files changed

+122
-76
lines changed

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,8 @@ Timedelta
11261126
- Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`)
11271127
- Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`)
11281128
- Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`)
1129+
- Bug in :class:`Timedelta` and :func:`to_timedelta` where the sets of supported unit strings were inconsistent with each other (:issue:`21762`)
1130+
11291131

11301132
Timezones
11311133
^^^^^^^^^

pandas/_libs/tslibs/timedeltas.pyx

+57-9
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,16 @@ Components = collections.namedtuple('Components', [
4444
'days', 'hours', 'minutes', 'seconds',
4545
'milliseconds', 'microseconds', 'nanoseconds'])
4646

47-
cdef dict timedelta_abbrevs = { 'D': 'd',
48-
'd': 'd',
49-
'days': 'd',
50-
'day': 'd',
47+
48+
cdef dict timedelta_abbrevs = { 'Y': 'Y',
49+
'y': 'Y',
50+
'M': 'M',
51+
'W': 'W',
52+
'w': 'W',
53+
'D': 'D',
54+
'd': 'D',
55+
'days': 'D',
56+
'day': 'D',
5157
'hours': 'h',
5258
'hour': 'h',
5359
'hr': 'h',
@@ -56,6 +62,7 @@ cdef dict timedelta_abbrevs = { 'D': 'd',
5662
'minute': 'm',
5763
'min': 'm',
5864
'minutes': 'm',
65+
't': 'm',
5966
's': 's',
6067
'seconds': 's',
6168
'sec': 's',
@@ -65,16 +72,19 @@ cdef dict timedelta_abbrevs = { 'D': 'd',
6572
'millisecond': 'ms',
6673
'milli': 'ms',
6774
'millis': 'ms',
75+
'l': 'ms',
6876
'us': 'us',
6977
'microseconds': 'us',
7078
'microsecond': 'us',
7179
'micro': 'us',
7280
'micros': 'us',
81+
'u': 'us',
7382
'ns': 'ns',
7483
'nanoseconds': 'ns',
7584
'nano': 'ns',
7685
'nanos': 'ns',
77-
'nanosecond': 'ns'}
86+
'nanosecond': 'ns',
87+
'n': 'ns'}
7888

7989
_no_input = object()
8090

@@ -137,7 +147,8 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
137147

138148
cpdef convert_to_timedelta64(object ts, object unit):
139149
"""
140-
Convert an incoming object to a timedelta64 if possible
150+
Convert an incoming object to a timedelta64 if possible.
151+
Before calling, unit must be standardized to avoid repeated unit conversion.
141152
142153
Handle these types of objects:
143154
- timedelta/Timedelta
@@ -223,6 +234,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
223234
for i in range(n):
224235
result[i] = parse_timedelta_string(values[i])
225236
except:
237+
unit = parse_timedelta_unit(unit)
226238
for i in range(n):
227239
try:
228240
result[i] = convert_to_timedelta64(values[i], unit)
@@ -242,7 +254,16 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
242254
int64_t m
243255
int p
244256

245-
if unit == 'D' or unit == 'd':
257+
if unit == 'Y':
258+
m = 1000000000L * 31556952
259+
p = 9
260+
elif unit == 'M':
261+
m = 1000000000L * 2629746
262+
p = 9
263+
elif unit == 'W':
264+
m = 1000000000L * 86400 * 7
265+
p = 9
266+
elif unit == 'D' or unit == 'd':
246267
m = 1000000000L * 86400
247268
p = 9
248269
elif unit == 'h':
@@ -480,14 +501,34 @@ cdef inline timedelta_from_spec(object number, object frac, object unit):
480501

481502
try:
482503
unit = ''.join(unit)
483-
unit = timedelta_abbrevs[unit.lower()]
504+
if unit == 'M':
505+
# To parse ISO 8601 string, 'M' should be treated as minute,
506+
# not month
507+
unit = 'm'
508+
unit = parse_timedelta_unit(unit)
484509
except KeyError:
485510
raise ValueError("invalid abbreviation: {unit}".format(unit=unit))
486511

487512
n = ''.join(number) + '.' + ''.join(frac)
488513
return cast_from_unit(float(n), unit)
489514

490515

516+
cpdef inline object parse_timedelta_unit(object unit):
517+
"""
518+
Parameters
519+
----------
520+
unit : a unit string
521+
"""
522+
if unit is None:
523+
return 'ns'
524+
elif unit == 'M':
525+
return unit
526+
try:
527+
return timedelta_abbrevs[unit.lower()]
528+
except (KeyError, AttributeError):
529+
raise ValueError("invalid unit abbreviation: {unit}"
530+
.format(unit=unit))
531+
491532
# ----------------------------------------------------------------------
492533
# Timedelta ops utilities
493534

@@ -1065,7 +1106,13 @@ class Timedelta(_Timedelta):
10651106
Parameters
10661107
----------
10671108
value : Timedelta, timedelta, np.timedelta64, string, or integer
1068-
unit : string, {'ns', 'us', 'ms', 's', 'm', 'h', 'D'}, optional
1109+
unit : string, {'Y', 'M', 'W', 'D', 'days', 'day',
1110+
'hours', 'hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes',
1111+
'T', 'S', 'seconds', 'sec', 'second', 'ms',
1112+
'milliseconds', 'millisecond', 'milli', 'millis', 'L',
1113+
'us', 'microseconds', 'microsecond', 'micro', 'micros',
1114+
'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond',
1115+
'N'}, optional
10691116
Denote the unit of the input, if input is an integer. Default 'ns'.
10701117
days, seconds, microseconds,
10711118
milliseconds, minutes, hours, weeks : numeric, optional
@@ -1116,6 +1163,7 @@ class Timedelta(_Timedelta):
11161163
value = np.timedelta64(delta_to_nanoseconds(value.delta), 'ns')
11171164
elif is_integer_object(value) or is_float_object(value):
11181165
# unit=None is de-facto 'ns'
1166+
unit = parse_timedelta_unit(unit)
11191167
value = convert_to_timedelta64(value, unit)
11201168
elif checknull_with_nat(value):
11211169
return NaT

pandas/core/tools/timedeltas.py

+11-43
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import pandas as pd
77
from pandas._libs import tslibs
88
from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64,
9-
array_to_timedelta64)
9+
array_to_timedelta64,
10+
parse_timedelta_unit)
1011

1112
from pandas.core.dtypes.common import (
1213
ensure_object,
@@ -23,8 +24,14 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
2324
Parameters
2425
----------
2526
arg : string, timedelta, list, tuple, 1-d array, or Series
26-
unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an
27-
integer/float number
27+
unit : string, {'Y', 'M', 'W', 'D', 'days', 'day',
28+
'hours', 'hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes',
29+
'T', 'S', 'seconds', 'sec', 'second', 'ms',
30+
'milliseconds', 'millisecond', 'milli', 'millis', 'L',
31+
'us', 'microseconds', 'microsecond', 'micro', 'micros',
32+
'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond',
33+
'N'}, optional
34+
Denote the unit of the input, if input is an integer. Default 'ns'.
2835
box : boolean, default True
2936
- If True returns a Timedelta/TimedeltaIndex of the results
3037
- if False returns a np.timedelta64 or ndarray of values of dtype
@@ -69,7 +76,7 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
6976
pandas.DataFrame.astype : Cast argument to a specified dtype.
7077
pandas.to_datetime : Convert argument to datetime.
7178
"""
72-
unit = _validate_timedelta_unit(unit)
79+
unit = parse_timedelta_unit(unit)
7380

7481
if errors not in ('ignore', 'raise', 'coerce'):
7582
raise ValueError("errors must be one of 'ignore', "
@@ -99,45 +106,6 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
99106
box=box, errors=errors)
100107

101108

102-
_unit_map = {
103-
'Y': 'Y',
104-
'y': 'Y',
105-
'W': 'W',
106-
'w': 'W',
107-
'D': 'D',
108-
'd': 'D',
109-
'days': 'D',
110-
'Days': 'D',
111-
'day': 'D',
112-
'Day': 'D',
113-
'M': 'M',
114-
'H': 'h',
115-
'h': 'h',
116-
'm': 'm',
117-
'T': 'm',
118-
'S': 's',
119-
's': 's',
120-
'L': 'ms',
121-
'MS': 'ms',
122-
'ms': 'ms',
123-
'US': 'us',
124-
'us': 'us',
125-
'NS': 'ns',
126-
'ns': 'ns',
127-
}
128-
129-
130-
def _validate_timedelta_unit(arg):
131-
""" provide validation / translation for timedelta short units """
132-
try:
133-
return _unit_map[arg]
134-
except (KeyError, TypeError):
135-
if arg is None:
136-
return 'ns'
137-
raise ValueError("invalid timedelta unit {arg} provided"
138-
.format(arg=arg))
139-
140-
141109
def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
142110
"""Convert string 'r' to a timedelta object."""
143111

pandas/tests/scalar/timedelta/test_timedelta.py

+52-24
Original file line numberDiff line numberDiff line change
@@ -293,37 +293,65 @@ def test_nat_converters(self):
293293
assert to_timedelta('nat', box=False).astype('int64') == iNaT
294294
assert to_timedelta('nan', box=False).astype('int64') == iNaT
295295

296-
def testit(unit, transform):
297-
298-
# array
299-
result = to_timedelta(np.arange(5), unit=unit)
300-
expected = TimedeltaIndex([np.timedelta64(i, transform(unit))
296+
@pytest.mark.parametrize('units, np_unit',
297+
[(['Y', 'y'], 'Y'),
298+
(['M'], 'M'),
299+
(['W', 'w'], 'W'),
300+
(['D', 'd', 'days', 'day', 'Days', 'Day'], 'D'),
301+
(['m', 'minute', 'min', 'minutes', 't',
302+
'Minute', 'Min', 'Minutes', 'T'], 'm'),
303+
(['s', 'seconds', 'sec', 'second',
304+
'S', 'Seconds', 'Sec', 'Second'], 's'),
305+
(['ms', 'milliseconds', 'millisecond', 'milli',
306+
'millis', 'l', 'MS', 'Milliseconds',
307+
'Millisecond', 'Milli', 'Millis', 'L'], 'ms'),
308+
(['us', 'microseconds', 'microsecond', 'micro',
309+
'micros', 'u', 'US', 'Microseconds',
310+
'Microsecond', 'Micro', 'Micros', 'U'], 'us'),
311+
(['ns', 'nanoseconds', 'nanosecond', 'nano',
312+
'nanos', 'n', 'NS', 'Nanoseconds',
313+
'Nanosecond', 'Nano', 'Nanos', 'N'], 'ns')])
314+
def test_unit_parser(self, units, np_unit):
315+
# validate all units, GH 6855, GH 21762
316+
for unit in units:
317+
# array-likes
318+
expected = TimedeltaIndex([np.timedelta64(i, np_unit)
301319
for i in np.arange(5).tolist()])
302-
tm.assert_index_equal(result, expected)
320+
for wrapper in [np.array, list, pd.Index]:
321+
result = to_timedelta(wrapper(range(5)), unit=unit)
322+
tm.assert_index_equal(result, expected)
323+
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
324+
tm.assert_index_equal(result, expected)
325+
326+
if unit == 'M':
327+
# M is treated as minutes in string repr
328+
expected = TimedeltaIndex([np.timedelta64(i, 'm')
329+
for i in np.arange(5).tolist()])
330+
331+
for wrapper in [np.array, list, pd.Index]:
332+
str_repr = ['{}{}'.format(x, unit) for x in np.arange(5)]
333+
result = to_timedelta(wrapper(str_repr))
334+
tm.assert_index_equal(result, expected)
335+
result = TimedeltaIndex(wrapper(str_repr))
336+
tm.assert_index_equal(result, expected)
303337

304338
# scalar
305-
result = to_timedelta(2, unit=unit)
306-
expected = Timedelta(np.timedelta64(2, transform(unit)).astype(
339+
expected = Timedelta(np.timedelta64(2, np_unit).astype(
307340
'timedelta64[ns]'))
308-
assert result == expected
309341

310-
# validate all units
311-
# GH 6855
312-
for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']:
313-
testit(unit, lambda x: x.upper())
314-
for unit in ['days', 'day', 'Day', 'Days']:
315-
testit(unit, lambda x: 'D')
316-
for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US',
317-
'NS']:
318-
testit(unit, lambda x: x.lower())
319-
320-
# offsets
342+
result = to_timedelta(2, unit=unit)
343+
assert result == expected
344+
result = Timedelta(2, unit=unit)
345+
assert result == expected
321346

322-
# m
323-
testit('T', lambda x: 'm')
347+
if unit == 'M':
348+
expected = Timedelta(np.timedelta64(2, 'm').astype(
349+
'timedelta64[ns]'))
324350

325-
# ms
326-
testit('L', lambda x: 'ms')
351+
result = to_timedelta('2{}'.format(unit))
352+
assert result == expected
353+
result = Timedelta('2{}'.format(unit))
354+
assert result == expected
327355

328356
def test_numeric_conversions(self):
329357
assert ct(0) == np.timedelta64(0, 'ns')

0 commit comments

Comments
 (0)