Skip to content

Commit 11c0d28

Browse files
sinhrksjreback
authored andcommitted
BUG: Cleanup timedelta offset (#23439)
1 parent 6a5c34c commit 11c0d28

File tree

4 files changed

+120
-75
lines changed

4 files changed

+120
-75
lines changed

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,8 @@ Timedelta
11311131
- Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`)
11321132
- Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`)
11331133
- Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`)
1134+
- Bug in :class:`Timedelta` and :func:`to_timedelta()` where the supported unit strings were inconsistent (:issue:`21762`)
1135+
11341136

11351137
Timezones
11361138
^^^^^^^^^

pandas/_libs/tslibs/timedeltas.pyx

+57-9
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,16 @@ Components = collections.namedtuple('Components', [
4545
'days', 'hours', 'minutes', 'seconds',
4646
'milliseconds', 'microseconds', 'nanoseconds'])
4747

48-
cdef dict timedelta_abbrevs = { 'D': 'd',
49-
'd': 'd',
50-
'days': 'd',
51-
'day': 'd',
48+
49+
cdef dict timedelta_abbrevs = { 'Y': 'Y',
50+
'y': 'Y',
51+
'M': 'M',
52+
'W': 'W',
53+
'w': 'W',
54+
'D': 'D',
55+
'd': 'D',
56+
'days': 'D',
57+
'day': 'D',
5258
'hours': 'h',
5359
'hour': 'h',
5460
'hr': 'h',
@@ -57,6 +63,7 @@ cdef dict timedelta_abbrevs = { 'D': 'd',
5763
'minute': 'm',
5864
'min': 'm',
5965
'minutes': 'm',
66+
't': 'm',
6067
's': 's',
6168
'seconds': 's',
6269
'sec': 's',
@@ -66,16 +73,19 @@ cdef dict timedelta_abbrevs = { 'D': 'd',
6673
'millisecond': 'ms',
6774
'milli': 'ms',
6875
'millis': 'ms',
76+
'l': 'ms',
6977
'us': 'us',
7078
'microseconds': 'us',
7179
'microsecond': 'us',
7280
'micro': 'us',
7381
'micros': 'us',
82+
'u': 'us',
7483
'ns': 'ns',
7584
'nanoseconds': 'ns',
7685
'nano': 'ns',
7786
'nanos': 'ns',
78-
'nanosecond': 'ns'}
87+
'nanosecond': 'ns',
88+
'n': 'ns'}
7989

8090
_no_input = object()
8191

@@ -140,7 +150,8 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
140150

141151
cpdef convert_to_timedelta64(object ts, object unit):
142152
"""
143-
Convert an incoming object to a timedelta64 if possible
153+
Convert an incoming object to a timedelta64 if possible.
154+
Before calling, unit must be standardized (e.g. with parse_timedelta_unit) to avoid repeated unit conversion.
144155
145156
Handle these types of objects:
146157
- timedelta/Timedelta
@@ -228,6 +239,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
228239
for i in range(n):
229240
result[i] = parse_timedelta_string(values[i])
230241
except:
242+
unit = parse_timedelta_unit(unit)
231243
for i in range(n):
232244
try:
233245
result[i] = convert_to_timedelta64(values[i], unit)
@@ -247,7 +259,16 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
247259
int64_t m
248260
int p
249261

250-
if unit == 'D' or unit == 'd':
262+
if unit == 'Y':
263+
m = 1000000000L * 31556952
264+
p = 9
265+
elif unit == 'M':
266+
m = 1000000000L * 2629746
267+
p = 9
268+
elif unit == 'W':
269+
m = 1000000000L * 86400 * 7
270+
p = 9
271+
elif unit == 'D' or unit == 'd':
251272
m = 1000000000L * 86400
252273
p = 9
253274
elif unit == 'h':
@@ -485,14 +506,34 @@ cdef inline timedelta_from_spec(object number, object frac, object unit):
485506

486507
try:
487508
unit = ''.join(unit)
488-
unit = timedelta_abbrevs[unit.lower()]
509+
if unit == 'M':
510+
# To parse ISO 8601 string, 'M' should be treated as minute,
511+
# not month
512+
unit = 'm'
513+
unit = parse_timedelta_unit(unit)
489514
except KeyError:
490515
raise ValueError("invalid abbreviation: {unit}".format(unit=unit))
491516

492517
n = ''.join(number) + '.' + ''.join(frac)
493518
return cast_from_unit(float(n), unit)
494519

495520

521+
cpdef inline object parse_timedelta_unit(object unit):
522+
"""
523+
Parameters
524+
----------
525+
unit : a unit string
526+
"""
527+
if unit is None:
528+
return 'ns'
529+
elif unit == 'M':
530+
return unit
531+
try:
532+
return timedelta_abbrevs[unit.lower()]
533+
except (KeyError, AttributeError):
534+
raise ValueError("invalid unit abbreviation: {unit}"
535+
.format(unit=unit))
536+
496537
# ----------------------------------------------------------------------
497538
# Timedelta ops utilities
498539

@@ -1070,7 +1111,13 @@ class Timedelta(_Timedelta):
10701111
Parameters
10711112
----------
10721113
value : Timedelta, timedelta, np.timedelta64, string, or integer
1073-
unit : string, {'ns', 'us', 'ms', 's', 'm', 'h', 'D'}, optional
1114+
unit : string, {'Y', 'M', 'W', 'D', 'days', 'day',
1115+
'hours', 'hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes',
1116+
'T', 'S', 'seconds', 'sec', 'second', 'ms',
1117+
'milliseconds', 'millisecond', 'milli', 'millis', 'L',
1118+
'us', 'microseconds', 'microsecond', 'micro', 'micros',
1119+
'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond',
1120+
'N'}, optional
10741121
Denote the unit of the input, if input is an integer. Default 'ns'.
10751122
days, seconds, microseconds,
10761123
milliseconds, minutes, hours, weeks : numeric, optional
@@ -1121,6 +1168,7 @@ class Timedelta(_Timedelta):
11211168
value = np.timedelta64(delta_to_nanoseconds(value.delta), 'ns')
11221169
elif is_integer_object(value) or is_float_object(value):
11231170
# unit=None is de-facto 'ns'
1171+
unit = parse_timedelta_unit(unit)
11241172
value = convert_to_timedelta64(value, unit)
11251173
elif checknull_with_nat(value):
11261174
return NaT

pandas/core/tools/timedeltas.py

+11-43
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import pandas as pd
77
from pandas._libs import tslibs
88
from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64,
9-
array_to_timedelta64)
9+
array_to_timedelta64,
10+
parse_timedelta_unit)
1011

1112
from pandas.core.dtypes.common import (
1213
ensure_object,
@@ -23,8 +24,14 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
2324
Parameters
2425
----------
2526
arg : string, timedelta, list, tuple, 1-d array, or Series
26-
unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an
27-
integer/float number
27+
unit : string, {'Y', 'M', 'W', 'D', 'days', 'day',
28+
'hours', 'hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes',
29+
'T', 'S', 'seconds', 'sec', 'second', 'ms',
30+
'milliseconds', 'millisecond', 'milli', 'millis', 'L',
31+
'us', 'microseconds', 'microsecond', 'micro', 'micros',
32+
'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond',
33+
'N'}, optional
34+
Denote the unit of the input, if input is an integer. Default 'ns'.
2835
box : boolean, default True
2936
- If True returns a Timedelta/TimedeltaIndex of the results
3037
- if False returns a np.timedelta64 or ndarray of values of dtype
@@ -69,7 +76,7 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
6976
pandas.DataFrame.astype : Cast argument to a specified dtype.
7077
pandas.to_datetime : Convert argument to datetime.
7178
"""
72-
unit = _validate_timedelta_unit(unit)
79+
unit = parse_timedelta_unit(unit)
7380

7481
if errors not in ('ignore', 'raise', 'coerce'):
7582
raise ValueError("errors must be one of 'ignore', "
@@ -99,45 +106,6 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
99106
box=box, errors=errors)
100107

101108

102-
_unit_map = {
103-
'Y': 'Y',
104-
'y': 'Y',
105-
'W': 'W',
106-
'w': 'W',
107-
'D': 'D',
108-
'd': 'D',
109-
'days': 'D',
110-
'Days': 'D',
111-
'day': 'D',
112-
'Day': 'D',
113-
'M': 'M',
114-
'H': 'h',
115-
'h': 'h',
116-
'm': 'm',
117-
'T': 'm',
118-
'S': 's',
119-
's': 's',
120-
'L': 'ms',
121-
'MS': 'ms',
122-
'ms': 'ms',
123-
'US': 'us',
124-
'us': 'us',
125-
'NS': 'ns',
126-
'ns': 'ns',
127-
}
128-
129-
130-
def _validate_timedelta_unit(arg):
131-
""" provide validation / translation for timedelta short units """
132-
try:
133-
return _unit_map[arg]
134-
except (KeyError, TypeError):
135-
if arg is None:
136-
return 'ns'
137-
raise ValueError("invalid timedelta unit {arg} provided"
138-
.format(arg=arg))
139-
140-
141109
def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
142110
"""Convert string 'r' to a timedelta object."""
143111

pandas/tests/scalar/timedelta/test_timedelta.py

+50-23
Original file line numberDiff line numberDiff line change
@@ -293,37 +293,64 @@ def test_nat_converters(self):
293293
assert to_timedelta('nat', box=False).astype('int64') == iNaT
294294
assert to_timedelta('nan', box=False).astype('int64') == iNaT
295295

296-
def testit(unit, transform):
297-
298-
# array
299-
result = to_timedelta(np.arange(5), unit=unit)
300-
expected = TimedeltaIndex([np.timedelta64(i, transform(unit))
296+
@pytest.mark.parametrize('units, np_unit',
297+
[(['Y', 'y'], 'Y'),
298+
(['M'], 'M'),
299+
(['W', 'w'], 'W'),
300+
(['D', 'd', 'days', 'day', 'Days', 'Day'], 'D'),
301+
(['m', 'minute', 'min', 'minutes', 't',
302+
'Minute', 'Min', 'Minutes', 'T'], 'm'),
303+
(['s', 'seconds', 'sec', 'second',
304+
'S', 'Seconds', 'Sec', 'Second'], 's'),
305+
(['ms', 'milliseconds', 'millisecond', 'milli',
306+
'millis', 'l', 'MS', 'Milliseconds',
307+
'Millisecond', 'Milli', 'Millis', 'L'], 'ms'),
308+
(['us', 'microseconds', 'microsecond', 'micro',
309+
'micros', 'u', 'US', 'Microseconds',
310+
'Microsecond', 'Micro', 'Micros', 'U'], 'us'),
311+
(['ns', 'nanoseconds', 'nanosecond', 'nano',
312+
'nanos', 'n', 'NS', 'Nanoseconds',
313+
'Nanosecond', 'Nano', 'Nanos', 'N'], 'ns')])
314+
@pytest.mark.parametrize('wrapper', [np.array, list, pd.Index])
315+
def test_unit_parser(self, units, np_unit, wrapper):
316+
# validate all units, GH 6855, GH 21762
317+
for unit in units:
318+
# array-likes
319+
expected = TimedeltaIndex([np.timedelta64(i, np_unit)
301320
for i in np.arange(5).tolist()])
321+
result = to_timedelta(wrapper(range(5)), unit=unit)
322+
tm.assert_index_equal(result, expected)
323+
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
324+
tm.assert_index_equal(result, expected)
325+
326+
if unit == 'M':
327+
# M is treated as minutes in string repr
328+
expected = TimedeltaIndex([np.timedelta64(i, 'm')
329+
for i in np.arange(5).tolist()])
330+
331+
str_repr = ['{}{}'.format(x, unit) for x in np.arange(5)]
332+
result = to_timedelta(wrapper(str_repr))
333+
tm.assert_index_equal(result, expected)
334+
result = TimedeltaIndex(wrapper(str_repr))
302335
tm.assert_index_equal(result, expected)
303336

304337
# scalar
305-
result = to_timedelta(2, unit=unit)
306-
expected = Timedelta(np.timedelta64(2, transform(unit)).astype(
338+
expected = Timedelta(np.timedelta64(2, np_unit).astype(
307339
'timedelta64[ns]'))
308-
assert result == expected
309-
310-
# validate all units
311-
# GH 6855
312-
for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']:
313-
testit(unit, lambda x: x.upper())
314-
for unit in ['days', 'day', 'Day', 'Days']:
315-
testit(unit, lambda x: 'D')
316-
for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US',
317-
'NS']:
318-
testit(unit, lambda x: x.lower())
319340

320-
# offsets
341+
result = to_timedelta(2, unit=unit)
342+
assert result == expected
343+
result = Timedelta(2, unit=unit)
344+
assert result == expected
321345

322-
# m
323-
testit('T', lambda x: 'm')
346+
if unit == 'M':
347+
expected = Timedelta(np.timedelta64(2, 'm').astype(
348+
'timedelta64[ns]'))
324349

325-
# ms
326-
testit('L', lambda x: 'ms')
350+
result = to_timedelta('2{}'.format(unit))
351+
assert result == expected
352+
result = Timedelta('2{}'.format(unit))
353+
assert result == expected
327354

328355
def test_numeric_conversions(self):
329356
assert ct(0) == np.timedelta64(0, 'ns')

0 commit comments

Comments
 (0)