Skip to content

Commit d08dca6

Browse files
DEPR: Deprecate use of strings denoting units with 'M', 'Y' or 'y' in pd.to_timedelta (36666) (#36838)
1 parent 02a7420 commit d08dca6

File tree

8 files changed

+148
-103
lines changed

8 files changed

+148
-103
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ Deprecations
337337
- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`)
338338
- Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
339339
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
340+
- Deprecated use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
340341

341342
.. ---------------------------------------------------------------------------
342343

pandas/_libs/tslibs/timedeltas.pyx

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import collections
2+
import warnings
23

34
import cython
45

@@ -466,6 +467,15 @@ cdef inline timedelta_from_spec(object number, object frac, object unit):
466467

467468
try:
468469
unit = ''.join(unit)
470+
471+
if unit in ["M", "Y", "y"]:
472+
warnings.warn(
473+
"Units 'M', 'Y' and 'y' do not represent unambiguous "
474+
"timedelta values and will be removed in a future version",
475+
FutureWarning,
476+
stacklevel=2,
477+
)
478+
469479
if unit == 'M':
470480
# To parse ISO 8601 string, 'M' should be treated as minute,
471481
# not month
@@ -634,9 +644,11 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1:
634644
else:
635645
neg = 1
636646
elif c in ['W', 'D', 'H', 'M']:
637-
unit.append(c)
638647
if c in ['H', 'M'] and len(number) > 2:
639648
raise ValueError(err_msg)
649+
if c == 'M':
650+
c = 'min'
651+
unit.append(c)
640652
r = timedelta_from_spec(number, '0', unit)
641653
result += timedelta_as_neg(r, neg)
642654

pandas/core/tools/timedeltas.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,12 @@ def to_timedelta(arg, unit=None, errors="raise"):
2525
Parameters
2626
----------
2727
arg : str, timedelta, list-like or Series
28-
The data to be converted to timedelta. The character M by itself,
29-
e.g. '1M', is treated as minute, not month. The characters Y and y
30-
are treated as the mean length of the Gregorian calendar year -
31-
365.2425 days or 365 days 5 hours 49 minutes 12 seconds.
28+
The data to be converted to timedelta.
29+
30+
.. deprecated:: 1.2
31+
Strings with units 'M', 'Y' and 'y' do not represent
32+
unambiguous timedelta values; support for them will be removed in a future version.
33+
3234
unit : str, optional
3335
Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.
3436

pandas/tests/indexes/interval/test_interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def test_properties(self, closed):
8686
[1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
8787
[-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
8888
pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
89-
pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]),
89+
pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]),
9090
],
9191
)
9292
def test_length(self, closed, breaks):

pandas/tests/scalar/interval/test_interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ def test_hash(self, interval):
7979
(-np.inf, np.inf, np.inf),
8080
(Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")),
8181
(Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")),
82-
(Timedelta("1H10M"), Timedelta("5H5M"), Timedelta("3H55M")),
83-
(Timedelta("5S"), Timedelta("1H"), Timedelta("59M55S")),
82+
(Timedelta("1H10min"), Timedelta("5H5min"), Timedelta("3H55min")),
83+
(Timedelta("5S"), Timedelta("1H"), Timedelta("59min55S")),
8484
],
8585
)
8686
def test_length(self, left, right, expected):

pandas/tests/scalar/timedelta/test_timedelta.py

+103-94
Original file line numberDiff line numberDiff line change
@@ -160,108 +160,117 @@ def test_nat_converters(self):
160160
assert result.astype("int64") == iNaT
161161

162162
@pytest.mark.parametrize(
163-
"units, np_unit",
164-
[
165-
(["W", "w"], "W"),
166-
(["D", "d", "days", "day", "Days", "Day"], "D"),
167-
(
168-
["m", "minute", "min", "minutes", "t", "Minute", "Min", "Minutes", "T"],
163+
"unit, np_unit",
164+
[(value, "W") for value in ["W", "w"]]
165+
+ [(value, "D") for value in ["D", "d", "days", "day", "Days", "Day"]]
166+
+ [
167+
(value, "m")
168+
for value in [
169169
"m",
170-
),
171-
(["s", "seconds", "sec", "second", "S", "Seconds", "Sec", "Second"], "s"),
172-
(
173-
[
174-
"ms",
175-
"milliseconds",
176-
"millisecond",
177-
"milli",
178-
"millis",
179-
"l",
180-
"MS",
181-
"Milliseconds",
182-
"Millisecond",
183-
"Milli",
184-
"Millis",
185-
"L",
186-
],
170+
"minute",
171+
"min",
172+
"minutes",
173+
"t",
174+
"Minute",
175+
"Min",
176+
"Minutes",
177+
"T",
178+
]
179+
]
180+
+ [
181+
(value, "s")
182+
for value in [
183+
"s",
184+
"seconds",
185+
"sec",
186+
"second",
187+
"S",
188+
"Seconds",
189+
"Sec",
190+
"Second",
191+
]
192+
]
193+
+ [
194+
(value, "ms")
195+
for value in [
187196
"ms",
188-
),
189-
(
190-
[
191-
"us",
192-
"microseconds",
193-
"microsecond",
194-
"micro",
195-
"micros",
196-
"u",
197-
"US",
198-
"Microseconds",
199-
"Microsecond",
200-
"Micro",
201-
"Micros",
202-
"U",
203-
],
197+
"milliseconds",
198+
"millisecond",
199+
"milli",
200+
"millis",
201+
"l",
202+
"MS",
203+
"Milliseconds",
204+
"Millisecond",
205+
"Milli",
206+
"Millis",
207+
"L",
208+
]
209+
]
210+
+ [
211+
(value, "us")
212+
for value in [
204213
"us",
205-
),
206-
(
207-
[
208-
"ns",
209-
"nanoseconds",
210-
"nanosecond",
211-
"nano",
212-
"nanos",
213-
"n",
214-
"NS",
215-
"Nanoseconds",
216-
"Nanosecond",
217-
"Nano",
218-
"Nanos",
219-
"N",
220-
],
214+
"microseconds",
215+
"microsecond",
216+
"micro",
217+
"micros",
218+
"u",
219+
"US",
220+
"Microseconds",
221+
"Microsecond",
222+
"Micro",
223+
"Micros",
224+
"U",
225+
]
226+
]
227+
+ [
228+
(value, "ns")
229+
for value in [
221230
"ns",
222-
),
231+
"nanoseconds",
232+
"nanosecond",
233+
"nano",
234+
"nanos",
235+
"n",
236+
"NS",
237+
"Nanoseconds",
238+
"Nanosecond",
239+
"Nano",
240+
"Nanos",
241+
"N",
242+
]
223243
],
224244
)
225245
@pytest.mark.parametrize("wrapper", [np.array, list, pd.Index])
226-
def test_unit_parser(self, units, np_unit, wrapper):
246+
def test_unit_parser(self, unit, np_unit, wrapper):
227247
# validate all units, GH 6855, GH 21762
228-
for unit in units:
229-
# array-likes
230-
expected = TimedeltaIndex(
231-
[np.timedelta64(i, np_unit) for i in np.arange(5).tolist()]
232-
)
233-
result = to_timedelta(wrapper(range(5)), unit=unit)
234-
tm.assert_index_equal(result, expected)
235-
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
236-
tm.assert_index_equal(result, expected)
237-
238-
if unit == "M":
239-
# M is treated as minutes in string repr
240-
expected = TimedeltaIndex(
241-
[np.timedelta64(i, "m") for i in np.arange(5).tolist()]
242-
)
243-
244-
str_repr = [f"{x}{unit}" for x in np.arange(5)]
245-
result = to_timedelta(wrapper(str_repr))
246-
tm.assert_index_equal(result, expected)
247-
result = TimedeltaIndex(wrapper(str_repr))
248-
tm.assert_index_equal(result, expected)
249-
250-
# scalar
251-
expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]"))
252-
253-
result = to_timedelta(2, unit=unit)
254-
assert result == expected
255-
result = Timedelta(2, unit=unit)
256-
assert result == expected
257-
258-
if unit == "M":
259-
expected = Timedelta(np.timedelta64(2, "m").astype("timedelta64[ns]"))
260-
261-
result = to_timedelta(f"2{unit}")
262-
assert result == expected
263-
result = Timedelta(f"2{unit}")
264-
assert result == expected
248+
# array-likes
249+
expected = TimedeltaIndex(
250+
[np.timedelta64(i, np_unit) for i in np.arange(5).tolist()]
251+
)
252+
result = to_timedelta(wrapper(range(5)), unit=unit)
253+
tm.assert_index_equal(result, expected)
254+
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
255+
tm.assert_index_equal(result, expected)
256+
257+
str_repr = [f"{x}{unit}" for x in np.arange(5)]
258+
result = to_timedelta(wrapper(str_repr))
259+
tm.assert_index_equal(result, expected)
260+
result = to_timedelta(wrapper(str_repr))
261+
tm.assert_index_equal(result, expected)
262+
263+
# scalar
264+
expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]"))
265+
result = to_timedelta(2, unit=unit)
266+
assert result == expected
267+
result = Timedelta(2, unit=unit)
268+
assert result == expected
269+
270+
result = to_timedelta(f"2{unit}")
271+
assert result == expected
272+
result = Timedelta(f"2{unit}")
273+
assert result == expected
265274

266275
@pytest.mark.parametrize("unit", ["Y", "y", "M"])
267276
def test_unit_m_y_raises(self, unit):

pandas/tests/series/methods/test_shift.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def test_shift_always_copy(self, ser, shift_size):
3232
# GH22397
3333
assert ser.shift(shift_size) is not ser
3434

35-
@pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1M")])
35+
@pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1min")])
3636
def test_datetime_shift_always_copy(self, move_by_freq):
3737
# GH#22397
3838
ser = Series(range(5), index=date_range("2017", periods=5))

pandas/tests/tools/test_to_timedelta.py

+21
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,27 @@ def test_to_timedelta_invalid(self):
121121
invalid_data, to_timedelta(invalid_data, errors="ignore")
122122
)
123123

124+
@pytest.mark.parametrize(
125+
"val, warning",
126+
[
127+
("1M", FutureWarning),
128+
("1 M", FutureWarning),
129+
("1Y", FutureWarning),
130+
("1 Y", FutureWarning),
131+
("1y", FutureWarning),
132+
("1 y", FutureWarning),
133+
("1m", None),
134+
("1 m", None),
135+
("1 day", None),
136+
("2day", None),
137+
],
138+
)
139+
def test_unambiguous_timedelta_values(self, val, warning):
140+
# GH36666 Deprecate use of strings denoting units with 'M', 'Y', 'm' or 'y'
141+
# in pd.to_timedelta
142+
with tm.assert_produces_warning(warning, check_stacklevel=False):
143+
to_timedelta(val)
144+
124145
def test_to_timedelta_via_apply(self):
125146
# GH 5458
126147
expected = Series([np.timedelta64(1, "s")])

0 commit comments

Comments
 (0)