Skip to content

Commit 08a7a9e

Browse files
authored
BUG: to_datetime with Y or M unit not matching Timestamp (#50870)
* BUG: to_datetime with Y or M unit not matching Timestamp * GH ref
1 parent 448a023 commit 08a7a9e

File tree

6 files changed

+62
-40
lines changed

6 files changed

+62
-40
lines changed

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,8 @@ Datetimelike
957957
- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`)
958958
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
959959
- Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`)
960+
- Bug in :func:`to_datetime` with unit of "Y" or "M" giving incorrect results, not matching pointwise :class:`Timestamp` results (:issue:`50870`)
961+
-
960962

961963
Timedelta
962964
^^^^^^^^^

pandas/_libs/tslib.pyx

+3-16
Original file line numberDiff line numberDiff line change
@@ -220,19 +220,6 @@ def format_array_from_datetime(
220220
return result
221221

222222

223-
cdef int64_t _wrapped_cast_from_unit(object val, str unit) except? -1:
224-
"""
225-
Call cast_from_unit and re-raise OverflowError as OutOfBoundsDatetime
226-
"""
227-
# See also timedeltas._maybe_cast_from_unit
228-
try:
229-
return cast_from_unit(val, unit)
230-
except OverflowError as err:
231-
raise OutOfBoundsDatetime(
232-
f"cannot convert input {val} with the unit '{unit}'"
233-
) from err
234-
235-
236223
def array_with_unit_to_datetime(
237224
ndarray[object] values,
238225
str unit,
@@ -302,7 +289,7 @@ def array_with_unit_to_datetime(
302289
if val != val or val == NPY_NAT:
303290
iresult[i] = NPY_NAT
304291
else:
305-
iresult[i] = _wrapped_cast_from_unit(val, unit)
292+
iresult[i] = cast_from_unit(val, unit)
306293

307294
elif isinstance(val, str):
308295
if len(val) == 0 or val in nat_strings:
@@ -317,7 +304,7 @@ def array_with_unit_to_datetime(
317304
f"non convertible value {val} with the unit '{unit}'"
318305
)
319306

320-
iresult[i] = _wrapped_cast_from_unit(fval, unit)
307+
iresult[i] = cast_from_unit(fval, unit)
321308

322309
else:
323310
# TODO: makes more sense as TypeError, but that would be an
@@ -362,7 +349,7 @@ cdef _array_with_unit_to_datetime_object_fallback(ndarray[object] values, str un
362349
else:
363350
try:
364351
oresult[i] = Timestamp(val, unit=unit)
365-
except OverflowError:
352+
except OutOfBoundsDatetime:
366353
oresult[i] = val
367354

368355
elif isinstance(val, str):

pandas/_libs/tslibs/conversion.pyx

+30-23
Original file line numberDiff line numberDiff line change
@@ -108,22 +108,41 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
108108
if ts is None:
109109
return m
110110

111-
if unit in ["Y", "M"] and is_float_object(ts) and not ts.is_integer():
112-
# GH#47267 it is clear that 2 "M" corresponds to 1970-02-01,
113-
# but not clear what 2.5 "M" corresponds to, so we will
114-
# disallow that case.
115-
raise ValueError(
116-
f"Conversion of non-round float with unit={unit} "
117-
"is ambiguous"
118-
)
111+
if unit in ["Y", "M"]:
112+
if is_float_object(ts) and not ts.is_integer():
113+
# GH#47267 it is clear that 2 "M" corresponds to 1970-02-01,
114+
# but not clear what 2.5 "M" corresponds to, so we will
115+
# disallow that case.
116+
raise ValueError(
117+
f"Conversion of non-round float with unit={unit} "
118+
"is ambiguous"
119+
)
120+
# GH#47266 go through np.datetime64 to avoid weird results e.g. with "Y"
121+
# and 150 we'd get 2120-01-01 09:00:00
122+
if is_float_object(ts):
123+
ts = int(ts)
124+
dt64obj = np.datetime64(ts, unit)
125+
return get_datetime64_nanos(dt64obj, NPY_FR_ns)
119126

120127
# cast the unit, multiply base/frac separately
121128
# to avoid precision issues from float -> int
122-
base = <int64_t>ts
129+
try:
130+
base = <int64_t>ts
131+
except OverflowError as err:
132+
raise OutOfBoundsDatetime(
133+
f"cannot convert input {ts} with the unit '{unit}'"
134+
) from err
135+
123136
frac = ts - base
124137
if p:
125138
frac = round(frac, p)
126-
return <int64_t>(base * m) + <int64_t>(frac * m)
139+
140+
try:
141+
return <int64_t>(base * m) + <int64_t>(frac * m)
142+
except OverflowError as err:
143+
raise OutOfBoundsDatetime(
144+
f"cannot convert input {ts} with the unit '{unit}'"
145+
) from err
127146

128147

129148
cpdef inline (int64_t, int) precision_from_unit(str unit):
@@ -278,25 +297,13 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
278297
if ts == NPY_NAT:
279298
obj.value = NPY_NAT
280299
else:
281-
if unit in ["Y", "M"]:
282-
# GH#47266 cast_from_unit leads to weird results e.g. with "Y"
283-
# and 150 we'd get 2120-01-01 09:00:00
284-
ts = np.datetime64(ts, unit)
285-
return convert_to_tsobject(ts, tz, None, False, False)
286-
287-
ts = ts * cast_from_unit(None, unit)
300+
ts = cast_from_unit(ts, unit)
288301
obj.value = ts
289302
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
290303
elif is_float_object(ts):
291304
if ts != ts or ts == NPY_NAT:
292305
obj.value = NPY_NAT
293306
else:
294-
if unit in ["Y", "M"]:
295-
if ts == int(ts):
296-
# GH#47266 Avoid cast_from_unit, which would give weird results
297-
# e.g. with "Y" and 150.0 we'd get 2120-01-01 09:00:00
298-
return convert_to_tsobject(int(ts), tz, unit, False, False)
299-
300307
ts = cast_from_unit(ts, unit)
301308
obj.value = ts
302309
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)

pandas/_libs/tslibs/timedeltas.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ cdef _maybe_cast_from_unit(ts, str unit):
373373
# assert unit not in ["Y", "y", "M"]
374374
try:
375375
ts = cast_from_unit(ts, unit)
376-
except OverflowError as err:
376+
except OutOfBoundsDatetime as err:
377377
raise OutOfBoundsTimedelta(
378378
f"Cannot cast {ts} from {unit} to 'ns' without overflow."
379379
) from err

pandas/tests/scalar/timestamp/test_constructors.py

+9
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ def test_constructor_from_date_second_reso(self):
5555
ts = Timestamp(obj)
5656
assert ts.unit == "s"
5757

58+
@pytest.mark.parametrize("typ", [int, float])
59+
def test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ):
60+
# GH#50870 make sure we get an OutOfBoundsDatetime instead of OverflowError
61+
val = typ(150000000)
62+
63+
msg = f"cannot convert input {val} with the unit 'D'"
64+
with pytest.raises(OutOfBoundsDatetime, match=msg):
65+
Timestamp(val, unit="D")
66+
5867
@pytest.mark.parametrize("typ", [int, float])
5968
def test_constructor_int_float_with_YM_unit(self, typ):
6069
# GH#47266 avoid the conversions in cast_from_unit

pandas/tests/tools/test_to_datetime.py

+17
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,23 @@ def test_to_datetime_fixed_offset(self):
17011701

17021702

17031703
class TestToDatetimeUnit:
1704+
@pytest.mark.parametrize("unit", ["Y", "M"])
1705+
@pytest.mark.parametrize("item", [150, float(150)])
1706+
def test_to_datetime_month_or_year_unit_int(self, cache, unit, item):
1707+
# GH#50870 Note we have separate tests that pd.Timestamp gets these right
1708+
ts = Timestamp(item, unit=unit)
1709+
expected = DatetimeIndex([ts])
1710+
1711+
result = to_datetime([item], unit=unit, cache=cache)
1712+
tm.assert_index_equal(result, expected)
1713+
1714+
# TODO: this should also work
1715+
# result = to_datetime(np.array([item]), unit=unit, cache=cache)
1716+
# tm.assert_index_equal(result, expected)
1717+
1718+
result = to_datetime(np.array([item], dtype=object), unit=unit, cache=cache)
1719+
tm.assert_index_equal(result, expected)
1720+
17041721
@pytest.mark.parametrize("unit", ["Y", "M"])
17051722
def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
17061723
# GH#50301

0 commit comments

Comments
 (0)