Skip to content

BUG: to_datetime with M or Y unit and non-round float #50301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 28, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ Other API changes
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
-

.. ---------------------------------------------------------------------------
Expand Down
46 changes: 44 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,13 @@ def array_with_unit_to_datetime(
ndarray[int64_t] iresult
ndarray[object] oresult
object tz = None
bint is_ym
float fval

assert is_ignore or is_coerce or is_raise

is_ym = unit in "YM"

if unit == "ns":
result, tz = array_to_datetime(
values.astype(object, copy=False),
Expand All @@ -298,6 +302,18 @@ def array_with_unit_to_datetime(
if val != val or val == NPY_NAT:
iresult[i] = NPY_NAT
else:
if is_ym and is_float_object(val) and not val.is_integer():
# Analogous to GH#47266 for Timestamp
if is_raise:
raise ValueError(
f"Conversion of non-round float with unit={unit} "
"is ambiguous"
)
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
continue

try:
iresult[i] = cast_from_unit(val, unit)
except OverflowError:
Expand All @@ -314,8 +330,33 @@ def array_with_unit_to_datetime(
iresult[i] = NPY_NAT

else:

try:
fval = float(val)
except ValueError:
if is_raise:
raise ValueError(
f"non convertible value {val} with the unit '{unit}'"
)
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
continue

if is_ym and not fval.is_integer():
# Analogous to GH#47266 for Timestamp
if is_raise:
raise ValueError(
f"Conversion of non-round float with unit={unit} "
"is ambiguous"
)
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
continue
Comment on lines +338 to +348
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry to block this, but is there a test that hits this?

I can see tests when val is a non-round float, but what about when it's a string containing a non-round float (e.g. '1.5'), which I believe is what would reach this branch?


try:
iresult[i] = cast_from_unit(float(val), unit)
iresult[i] = cast_from_unit(fval, unit)
except ValueError:
if is_raise:
raise ValueError(
Expand Down Expand Up @@ -353,6 +394,7 @@ def array_with_unit_to_datetime(
# and are in ignore mode
# redo as object

# TODO: fix subtle differences between this and no-unit code
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
for i in range(n):
val = values[i]
Expand All @@ -365,7 +407,7 @@ def array_with_unit_to_datetime(
oresult[i] = <object>NaT
else:
try:
oresult[i] = Timestamp(cast_from_unit(val, unit))
oresult[i] = Timestamp(val, unit=unit)
except OverflowError:
oresult[i] = val

Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1489,6 +1489,29 @@ def test_to_datetime_fixed_offset(self):


class TestToDatetimeUnit:
@pytest.mark.parametrize("unit", ["Y", "M"])
def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
# GH#50301
# Match Timestamp behavior in disallowing non-round floats with
# Y or M unit
msg = f"Conversion of non-round float with unit={unit} is ambiguous"
with pytest.raises(ValueError, match=msg):
to_datetime([1.5], unit=unit, errors="raise")

# with errors="ignore" we also end up raising within the Timestamp
# constructor; this may not be ideal
with pytest.raises(ValueError, match=msg):
to_datetime([1.5], unit=unit, errors="ignore")

res = to_datetime([1.5], unit=unit, errors="coerce")
expected = Index([NaT], dtype="M8[ns]")
tm.assert_index_equal(res, expected)

# round floats are OK
res = to_datetime([1.0], unit=unit)
expected = to_datetime([1], unit=unit)
tm.assert_index_equal(res, expected)

def test_unit(self, cache):
# GH 11758
# test proper behavior with errors
Expand Down