diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 9957ccb4fde50..5a121823e1453 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -495,6 +495,7 @@ Other API changes
   new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
   methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
 - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
+- :func:`to_datetime` with ``unit`` of either ``"Y"`` or ``"M"`` will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 976a53e9117de..0b3316993bb11 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -263,9 +263,13 @@ def array_with_unit_to_datetime(
         ndarray[int64_t] iresult
         ndarray[object] oresult
         object tz = None
+        bint is_ym
+        double fval  # parsed numeric string; double keeps Python float precision
 
     assert is_ignore or is_coerce or is_raise
 
+    is_ym = unit in ("Y", "M")  # year/month units reject non-round floats
+
     if unit == "ns":
         result, tz = array_to_datetime(
             values.astype(object, copy=False),
@@ -290,6 +294,18 @@ def array_with_unit_to_datetime(
                 if val != val or val == NPY_NAT:
                     iresult[i] = NPY_NAT
                 else:
+                    if is_ym and is_float_object(val) and not val.is_integer():
+                        # Analogous to GH#47266 for Timestamp
+                        if is_raise:
+                            raise ValueError(
+                                f"Conversion of non-round float with unit={unit} "
+                                "is ambiguous"
+                            )
+                        elif is_ignore:
+                            raise AssertionError
+                        iresult[i] = NPY_NAT  # errors="coerce"
+                        continue
+
                     try:
                         iresult[i] = cast_from_unit(val, unit)
                     except OverflowError:
@@ -306,8 +322,33 @@ def array_with_unit_to_datetime(
                     iresult[i] = NPY_NAT
 
                 else:
+
+                    try:
+                        fval = float(val)
+                    except ValueError:
+                        if is_raise:
+                            raise ValueError(
+                                f"non convertible value {val} with the unit '{unit}'"
+                            )
+                        elif is_ignore:
+                            raise AssertionError
+                        iresult[i] = NPY_NAT  # errors="coerce"
+                        continue
+
+                    if is_ym and not fval.is_integer():
+                        # Analogous to GH#47266 for Timestamp
+                        if is_raise:
+                            raise ValueError(
+                                f"Conversion of non-round float with unit={unit} "
+                                "is ambiguous"
+                            )
+                        elif is_ignore:
+                            raise AssertionError
+                        iresult[i] = NPY_NAT  # errors="coerce"
+                        continue
+
                     try:
-                        iresult[i] = cast_from_unit(float(val), unit)
+                        iresult[i] = cast_from_unit(fval, unit)
                     except ValueError:
                         if is_raise:
                             raise ValueError(
@@ -345,6 +386,7 @@ def array_with_unit_to_datetime(
     # and are in ignore mode
     # redo as object
 
+    # TODO: fix subtle differences between this and no-unit code
     oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
     for i in range(n):
         val = values[i]
@@ -357,7 +399,7 @@ def array_with_unit_to_datetime(
                 oresult[i] = NaT
             else:
                 try:
-                    oresult[i] = Timestamp(cast_from_unit(val, unit))
+                    oresult[i] = Timestamp(val, unit=unit)
                 except OverflowError:
                     oresult[i] = val
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 83e40f5f1d98b..559e4602a37e1 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1489,6 +1489,38 @@ def test_to_datetime_fixed_offset(self):
 
 
 class TestToDatetimeUnit:
+    @pytest.mark.parametrize("unit", ["Y", "M"])
+    def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
+        # GH#50301
+        # Match Timestamp behavior in disallowing non-round floats with
+        #  Y or M unit
+        msg = f"Conversion of non-round float with unit={unit} is ambiguous"
+        with pytest.raises(ValueError, match=msg):
+            to_datetime([1.5], unit=unit, errors="raise")
+        with pytest.raises(ValueError, match=msg):
+            to_datetime(["1.5"], unit=unit, errors="raise")
+
+        # with errors="ignore" we also end up raising within the Timestamp
+        #  constructor; this may not be ideal
+        with pytest.raises(ValueError, match=msg):
+            to_datetime([1.5], unit=unit, errors="ignore")
+        # TODO: we are NOT consistent with the Timestamp behavior in the
+        #  float-like string case
+        # with pytest.raises(ValueError, match=msg):
+        #    to_datetime(["1.5"], unit=unit, errors="ignore")
+
+        res = to_datetime([1.5], unit=unit, errors="coerce")
+        expected = Index([NaT], dtype="M8[ns]")
+        tm.assert_index_equal(res, expected)
+
+        res = to_datetime(["1.5"], unit=unit, errors="coerce")
+        tm.assert_index_equal(res, expected)
+
+        # round floats are OK
+        res = to_datetime([1.0], unit=unit)
+        expected = to_datetime([1], unit=unit)
+        tm.assert_index_equal(res, expected)
+
     def test_unit(self, cache):
         # GH 11758
         # test proper behavior with errors