From 7e1f1355668aefd38b2b239d280cd0d8e82ef709 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Nov 2023 17:22:49 -0800
Subject: [PATCH 1/6] BUG: to_datetime with floats and unit not matching
 Timestamp

---
 doc/source/whatsnew/v2.2.0.rst         |  1 +
 pandas/_libs/tslibs/conversion.pxd     |  2 +-
 pandas/_libs/tslibs/conversion.pyi     |  4 --
 pandas/_libs/tslibs/conversion.pyx     | 79 +++++++++++++++++++++++++-
 pandas/core/arrays/timedeltas.py       | 23 ++------
 pandas/core/tools/datetimes.py         | 34 +++++------
 pandas/tests/io/sas/test_sas7bdat.py   | 13 ++++-
 pandas/tests/tools/test_to_datetime.py | 22 ++++---
 8 files changed, 123 insertions(+), 55 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 3d34955a3baa5..c012cbb7d6e29 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -357,6 +357,7 @@ Datetimelike
 - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
 - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` (or :class:`DatetimeTZDtype`) from mixed-numeric inputs treating those as nanoseconds instead of as multiples of the dtype's unit (which would happen with non-mixed numeric inputs) (:issue:`56004`)
 - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
+- Bug in the results of :func:`pd.to_datetime` with an floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`??`)
 -
 
 Timedelta
diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
index 7ffd630d6d8e1..6b9f41b1bb06f 100644
--- a/pandas/_libs/tslibs/conversion.pxd
+++ b/pandas/_libs/tslibs/conversion.pxd
@@ -45,7 +45,7 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1
 
 cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
 cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
-cpdef (int64_t, int) precision_from_unit(
+cdef (int64_t, int) precision_from_unit(
     NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
 )
 
diff --git a/pandas/_libs/tslibs/conversion.pyi b/pandas/_libs/tslibs/conversion.pyi
index a5f5d04efeb76..93935789abc80 100644
--- a/pandas/_libs/tslibs/conversion.pyi
+++ b/pandas/_libs/tslibs/conversion.pyi
@@ -8,8 +8,4 @@ import numpy as np
 DT64NS_DTYPE: np.dtype
 TD64NS_DTYPE: np.dtype
 
-def precision_from_unit(
-    in_reso: int,
-    out_reso: int = ...,
-) -> tuple[int, int]: ...  # (int64_t, _)
 def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 84aceecb09a33..764f2dc853df3 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -1,8 +1,11 @@
+cimport cython
+
 import numpy as np
 
 cimport numpy as cnp
 from libc.math cimport log10
 from numpy cimport (
+    float64_t,
     int32_t,
     int64_t,
 )
@@ -37,6 +40,7 @@ from pandas._libs.tslibs.np_datetime cimport (
     NPY_DATETIMEUNIT,
     NPY_FR_ns,
     NPY_FR_us,
+    astype_overflowsafe,
     check_dts_bounds,
     convert_reso,
     dts_to_iso_string,
@@ -74,6 +78,7 @@ from pandas._libs.tslibs.tzconversion cimport (
 from pandas._libs.tslibs.util cimport (
     is_float_object,
     is_integer_object,
+    is_nan,
 )
 
 # ----------------------------------------------------------------------
@@ -86,6 +91,78 @@ TD64NS_DTYPE = np.dtype("m8[ns]")
 # ----------------------------------------------------------------------
 # Unit Conversion Helpers
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.overflowcheck(True)
+def cast_from_unit_vectorized(ndarray values, str unit):
+    """
+    Vectorized analogue to cast_from_unit.
+
+    Convert a float ndarray holding multiples of ``unit`` into an int64
+    ndarray of nanoseconds, with NaN entries becoming NPY_NAT.
+
+    Raises ValueError for non-round values when unit is "Y" or "M", and
+    OutOfBoundsDatetime when a value cannot be represented in the result.
+    """
+    cdef:
+        int64_t m
+        int p
+        Py_ssize_t i
+        ndarray[int64_t] base, out
+        ndarray[float64_t] frac
+        tuple shape = (<object>values).shape
+
+    assert values.dtype.kind == "f"
+
+    if unit in ("Y", "M"):
+        if not (((values % 1) == 0) | np.isnan(values)).all():
+            # GH#47267 it is clear that 2 "M" corresponds to 1970-02-01,
+            #  but not clear what 2.5 "M" corresponds to, so we will
+            #  disallow that case.
+            raise ValueError(
+                f"Conversion of non-round float with unit={unit} "
+                "is ambiguous"
+            )
+
+        # GH#47266 go through np.datetime64 to avoid weird results e.g. with "Y"
+        #  and 150 we'd get 2120-01-01 09:00:00
+        values = values.astype(f"M8[{unit}]")
+        dtype = np.dtype("M8[ns]")
+        return astype_overflowsafe(values, dtype=dtype, copy=False).view("i8")
+
+    m, p = precision_from_unit(abbrev_to_npy_unit(unit), abbrev_to_npy_unit("ns"))
+
+    out = np.empty(shape, dtype="i8")
+    base = np.empty(shape, dtype="i8")
+    # zero-fill: NaN slots are never written below but are still read by np.round
+    frac = np.zeros(shape, dtype="f8")
+
+    try:
+        # the float -> int64 cast can itself overflow, so keep it inside the try
+        for i in range(len(values)):
+            if is_nan(values[i]):
+                base[i] = NPY_NAT
+            else:
+                base[i] = <int64_t>values[i]
+                frac[i] = values[i] - base[i]
+
+        if p:
+            frac = np.round(frac, p)
+
+        for i in range(len(values)):
+            if base[i] == NPY_NAT:
+                out[i] = NPY_NAT
+            else:
+                out[i] = <int64_t>(base[i] * m) + <int64_t>(frac[i] * m)
+    except (OverflowError, FloatingPointError) as err:
+        # FloatingPointError can be issued if we have float dtype and have
+        #  set np.errstate(over="raise")
+        raise OutOfBoundsDatetime(
+            f"cannot convert input {values[i]} with the unit '{unit}'"
+        ) from err
+    return out
+
+
 cdef int64_t cast_from_unit(
     object ts,
     str unit,
@@ -155,7 +232,7 @@ cdef int64_t cast_from_unit(
         ) from err
 
 
-cpdef (int64_t, int) precision_from_unit(
+cdef (int64_t, int) precision_from_unit(
     NPY_DATETIMEUNIT in_reso,
     NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
 ):
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 226e2568fdbf8..f55d3de8878ad 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -6,7 +6,6 @@
     TYPE_CHECKING,
     cast,
 )
-import warnings
 
 import numpy as np
 
@@ -27,8 +26,7 @@
     npy_unit_to_abbrev,
     periods_per_second,
 )
-from pandas._libs.tslibs.conversion import precision_from_unit
-from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
+from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
 from pandas._libs.tslibs.fields import (
     get_timedelta_days,
     get_timedelta_field,
@@ -1059,23 +1057,10 @@ def sequence_to_td64ns(
             data = data._data
         else:
             mask = np.isnan(data)
-        # The next few lines are effectively a vectorized 'cast_from_unit'
-        m, p = precision_from_unit(abbrev_to_npy_unit(unit or "ns"))
-        with warnings.catch_warnings():
-            # Suppress RuntimeWarning about All-NaN slice
-            warnings.filterwarnings(
-                "ignore", "invalid value encountered in cast", RuntimeWarning
-            )
-            base = data.astype(np.int64)
-        frac = data - base
-        if p:
-            frac = np.round(frac, p)
-        with warnings.catch_warnings():
-            warnings.filterwarnings(
-                "ignore", "invalid value encountered in cast", RuntimeWarning
-            )
-            data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")
+
+        data = cast_from_unit_vectorized(data, unit or "ns")
         data[mask] = iNaT
+        data = data.view("m8[ns]")
         copy = False
 
     elif lib.is_np_dtype(data.dtype, "m"):
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index ea5e6e46f58ec..863fb414a75f2 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -26,12 +26,10 @@
     Timestamp,
     astype_overflowsafe,
     get_unit_from_dtype,
-    iNaT,
     is_supported_unit,
     timezones as libtimezones,
 )
-from pandas._libs.tslibs.conversion import precision_from_unit
-from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
+from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
 from pandas._libs.tslibs.parsing import (
     DateParseError,
     guess_datetime_format,
@@ -551,23 +549,19 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
             tz_parsed = None
 
         elif arg.dtype.kind == "f":
-            mult, _ = precision_from_unit(abbrev_to_npy_unit(unit))
-
-            mask = np.isnan(arg) | (arg == iNaT)
-            fvalues = (arg * mult).astype("f8", copy=False)
-            fvalues[mask] = 0
-
-            if (fvalues < Timestamp.min._value).any() or (
-                fvalues > Timestamp.max._value
-            ).any():
-                if errors != "raise":
-                    arg = arg.astype(object)
-                    return _to_datetime_with_unit(arg, unit, name, utc, errors)
-                raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
-
-            arr = fvalues.astype("M8[ns]", copy=False)
-            arr[mask] = np.datetime64("NaT", "ns")
-
+            with np.errstate(over="raise"):
+                try:
+                    arr = cast_from_unit_vectorized(arg, unit=unit)
+                except OutOfBoundsDatetime:
+                    if errors != "raise":
+                        return _to_datetime_with_unit(
+                            arg.astype(object), unit, name, utc, errors
+                        )
+                    raise OutOfBoundsDatetime(
+                        f"cannot convert input with unit '{unit}'"
+                    )
+
+            arr = arr.view("M8[ns]")
             tz_parsed = None
         else:
             arg = arg.astype(object, copy=False)
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index d56139d32b1da..58a6fa43dded5 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -187,7 +187,18 @@ def test_date_time(datapath):
         fname, parse_dates=["Date1", "Date2", "DateTime", "DateTimeHi", "Taiw"]
     )
     # GH 19732: Timestamps imported from sas will incur floating point errors
-    df[df.columns[3]] = df.iloc[:, 3].dt.round("us")
+    # 2023-11-16 we don't know the correct "expected" result bc we do not have
+    #  access to SAS to read the sas7bdat file. We are really just testing
+    #  that we are "close". This only seems to be an issue near the
+    #  implementation bounds.
+    res = df.iloc[:, 3].dt.round("us").copy()
+
+    # the first and last elements are near the implementation bounds, where we
+    #  would expect floating point error to occur.
+    res.iloc[0] -= pd.Timedelta(microseconds=1)
+    res.iloc[-1] += pd.Timedelta(microseconds=1)
+
+    df.iloc[:, 3] = res
     tm.assert_frame_equal(df, df0)
 
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index ef76d99260764..b7db5545a9e26 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1864,16 +1864,14 @@ def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
         result = to_datetime(np.array([item], dtype=object), unit=unit, cache=cache)
         tm.assert_index_equal(result, expected)
 
-        # TODO: this should also work
-        if isinstance(item, float):
-            request.applymarker(
-                pytest.mark.xfail(
-                    reason=f"{type(item).__name__} in np.array should work"
-                )
-            )
         result = to_datetime(np.array([item]), unit=unit, cache=cache)
         tm.assert_index_equal(result, expected)
 
+        # with a nan!
+        result = to_datetime(np.array([item, np.nan]), unit=unit, cache=cache)
+        assert result.isna()[1]
+        tm.assert_index_equal(result[:1], expected)
+
     @pytest.mark.parametrize("unit", ["Y", "M"])
     def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
         # GH#50301
@@ -1883,6 +1881,8 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
         msg = f"Conversion of non-round float with unit={unit} is ambiguous"
         with pytest.raises(ValueError, match=msg):
             to_datetime([1.5], unit=unit, errors="raise")
+        with pytest.raises(ValueError, match=msg):
+            to_datetime(np.array([1.5]), unit=unit, errors="raise")
         with pytest.raises(ValueError, match=msg):
             with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                 to_datetime(["1.5"], unit=unit, errors="raise")
@@ -2030,10 +2030,14 @@ def test_unit_mixed(self, cache, arr):
     def test_unit_rounding(self, cache):
         # GH 14156 & GH 20445: argument will incur floating point errors
         # but no premature rounding
-        result = to_datetime(1434743731.8770001, unit="s", cache=cache)
-        expected = Timestamp("2015-06-19 19:55:31.877000192")
+        value = 1434743731.8770001
+        result = to_datetime(value, unit="s", cache=cache)
+        expected = Timestamp("2015-06-19 19:55:31.877000093")
         assert result == expected
 
+        alt = Timestamp(value, unit="s")
+        assert alt == result
+
     def test_unit_ignore_keeps_name(self, cache):
         # GH 21697
         expected = Index([15e9] * 2, name="name")

From 48db2a5c1ce54bb76388a17bdd88cc57bb860259 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Nov 2023 21:50:52 -0800
Subject: [PATCH 2/6] mypy fixup

---
 pandas/_libs/tslibs/conversion.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/tslibs/conversion.pyi b/pandas/_libs/tslibs/conversion.pyi
index 93935789abc80..cfe39fe2964cb 100644
--- a/pandas/_libs/tslibs/conversion.pyi
+++ b/pandas/_libs/tslibs/conversion.pyi
@@ -9,3 +9,4 @@ DT64NS_DTYPE: np.dtype
 TD64NS_DTYPE: np.dtype
 
 def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
+def cast_from_unit_vectorized(values: np.ndarray, unit: str) -> np.ndarray: ...

From 777a1a3d12c410a3dd9b941882c7149b55147af1 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 20 Nov 2023 09:57:23 -0800
Subject: [PATCH 3/6] Update doc/source/whatsnew/v2.2.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v2.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index b565fa28f2b70..98244798a804c 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -359,7 +359,7 @@ Datetimelike
 - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
 - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` (or :class:`DatetimeTZDtype`) from mixed-numeric inputs treating those as nanoseconds instead of as multiples of the dtype's unit (which would happen with non-mixed numeric inputs) (:issue:`56004`)
 - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
-- Bug in the results of :func:`pd.to_datetime` with an floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`??`)
+- Bug in the results of :func:`pd.to_datetime` with a floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`56037`)
 -
 
 Timedelta

From 3192cfded90364b93aae10e0b5f656e8581a6959 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 20 Nov 2023 13:59:22 -0800
Subject: [PATCH 4/6] update for CoW

---
 pandas/tests/io/sas/test_sas7bdat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index 58a6fa43dded5..0ce428cef9520 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -198,7 +198,7 @@ def test_date_time(datapath):
     res.iloc[0] -= pd.Timedelta(microseconds=1)
     res.iloc[-1] += pd.Timedelta(microseconds=1)
 
-    df.iloc[:, 3] = res
+    df["DateTimeHi"] = res
     tm.assert_frame_equal(df, df0)
 
 

From f77107944e2d2d3e3b484835252518e598b4741d Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 21 Nov 2023 12:36:33 -0800
Subject: [PATCH 5/6] xfail float32 case

---
 pandas/tests/tools/test_to_timedelta.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
index c4c9b41c218a0..8b5b09092ec18 100644
--- a/pandas/tests/tools/test_to_timedelta.py
+++ b/pandas/tests/tools/test_to_timedelta.py
@@ -232,9 +232,18 @@ def test_to_timedelta_on_missing_values_list(self, val):
         actual = to_timedelta([val])
         assert actual[0]._value == np.timedelta64("NaT").astype("int64")
 
-    def test_to_timedelta_float(self):
+    @pytest.mark.parametrize(
+        "dtype",
+        [
+            pytest.param(
+                np.float32, marks=pytest.mark.xfail(reason="Floating point error")
+            ),
+            np.float64,
+        ],
+    )
+    def test_to_timedelta_float(self, dtype):
         # https://github.com/pandas-dev/pandas/issues/25077
-        arr = np.arange(0, 1, 1e-6)[-10:]
+        arr = np.arange(0, 1, 1e-6)[-10:].astype(dtype)
         result = to_timedelta(arr, unit="s")
         expected_asi8 = np.arange(999990000, 10**9, 1000, dtype="int64")
         tm.assert_numpy_array_equal(result.asi8, expected_asi8)

From 47efe18209a859850c70288a64747a21031a9117 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 21 Nov 2023 13:23:10 -0800
Subject: [PATCH 6/6] xfail on 32bit

---
 pandas/tests/tools/test_to_timedelta.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
index 8b5b09092ec18..e588bc83b0de8 100644
--- a/pandas/tests/tools/test_to_timedelta.py
+++ b/pandas/tests/tools/test_to_timedelta.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pytest
 
+from pandas.compat import IS64
 from pandas.errors import OutOfBoundsTimedelta
 
 import pandas as pd
@@ -232,18 +233,10 @@ def test_to_timedelta_on_missing_values_list(self, val):
         actual = to_timedelta([val])
         assert actual[0]._value == np.timedelta64("NaT").astype("int64")
 
-    @pytest.mark.parametrize(
-        "dtype",
-        [
-            pytest.param(
-                np.float32, marks=pytest.mark.xfail(reason="Floating point error")
-            ),
-            np.float64,
-        ],
-    )
-    def test_to_timedelta_float(self, dtype):
+    @pytest.mark.xfail(not IS64, reason="Floating point error")
+    def test_to_timedelta_float(self):
         # https://github.com/pandas-dev/pandas/issues/25077
-        arr = np.arange(0, 1, 1e-6)[-10:].astype(dtype)
+        arr = np.arange(0, 1, 1e-6)[-10:]
         result = to_timedelta(arr, unit="s")
         expected_asi8 = np.arange(999990000, 10**9, 1000, dtype="int64")
         tm.assert_numpy_array_equal(result.asi8, expected_asi8)