From 84572a101f024d8f740429be26f9f95741cc3fde Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Wed, 21 Feb 2024 06:55:19 -0500 Subject: [PATCH] Fix accidental loss-of-precision for to_datetime(str, unit=...) In Pandas 1.5.3, the `float(val)` cast was inline to the `cast_from_unit` call in `array_with_unit_to_datetime`. This caused the intermediate (unnamed) value to be a Python float. Since #50301, a temporary variable was added to avoid multiple casts, but with explicit type `cdef float`, which defines a _Cython_ float. This type is 32-bit, and causes a loss of precision, and a regression in parsing from 1.5.3. So widen the explicit type of the temporary `fval` variable to (64-bit) `double`, which will not lose precision. Fixes #57051 --- doc/source/whatsnew/v2.2.2.rst | 2 +- pandas/_libs/tslib.pyx | 2 +- pandas/tests/tools/test_to_datetime.py | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 54084abab7817..19539918b8c8f 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -15,7 +15,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) -- +- Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`) .. --------------------------------------------------------------------------- .. _whatsnew_222.bug_fixes: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d320bfdbe3022..effd7f586f266 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -275,7 +275,7 @@ def array_with_unit_to_datetime( bint is_raise = errors == "raise" ndarray[int64_t] iresult tzinfo tz = None - float fval + double fval assert is_coerce or is_raise diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 9d93a05cf1761..7992b48a4b0cc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1735,6 +1735,14 @@ def test_unit(self, cache): with pytest.raises(ValueError, match=msg): to_datetime([1], unit="D", format="%Y%m%d", cache=cache) + def test_unit_str(self, cache): + # GH 57051 + # Test that strs aren't dropping precision to 32-bit accidentally. + with tm.assert_produces_warning(FutureWarning): + res = to_datetime(["1704660000"], unit="s", origin="unix") + expected = to_datetime([1704660000], unit="s", origin="unix") + tm.assert_index_equal(res, expected) + def test_unit_array_mixed_nans(self, cache): values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]