From c4570b19d7ceb63c30f2044bf2f5e78851a8ffbf Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 27 Jan 2021 20:58:34 +0100 Subject: [PATCH 1/5] BUG: DataFrame constructor reordering elements with ndarray from datetime dtype not datetime64[ns] --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/tests/frame/test_constructors.py | 31 +++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1dcde2000fc89..88e5971abea82 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -233,6 +233,7 @@ Datetimelike - Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`, :issue:`38792`) - Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype or a ``timedelta`` object out of bounds for ``timedelta64[ns]`` dtype (:issue:`38792`, :issue:`38965`) - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) +- Bug in :class:`DataFrame` constructor reordering elements when constructing from a datetime ndarray with a dtype other than ``datetime64[ns]`` (:issue:`39422`) - Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) - Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`) - Bug in comparisons between :class:`Timestamp` object and ``datetime64`` objects just outside the implementation bounds for nanosecond ``datetime64`` (:issue:`39221`) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx 
index 676ff7deb950f..afce60068c715 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -222,7 +222,7 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True): dtype = arr.dtype arr = arr.astype(dtype.newbyteorder("<")) - ivalues = arr.view(np.int64).ravel("K") + ivalues = arr.view(np.int64).ravel("C") result = np.empty(shape, dtype=DT64NS_DTYPE) iresult = result.ravel("K").view(np.int64) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a6aaf3a6af750..daf2d647b51f1 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1762,6 +1762,37 @@ def test_constructor_datetimes_with_nulls(self, arr): expected = Series([np.dtype("datetime64[ns]")]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[M]", + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], + ) + def test_constructor_datetimes_non_ns(self, dtype): + na = np.array( + [ + ["2015-01-01", "2015-01-02", "2015-01-03"], + ["2017-01-01", "2017-01-02", "2017-02-03"], + ], + dtype=dtype, + ) + df = DataFrame(na) + expected = DataFrame( + [ + ["2015-01-01", "2015-01-02", "2015-01-03"], + ["2017-01-01", "2017-01-02", "2017-02-03"], + ] + ) + expected = expected.astype(dtype=dtype) + tm.assert_frame_equal(df, expected) + def test_constructor_for_list_with_dtypes(self): # test list of lists/ndarrays df = DataFrame([np.arange(5) for x in range(5)]) From 3cdd1011ff827cd0e7b879ec8cb4b3caf813ee08 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 27 Jan 2021 23:01:53 +0100 Subject: [PATCH 2/5] Parametrize over contiguity --- pandas/tests/frame/test_constructors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index daf2d647b51f1..296bd0e54fff5 100644 --- 
a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1762,6 +1762,7 @@ def test_constructor_datetimes_with_nulls(self, arr): expected = Series([np.dtype("datetime64[ns]")]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("func", [np.asfortranarray, np.ascontiguousarray]) @pytest.mark.parametrize( "dtype", [ @@ -1775,8 +1776,8 @@ def test_constructor_datetimes_with_nulls(self, arr): "datetime64[ns]", ], ) - def test_constructor_datetimes_non_ns(self, dtype): - na = np.array( + def test_constructor_datetimes_non_ns(self, func, dtype): + na = func( [ ["2015-01-01", "2015-01-02", "2015-01-03"], ["2017-01-01", "2017-01-02", "2017-02-03"], From 62384855d676be4cf78ca7efa9a34ad03f468d0d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 27 Jan 2021 23:03:45 +0100 Subject: [PATCH 3/5] Add order --- pandas/tests/frame/test_constructors.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 296bd0e54fff5..ade3b85181b95 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1762,7 +1762,7 @@ def test_constructor_datetimes_with_nulls(self, arr): expected = Series([np.dtype("datetime64[ns]")]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("func", [np.asfortranarray, np.ascontiguousarray]) + @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) @pytest.mark.parametrize( "dtype", [ @@ -1776,13 +1776,14 @@ def test_constructor_datetimes_with_nulls(self, arr): "datetime64[ns]", ], ) - def test_constructor_datetimes_non_ns(self, func, dtype): - na = func( + def test_constructor_datetimes_non_ns(self, order, dtype): + na = np.array( [ ["2015-01-01", "2015-01-02", "2015-01-03"], ["2017-01-01", "2017-01-02", "2017-02-03"], ], dtype=dtype, + order=order, ) df = DataFrame(na) expected = DataFrame( From 6b39f010a60d78a13eb9a72f930f8562f6a5a33c Mon Sep 17 
00:00:00 2001 From: phofl Date: Thu, 28 Jan 2021 00:29:38 +0100 Subject: [PATCH 4/5] Change empty construction --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index afce60068c715..0a22bd9b849a7 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -222,9 +222,9 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True): dtype = arr.dtype arr = arr.astype(dtype.newbyteorder("<")) - ivalues = arr.view(np.int64).ravel("C") + ivalues = arr.view(np.int64).ravel("K") - result = np.empty(shape, dtype=DT64NS_DTYPE) + result = np.empty_like(arr, dtype=DT64NS_DTYPE) iresult = result.ravel("K").view(np.int64) if len(iresult) == 0: From 85f0bd405088aec19c0cfe7b6814ace221fe88b5 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 28 Jan 2021 19:54:03 +0100 Subject: [PATCH 5/5] Add timedelta tests --- pandas/tests/frame/test_constructors.py | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ade3b85181b95..f23b8d559a00a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1795,6 +1795,37 @@ def test_constructor_datetimes_non_ns(self, order, dtype): expected = expected.astype(dtype=dtype) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) + @pytest.mark.parametrize( + "dtype", + [ + "timedelta64[D]", + "timedelta64[h]", + "timedelta64[m]", + "timedelta64[s]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[ns]", + ], + ) + def test_constructor_timedelta_non_ns(self, order, dtype): + na = np.array( + [ + [np.timedelta64(1, "D"), np.timedelta64(2, "D")], + [np.timedelta64(4, "D"), np.timedelta64(5, "D")], + ], + dtype=dtype, + order=order, + ) + df = DataFrame(na).astype("timedelta64[ns]") + expected = 
DataFrame( + [ + [Timedelta(1, "D"), Timedelta(2, "D")], + [Timedelta(4, "D"), Timedelta(5, "D")], + ], + ) + tm.assert_frame_equal(df, expected) + def test_constructor_for_list_with_dtypes(self): # test list of lists/ndarrays df = DataFrame([np.arange(5) for x in range(5)])