From 8f1f508c8909738a6e43b20230d16cb5044dd883 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Mar 2021 10:27:49 +0100 Subject: [PATCH 1/7] [ArrayManager] DataFrame constructor from ndarray --- pandas/core/internals/construction.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 93aade8d58a71..a6646496a8c80 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -35,6 +35,7 @@ maybe_convert_platform, maybe_infer_to_datetimelike, maybe_upcast, + sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( is_datetime64tz_dtype, @@ -295,6 +296,22 @@ def ndarray_to_mgr( index, columns = _get_axes( values.shape[0], values.shape[1], index=index, columns=columns ) + + if typ == "array": + + values = sanitize_to_nanoseconds(values) + if issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + if dtype is None and is_object_dtype(values.dtype): + arrays = [ + maybe_infer_to_datetimelike(values[:, i].copy()) + for i in range(values.shape[1]) + ] + else: + arrays = [values[:, i].copy() for i in range(values.shape[1])] + return ArrayManager(arrays, [index, columns], verify_integrity=False) + values = values.T # if we don't have a dtype specified, then try to convert objects From f6589dfca744c833e4903c68019e9d747141a38a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 20 Apr 2021 10:19:57 +0200 Subject: [PATCH 2/7] check shape of ndarray --- pandas/core/internals/construction.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 5bc1c91e792f5..660bbcdcc9638 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -306,6 +306,8 @@ def ndarray_to_mgr( values.shape[0], values.shape[1], index=index, columns=columns ) + _check_values_indices_shape_match(values, index, columns) + if typ == "array": values = sanitize_to_nanoseconds(values) @@ -323,8 +325,6 @@ def ndarray_to_mgr( values = values.T - _check_values_indices_shape_match(values, index, columns) - # if we don't have a dtype specified, then try to convert objects # on the entire block; this is to convert if we have datetimelike's # embedded in an object type @@ -366,13 +366,13 @@ def _check_values_indices_shape_match( Check that the shape implied by our axes matches the actual shape of the data. """ - if values.shape[0] != len(columns): + if values.shape[1] != len(columns) or values.shape[0] != len(index): # Could let this raise in Block constructor, but we get a more # helpful exception message this way. - if values.shape[1] == 0: + if values.shape[0] == 0: raise ValueError("Empty data passed with indices specified.") - passed = values.T.shape + passed = values.shape implied = (len(index), len(columns)) raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") From b02fbf863e3e97be994af827c2b9c983ca9958fd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 20 Apr 2021 10:26:16 +0200 Subject: [PATCH 3/7] fix error message in test_astype_to_incorrect_datetimelike --- pandas/tests/frame/methods/test_astype.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 544960113fafc..a8c339b13f39f 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -428,11 +428,23 @@ def test_astype_to_incorrect_datetimelike(self, unit): other = f"m8[{unit}]" df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) - msg = fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]" + msg = "|".join( + [ + fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]", + "cannot astype a datetimelike from " + fr"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]", + ] + ) with pytest.raises(TypeError, match=msg): df.astype(other) - msg = fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]" + msg = "|".join( + [ + fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]", + "cannot astype a timedelta from " + fr"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]", + ] + ) df = DataFrame(np.array([[1, 2, 3]], dtype=other)) with pytest.raises(TypeError, match=msg): df.astype(dtype) From da29bab5a4a265154ce381aa711031cc18377cd7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 20 Apr 2021 14:44:36 +0200 Subject: [PATCH 4/7] ensure_wrapped_if_datetimelike --- pandas/core/internals/construction.py | 11 ++++++++++- pandas/tests/frame/test_constructors.py | 8 ++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 660bbcdcc9638..a716c7c9d87d6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -34,6 +34,7 @@ ) from pandas.core.dtypes.common import ( is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, is_dtype_equal, is_extension_array_dtype, is_integer_dtype, @@ -59,6 +60,7 @@ DatetimeArray, ) from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, extract_array, sanitize_array, ) @@ -316,7 +318,14 @@ def ndarray_to_mgr( if dtype is None and is_object_dtype(values.dtype): arrays = [ - maybe_infer_to_datetimelike(values[:, i].copy()) + ensure_wrapped_if_datetimelike( + maybe_infer_to_datetimelike(values[:, i].copy()) + ) + for i in range(values.shape[1]) + ] + elif is_datetime_or_timedelta_dtype(values.dtype): + arrays = [ + ensure_wrapped_if_datetimelike(values[:, i].copy()) for i in range(values.shape[1]) ] else: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ca68885fdc470..6a7f21d1077f4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -46,6 +46,7 @@ ) import pandas._testing as tm from pandas.arrays import ( + DatetimeArray, IntervalArray, PeriodArray, SparseArray, @@ -2569,6 +2570,13 @@ def test_construction_from_set_raises(self, typ): with pytest.raises(TypeError, match=msg): Series(values) + def test_construction_from_ndarray_datetimelike(self): + # ensure the underlying arrays are properly wrapped as EA when + # constructed from 2D ndarray + arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3) + df = DataFrame(arr) + assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays) + def get1(obj): if isinstance(obj, Series): From b2b8290c0e0e3734de642b2050b1f928a2f03095 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 22 Apr 2021 11:51:27 +0200 Subject: [PATCH 5/7] remove sanitize_to_nanoseconds --- pandas/core/internals/construction.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index a716c7c9d87d6..962c45e7ed12d 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -30,7 +30,6 @@ maybe_convert_platform, maybe_infer_to_datetimelike, maybe_upcast, - sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( is_datetime64tz_dtype, @@ -312,7 +311,6 @@ def ndarray_to_mgr( if typ == "array": - values = sanitize_to_nanoseconds(values) if issubclass(values.dtype.type, str): values = np.array(values, dtype=object) From be15c4d5d78dca1f0dcf49717ec6aa89149ec6d2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 22 Apr 2021 11:52:51 +0200 Subject: [PATCH 6/7] add comment in tests --- pandas/tests/frame/methods/test_astype.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index a8c339b13f39f..1583b3f91bea2 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -430,7 +430,9 @@ def test_astype_to_incorrect_datetimelike(self, unit): df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) msg = "|".join( [ + # BlockManager path fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]", + # ArrayManager path "cannot astype a datetimelike from " fr"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]", ] @@ -440,7 +442,9 @@ def test_astype_to_incorrect_datetimelike(self, unit): msg = "|".join( [ + # BlockManager path fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]", + # ArrayManager path "cannot astype a timedelta from " fr"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]", ] From fa6a3ef8734abe809601c1628f1f8b4e998ddf4f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 23 Apr 2021 00:33:34 +0200 Subject: [PATCH 7/7] ensure_wrapped_if_datetimelike on full array --- pandas/core/internals/construction.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 962c45e7ed12d..d59c5abc9f37c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -321,13 +321,11 @@ def ndarray_to_mgr( ) for i in range(values.shape[1]) ] - elif is_datetime_or_timedelta_dtype(values.dtype): - arrays = [ - ensure_wrapped_if_datetimelike(values[:, i].copy()) - for i in range(values.shape[1]) - ] else: + if is_datetime_or_timedelta_dtype(values.dtype): + values = ensure_wrapped_if_datetimelike(values) arrays = [values[:, i].copy() for i in range(values.shape[1])] + return ArrayManager(arrays, [index, columns], verify_integrity=False) values = values.T