From f8b25ba2d2167e3fd72b60f3666887ff906ed054 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 16:45:57 -0700 Subject: [PATCH 1/3] REF: DataFrame._values return DTA/TDA where appropriate --- pandas/core/frame.py | 22 +++++------ pandas/core/internals/managers.py | 3 +- pandas/tests/frame/methods/test_values.py | 47 +++++++++++++++++++++++ 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efefeb23445af..7a66018c40c38 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -858,25 +858,30 @@ def _can_fast_transpose(self) -> bool: return not is_1d_only_ea_dtype(dtype) @property - def _values_compat(self) -> np.ndarray | DatetimeArray | TimedeltaArray: + def _values(self) -> np.ndarray | DatetimeArray | TimedeltaArray: """ Analogue to ._values that may return a 2D ExtensionArray. """ + self._consolidate_inplace() + mgr = self._mgr + if isinstance(mgr, ArrayManager): - return self._values + if len(mgr.arrays) == 1 and not is_1d_only_ea_obj(mgr.arrays[0]): + return mgr.arrays[0].reshape(-1, 1) + return self.values blocks = mgr.blocks if len(blocks) != 1: - return self._values + return self.values arr = blocks[0].values if arr.ndim == 1: # non-2D ExtensionArray - return self._values + return self.values # more generally, whatever we allow in NDArrayBackedExtensionBlock - arr = cast("DatetimeArray | TimedeltaArray", arr) + arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr) return arr.T # ---------------------------------------------------------------------- @@ -3322,7 +3327,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: if self._can_fast_transpose: # Note: tests pass without this, but this improves perf quite a bit. 
- new_vals = self._values_compat.T + new_vals = self._values.T if copy: new_vals = new_vals.copy() @@ -10621,11 +10626,6 @@ def values(self) -> np.ndarray: self._consolidate_inplace() return self._mgr.as_array(transpose=True) - @property - def _values(self) -> np.ndarray: - """internal implementation""" - return self.values - DataFrame._add_numeric_operations() diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ea31f9663cffe..323aa45874d96 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -759,7 +759,8 @@ def _slice_take_blocks_ax0( blk = self.blocks[blkno] # Otherwise, slicing along items axis is necessary. - if not blk._can_consolidate: + if not blk._can_consolidate and not blk._validate_ndim: + # i.e. we don't go through here for DatetimeTZBlock # A non-consolidatable block, it's easy, because there's # only one item and each mgr loc is a copy of that single # item. diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index 548482b23ebc4..c04aa84e08142 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -223,3 +223,50 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame): values = mixed_int_frame[["C"]].values assert values.dtype == np.uint8 + + +class TestPrivateValues: + def test_private_values_dt64tz(self, using_array_manager, request): + if using_array_manager: + mark = pytest.mark.xfail(reason="doesn't share memory") + request.node.add_marker(mark) + + dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1) + + df = DataFrame(dta, columns=["A"]) + tm.assert_equal(df._values, dta) + + # we have a view + assert np.shares_memory(df._values._ndarray, dta._ndarray) + + # TimedeltaArray + tda = dta - dta + df2 = df - df + tm.assert_equal(df2._values, tda) + + @td.skip_array_manager_invalid_test + def test_private_values_dt64tz_multicol(self): + dta = 
date_range("2000", periods=8, tz="US/Central")._data.reshape(-1, 2) + + df = DataFrame(dta, columns=["A", "B"]) + tm.assert_equal(df._values, dta) + + # we have a view + assert np.shares_memory(df._values._ndarray, dta._ndarray) + + # TimedeltaArray + tda = dta - dta + df2 = df - df + tm.assert_equal(df2._values, tda) + + def test_private_values_dt64_multiblock(self): + dta = date_range("2000", periods=8)._data + + df = DataFrame({"A": dta[:4]}, copy=False) + df["B"] = dta[4:] + + assert df._mgr.nblocks == 2 + + result = df._values + expected = dta.reshape(2, 4).T + tm.assert_equal(result, expected) From e617c6c649dad94de3ac320405b929ef507e897d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 May 2021 08:31:53 -0700 Subject: [PATCH 2/3] arraymanager xfail --- pandas/tests/frame/methods/test_values.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index c04aa84e08142..2ff991b62b67e 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -259,13 +259,17 @@ def test_private_values_dt64tz_multicol(self): df2 = df - df tm.assert_equal(df2._values, tda) - def test_private_values_dt64_multiblock(self): + def test_private_values_dt64_multiblock(self, using_array_manager, request): + if using_array_manager: + mark = pytest.mark.xfail(reason="returns ndarray") + request.node.add_marker(mark) + dta = date_range("2000", periods=8)._data df = DataFrame({"A": dta[:4]}, copy=False) df["B"] = dta[4:] - assert df._mgr.nblocks == 2 + assert len(df._mgr.arrays) == 2 result = df._values expected = dta.reshape(2, 4).T From 6d52d99076b674d54c3d941df647deff18c51839 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 May 2021 10:48:34 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/frame.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 
7a66018c40c38..1fa149cd834b0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -857,8 +857,12 @@ def _can_fast_transpose(self) -> bool: # TODO(EA2D) special case would be unnecessary with 2D EAs return not is_1d_only_ea_dtype(dtype) + # error: Return type "Union[ndarray, DatetimeArray, TimedeltaArray]" of + # "_values" incompatible with return type "ndarray" in supertype "NDFrame" @property - def _values(self) -> np.ndarray | DatetimeArray | TimedeltaArray: + def _values( # type: ignore[override] + self, + ) -> np.ndarray | DatetimeArray | TimedeltaArray: """ Analogue to ._values that may return a 2D ExtensionArray. """ @@ -868,7 +872,9 @@ def _values(self) -> np.ndarray | DatetimeArray | TimedeltaArray: if isinstance(mgr, ArrayManager): if len(mgr.arrays) == 1 and not is_1d_only_ea_obj(mgr.arrays[0]): - return mgr.arrays[0].reshape(-1, 1) + # error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]" + # has no attribute "reshape" + return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr] return self.values blocks = mgr.blocks