Skip to content

Commit 74a4651

Browse files
jbrockmendel authored and TLouf committed
REF: DataFrame._values return DTA/TDA where appropriate (pandas-dev#41531)
1 parent c5a3cf8 commit 74a4651

File tree

3 files changed

+70
-12
lines changed

3 files changed

+70
-12
lines changed

pandas/core/frame.py

+17 -11
Original file line numberDiff line numberDiff line change
@@ -857,26 +857,37 @@ def _can_fast_transpose(self) -> bool:
857857
# TODO(EA2D) special case would be unnecessary with 2D EAs
858858
return not is_1d_only_ea_dtype(dtype)
859859

860+
# error: Return type "Union[ndarray, DatetimeArray, TimedeltaArray]" of
861+
# "_values" incompatible with return type "ndarray" in supertype "NDFrame"
860862
@property
861-
def _values_compat(self) -> np.ndarray | DatetimeArray | TimedeltaArray:
863+
def _values( # type: ignore[override]
864+
self,
865+
) -> np.ndarray | DatetimeArray | TimedeltaArray:
862866
"""
863867
Analogue to ._values that may return a 2D ExtensionArray.
864868
"""
869+
self._consolidate_inplace()
870+
865871
mgr = self._mgr
872+
866873
if isinstance(mgr, ArrayManager):
867-
return self._values
874+
if len(mgr.arrays) == 1 and not is_1d_only_ea_obj(mgr.arrays[0]):
875+
# error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]"
876+
# has no attribute "reshape"
877+
return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr]
878+
return self.values
868879

869880
blocks = mgr.blocks
870881
if len(blocks) != 1:
871-
return self._values
882+
return self.values
872883

873884
arr = blocks[0].values
874885
if arr.ndim == 1:
875886
# non-2D ExtensionArray
876-
return self._values
887+
return self.values
877888

878889
# more generally, whatever we allow in NDArrayBackedExtensionBlock
879-
arr = cast("DatetimeArray | TimedeltaArray", arr)
890+
arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr)
880891
return arr.T
881892

882893
# ----------------------------------------------------------------------
@@ -3322,7 +3333,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
33223333

33233334
if self._can_fast_transpose:
33243335
# Note: tests pass without this, but this improves perf quite a bit.
3325-
new_vals = self._values_compat.T
3336+
new_vals = self._values.T
33263337
if copy:
33273338
new_vals = new_vals.copy()
33283339

@@ -10621,11 +10632,6 @@ def values(self) -> np.ndarray:
1062110632
self._consolidate_inplace()
1062210633
return self._mgr.as_array(transpose=True)
1062310634

10624-
@property
10625-
def _values(self) -> np.ndarray:
10626-
"""internal implementation"""
10627-
return self.values
10628-
1062910635

1063010636
DataFrame._add_numeric_operations()
1063110637

pandas/core/internals/managers.py

+2 -1
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,8 @@ def _slice_take_blocks_ax0(
759759
blk = self.blocks[blkno]
760760

761761
# Otherwise, slicing along items axis is necessary.
762-
if not blk._can_consolidate:
762+
if not blk._can_consolidate and not blk._validate_ndim:
763+
# i.e. we dont go through here for DatetimeTZBlock
763764
# A non-consolidatable block, it's easy, because there's
764765
# only one item and each mgr loc is a copy of that single
765766
# item.

pandas/tests/frame/methods/test_values.py

+51
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,54 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame):
223223

224224
values = mixed_int_frame[["C"]].values
225225
assert values.dtype == np.uint8
226+
227+
228+
class TestPrivateValues:
229+
def test_private_values_dt64tz(self, using_array_manager, request):
230+
if using_array_manager:
231+
mark = pytest.mark.xfail(reason="doesn't share memory")
232+
request.node.add_marker(mark)
233+
234+
dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1)
235+
236+
df = DataFrame(dta, columns=["A"])
237+
tm.assert_equal(df._values, dta)
238+
239+
# we have a view
240+
assert np.shares_memory(df._values._ndarray, dta._ndarray)
241+
242+
# TimedeltaArray
243+
tda = dta - dta
244+
df2 = df - df
245+
tm.assert_equal(df2._values, tda)
246+
247+
@td.skip_array_manager_invalid_test
248+
def test_private_values_dt64tz_multicol(self):
249+
dta = date_range("2000", periods=8, tz="US/Central")._data.reshape(-1, 2)
250+
251+
df = DataFrame(dta, columns=["A", "B"])
252+
tm.assert_equal(df._values, dta)
253+
254+
# we have a view
255+
assert np.shares_memory(df._values._ndarray, dta._ndarray)
256+
257+
# TimedeltaArray
258+
tda = dta - dta
259+
df2 = df - df
260+
tm.assert_equal(df2._values, tda)
261+
262+
def test_private_values_dt64_multiblock(self, using_array_manager, request):
263+
if using_array_manager:
264+
mark = pytest.mark.xfail(reason="returns ndarray")
265+
request.node.add_marker(mark)
266+
267+
dta = date_range("2000", periods=8)._data
268+
269+
df = DataFrame({"A": dta[:4]}, copy=False)
270+
df["B"] = dta[4:]
271+
272+
assert len(df._mgr.arrays) == 2
273+
274+
result = df._values
275+
expected = dta.reshape(2, 4).T
276+
tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)