Skip to content

REF: DataFrame._values return DTA/TDA where appropriate #41531

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,26 +857,37 @@ def _can_fast_transpose(self) -> bool:
# TODO(EA2D) special case would be unnecessary with 2D EAs
return not is_1d_only_ea_dtype(dtype)

# error: Return type "Union[ndarray, DatetimeArray, TimedeltaArray]" of
# "_values" incompatible with return type "ndarray" in supertype "NDFrame"
@property
def _values_compat(self) -> np.ndarray | DatetimeArray | TimedeltaArray:
def _values( # type: ignore[override]
self,
) -> np.ndarray | DatetimeArray | TimedeltaArray:
"""
Analogue to .values that may return a 2D ExtensionArray.
"""
self._consolidate_inplace()

mgr = self._mgr

if isinstance(mgr, ArrayManager):
return self._values
if len(mgr.arrays) == 1 and not is_1d_only_ea_obj(mgr.arrays[0]):
# error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]"
# has no attribute "reshape"
return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr]
return self.values

blocks = mgr.blocks
if len(blocks) != 1:
return self._values
return self.values

arr = blocks[0].values
if arr.ndim == 1:
# non-2D ExtensionArray
return self._values
return self.values

# more generally, whatever we allow in NDArrayBackedExtensionBlock
arr = cast("DatetimeArray | TimedeltaArray", arr)
arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr)
return arr.T

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -3322,7 +3333,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:

if self._can_fast_transpose:
# Note: tests pass without this, but this improves perf quite a bit.
new_vals = self._values_compat.T
new_vals = self._values.T
if copy:
new_vals = new_vals.copy()

Expand Down Expand Up @@ -10621,11 +10632,6 @@ def values(self) -> np.ndarray:
self._consolidate_inplace()
return self._mgr.as_array(transpose=True)

@property
def _values(self) -> np.ndarray:
"""internal implementation"""
return self.values


DataFrame._add_numeric_operations()

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,8 @@ def _slice_take_blocks_ax0(
blk = self.blocks[blkno]

# Otherwise, slicing along items axis is necessary.
if not blk._can_consolidate:
if not blk._can_consolidate and not blk._validate_ndim:
# i.e. we don't go through here for DatetimeTZBlock
# A non-consolidatable block, it's easy, because there's
# only one item and each mgr loc is a copy of that single
# item.
Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/frame/methods/test_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,54 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame):

values = mixed_int_frame[["C"]].values
assert values.dtype == np.uint8


class TestPrivateValues:
    """Checks of the private ``DataFrame._values`` accessor on datetime-like data."""

    def test_private_values_dt64tz(self, using_array_manager, request):
        """
        One tz-aware column: ``_values`` must equal the 2D source array and
        share memory with it; the frame-minus-frame result must equal the
        array-minus-array result.
        """
        if using_array_manager:
            # Flagged as an expected failure when the array manager is in use.
            request.node.add_marker(pytest.mark.xfail(reason="doesn't share memory"))

        tz_index = date_range("2000", periods=4, tz="US/Central")
        source = tz_index._data.reshape(-1, 1)

        frame = DataFrame(source, columns=["A"])
        tm.assert_equal(frame._values, source)

        # we have a view
        assert np.shares_memory(frame._values._ndarray, source._ndarray)

        # TimedeltaArray
        expected_deltas = source - source
        delta_frame = frame - frame
        tm.assert_equal(delta_frame._values, expected_deltas)

    @td.skip_array_manager_invalid_test
    def test_private_values_dt64tz_multicol(self):
        """
        Two tz-aware columns built from one 2D array: same equality,
        memory-sharing and subtraction checks as the single-column case.
        """
        tz_index = date_range("2000", periods=8, tz="US/Central")
        source = tz_index._data.reshape(-1, 2)

        frame = DataFrame(source, columns=["A", "B"])
        tm.assert_equal(frame._values, source)

        # we have a view
        assert np.shares_memory(frame._values._ndarray, source._ndarray)

        # TimedeltaArray
        expected_deltas = source - source
        delta_frame = frame - frame
        tm.assert_equal(delta_frame._values, expected_deltas)

    def test_private_values_dt64_multiblock(self, using_array_manager, request):
        """
        Two datetime columns added separately (so the manager holds two
        arrays): ``_values`` must equal the source data reshaped to (2, 4)
        and transposed.
        """
        if using_array_manager:
            # Flagged as an expected failure when the array manager is in use.
            request.node.add_marker(pytest.mark.xfail(reason="returns ndarray"))

        source = date_range("2000", periods=8)._data

        frame = DataFrame({"A": source[:4]}, copy=False)
        frame["B"] = source[4:]

        # Precondition for this test: the columns are held as two arrays.
        assert len(frame._mgr.arrays) == 2

        actual = frame._values
        wanted = source.reshape(2, 4).T
        tm.assert_equal(actual, wanted)