Skip to content

Commit f04e8b0

Browse files
authored
API: avoid silent consolidation (#49456)
* API: avoid silent consolidation * update test * fix ArrayManager xfail * whatsnew
1 parent a614b7a commit f04e8b0

File tree

5 files changed

+8
-29
lines changed

5 files changed

+8
-29
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ Other API changes
348348
- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
349349
- :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of an :class:`Int64Index` (:issue:`49745`)
350350
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values; this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
351+
- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`)
351352
-
352353

353354
.. ---------------------------------------------------------------------------

pandas/core/frame.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@
165165
)
166166
from pandas.core.arrays.sparse import SparseFrameAccessor
167167
from pandas.core.construction import (
168+
ensure_wrapped_if_datetimelike,
168169
extract_array,
169170
sanitize_array,
170171
sanitize_masked_array,
@@ -960,20 +961,18 @@ def _values( # type: ignore[override]
960961
"""
961962
Analogue to ._values that may return a 2D ExtensionArray.
962963
"""
963-
self._consolidate_inplace()
964-
965964
mgr = self._mgr
966965

967966
if isinstance(mgr, ArrayManager):
968967
if len(mgr.arrays) == 1 and not is_1d_only_ea_dtype(mgr.arrays[0].dtype):
969968
# error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]"
970969
# has no attribute "reshape"
971970
return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr]
972-
return self.values
971+
return ensure_wrapped_if_datetimelike(self.values)
973972

974973
blocks = mgr.blocks
975974
if len(blocks) != 1:
976-
return self.values
975+
return ensure_wrapped_if_datetimelike(self.values)
977976

978977
arr = blocks[0].values
979978
if arr.ndim == 1:
@@ -1804,7 +1803,6 @@ def to_numpy(
18041803
array([[1, 3.0, Timestamp('2000-01-01 00:00:00')],
18051804
[2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object)
18061805
"""
1807-
self._consolidate_inplace()
18081806
if dtype is not None:
18091807
dtype = np.dtype(dtype)
18101808
result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value)
@@ -11291,7 +11289,6 @@ def values(self) -> np.ndarray:
1129111289
['lion', 80.5, 1],
1129211290
['monkey', nan, None]], dtype=object)
1129311291
"""
11294-
self._consolidate_inplace()
1129511292
return self._mgr.as_array()
1129611293

1129711294
@overload

pandas/core/generic.py

-9
Original file line numberDiff line numberDiff line change
@@ -3777,7 +3777,6 @@ def _take(
37773777
37783778
See the docstring of `take` for full explanation of the parameters.
37793779
"""
3780-
self._consolidate_inplace()
37813780

37823781
new_data = self._mgr.take(
37833782
indices,
@@ -3934,8 +3933,6 @@ class animal locomotion
39343933
else:
39353934
index = self.index
39363935

3937-
self._consolidate_inplace()
3938-
39393936
if isinstance(index, MultiIndex):
39403937
loc, new_index = index._get_loc_level(key, level=0)
39413938
if not drop_level:
@@ -5190,8 +5187,6 @@ def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT:
51905187
f'argument "{list(kwargs.keys())[0]}"'
51915188
)
51925189

5193-
self._consolidate_inplace()
5194-
51955190
# if all axes that are requested to reindex are equal, then only copy
51965191
# if indicated must have index names equal here as well as values
51975192
if all(
@@ -6730,8 +6725,6 @@ def fillna(
67306725
inplace = validate_bool_kwarg(inplace, "inplace")
67316726
value, method = validate_fillna_kwargs(value, method)
67326727

6733-
self._consolidate_inplace()
6734-
67356728
# set the default here, so functions examining the signature
67366729
# can detect if something was set (e.g. in groupby) (GH9221)
67376730
if axis is None:
@@ -7049,8 +7042,6 @@ def replace(
70497042
if not is_bool(regex) and to_replace is not None:
70507043
raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")
70517044

7052-
self._consolidate_inplace()
7053-
70547045
if value is lib.no_default or method is not lib.no_default:
70557046
# GH#36984 if the user explicitly passes value=None we want to
70567047
# respect that. We have the corner case where the user explicitly

pandas/tests/frame/methods/test_values.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,7 @@ def test_private_values_dt64tz_multicol(self):
256256
df2 = df - df
257257
tm.assert_equal(df2._values, tda)
258258

259-
def test_private_values_dt64_multiblock(self, using_array_manager, request):
260-
if using_array_manager:
261-
mark = pytest.mark.xfail(reason="returns ndarray")
262-
request.node.add_marker(mark)
263-
259+
def test_private_values_dt64_multiblock(self):
264260
dta = date_range("2000", periods=8)._data
265261

266262
df = DataFrame({"A": dta[:4]}, copy=False)

pandas/tests/frame/test_block_internals.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,6 @@ def test_consolidate_inplace(self, float_frame):
8585
for letter in range(ord("A"), ord("Z")):
8686
float_frame[chr(letter)] = chr(letter)
8787

88-
def test_values_consolidate(self, float_frame):
89-
float_frame["E"] = 7.0
90-
assert not float_frame._mgr.is_consolidated()
91-
_ = float_frame.values
92-
assert float_frame._mgr.is_consolidated()
93-
9488
def test_modify_values(self, float_frame):
9589
float_frame.values[5] = 5
9690
assert (float_frame.values[5] == 5).all()
@@ -99,10 +93,10 @@ def test_modify_values(self, float_frame):
9993
float_frame["E"] = 7.0
10094
col = float_frame["E"]
10195
float_frame.values[6] = 6
102-
assert (float_frame.values[6] == 6).all()
96+
# as of 2.0 .values does not consolidate, so subsequent calls to .values
97+
# do not share data
98+
assert not (float_frame.values[6] == 6).all()
10399

104-
# check that item_cache was cleared
105-
assert float_frame["E"] is not col
106100
assert (col == 7).all()
107101

108102
def test_boolean_set_uncons(self, float_frame):

0 commit comments

Comments
 (0)