Skip to content

Commit f6e9b1a

Browse files
REGR: ensure DataFrame.select_dtypes() returns a copy (#48176)
1 parent 53d3c45 commit f6e9b1a

File tree

4 files changed

+16
-8
lines changed

4 files changed

+16
-8
lines changed

doc/source/whatsnew/v1.4.4.rst

+1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ Fixed regressions
2626
- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`)
2727
- Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DateOffset`-index (:issue:`46671`)
2828
- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
29+
- Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`)
2930
- Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`)
3031
- Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`)
3132
- Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`)

pandas/core/frame.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -4740,7 +4740,7 @@ def predicate(arr: ArrayLike) -> bool:
47404740

47414741
return True
47424742

4743-
mgr = self._mgr._get_data_subset(predicate)
4743+
mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
47444744
return type(self)(mgr).__finalize__(self)
47454745

47464746
def insert(

pandas/tests/copy_view/test_methods.py

+5-7
Original file line number | Diff line number | Diff line change
@@ -139,18 +139,16 @@ def test_select_dtypes(using_copy_on_write):
139139
df2 = df.select_dtypes("int64")
140140
df2._mgr._verify_integrity()
141141

142-
# currently this always returns a "view"
143-
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
142+
if using_copy_on_write:
143+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
144+
else:
145+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
144146

145147
# mutating df2 triggers a copy-on-write for that column/block
146148
df2.iloc[0, 0] = 0
147149
if using_copy_on_write:
148150
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
149-
tm.assert_frame_equal(df, df_orig)
150-
else:
151-
# but currently select_dtypes() actually returns a view -> mutates parent
152-
df_orig.iloc[0, 0] = 0
153-
tm.assert_frame_equal(df, df_orig)
151+
tm.assert_frame_equal(df, df_orig)
154152

155153

156154
def test_to_frame(using_copy_on_write):

pandas/tests/frame/methods/test_select_dtypes.py

+9
Original file line number | Diff line number | Diff line change
@@ -456,3 +456,12 @@ def test_np_bool_ea_boolean_include_number(self):
456456
result = df.select_dtypes(include="number")
457457
expected = DataFrame({"a": [1, 2, 3]})
458458
tm.assert_frame_equal(result, expected)
459+
460+
def test_select_dtypes_no_view(self):
461+
# https://github.com/pandas-dev/pandas/issues/48090
462+
# result of this method is not a view on the original dataframe
463+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
464+
df_orig = df.copy()
465+
result = df.select_dtypes(include=["number"])
466+
result.iloc[0, 0] = 0
467+
tm.assert_frame_equal(df, df_orig)

0 commit comments

Comments
 (0)