From 319338831edbcc173f79e75eeca5ef7fda5ccdd3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Aug 2022 16:58:28 +0200 Subject: [PATCH 1/4] Backport PR #48176: REGR: ensure DataFrame.select_dtypes() returns a copy --- doc/source/whatsnew/v1.4.4.rst | 1 + pandas/core/frame.py | 2 +- pandas/tests/frame/methods/test_select_dtypes.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 71f3db9af02ff..deff6e194c3bd 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -26,6 +26,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`) - Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DateOffset`-index (:issue:`46671`) - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) +- Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`) - Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`) - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`) - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d37dc70367806..713fbe1e32323 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4378,7 +4378,7 @@ def predicate(arr: ArrayLike) -> bool: return True - mgr = self._mgr._get_data_subset(predicate) + mgr = self._mgr._get_data_subset(predicate).copy(deep=None) return type(self)(mgr).__finalize__(self) def insert( diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 9958e024b6c7b..0a575bef7e05d 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -456,3 +456,12 @@ def test_np_bool_ea_boolean_include_number(self): result = df.select_dtypes(include="number") expected = DataFrame({"a": [1, 2, 3]}) tm.assert_frame_equal(result, expected) + + def test_select_dtypes_no_view(self): + # https://github.com/pandas-dev/pandas/issues/48090 + # result of this method is not a view on the original dataframe + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + result = df.select_dtypes(include=["number"]) + result.iloc[0, 0] = 0 + tm.assert_frame_equal(df, df_orig) From 63b4eed90e29183314914aa4dc22259933708b4f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 23 Aug 2022 16:34:21 +0100 Subject: [PATCH 2/4] skip array manager test --- pandas/tests/frame/methods/test_select_dtypes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 0a575bef7e05d..0ef0c6c0ef297 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.dtypes import ExtensionDtype import pandas as pd @@ -457,6 +459,7 @@ def test_np_bool_ea_boolean_include_number(self): expected = DataFrame({"a": [1, 2, 3]}) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented def test_select_dtypes_no_view(self): # https://github.com/pandas-dev/pandas/issues/48090 # result of this method is not a view on the original dataframe From 0d86439e81917906a1326e931c9addce967a05f4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Aug 2022 00:09:05 +0200 Subject: [PATCH 3/4] remove deep=None (not implemented on 1.4.x) --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 713fbe1e32323..298d0ac0f8420 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4378,7 +4378,7 @@ def predicate(arr: ArrayLike) -> bool: return True - mgr = self._mgr._get_data_subset(predicate).copy(deep=None) + mgr = self._mgr._get_data_subset(predicate).copy() return type(self)(mgr).__finalize__(self) def insert( From d0a6265661b0a5d2074c6c826d6e07a2b9ce528e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 24 Aug 2022 15:09:23 +0100 Subject: [PATCH 4/4] use skip_array_manager_invalid_test to avoid XPASS --- pandas/tests/frame/methods/test_select_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 0ef0c6c0ef297..72ec884ef2bc5 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -459,7 +459,7 @@ def test_np_bool_ea_boolean_include_number(self): expected = DataFrame({"a": [1, 2, 3]}) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented + @td.skip_array_manager_invalid_test def test_select_dtypes_no_view(self): # https://github.com/pandas-dev/pandas/issues/48090 # result of this method is not a view on the original dataframe