pandas-dev · simonjayhawkins · Aug 23, 2022 · Aug 20, 2022 · Aug 20, 2022 · Aug 23, 2022
diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst
@@ -22,6 +22,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`)
 - Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`)
 - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
+- Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`)
 - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`)
 - Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`)
 -

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4706,7 +4706,7 @@ def predicate(arr: ArrayLike) -> bool:
 
             return True
 
-        mgr = self._mgr._get_data_subset(predicate)
+        mgr = self._mgr._get_data_subset(predicate, copy=True)
         return type(self)(mgr).__finalize__(self)
 
     def insert(

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -477,9 +477,12 @@ def is_view(self) -> bool:
     def is_single_block(self) -> bool:
         return len(self.arrays) == 1
 
-    def _get_data_subset(self: T, predicate: Callable) -> T:
+    def _get_data_subset(self: T, predicate: Callable, copy: bool = False) -> T:
         indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
-        arrays = [self.arrays[i] for i in indices]
+        if copy:
+            arrays = [self.arrays[i].copy() for i in indices]
+        else:
+            arrays = [self.arrays[i] for i in indices]
         # TODO copy?
         # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq,
         #  see test_describe_datetime_columns
@@ -1329,8 +1332,11 @@ def idelete(self, indexer) -> SingleArrayManager:
         self._axes = [self._axes[0][to_keep]]
         return self
 
-    def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
+    def _get_data_subset(
+        self, predicate: Callable, copy: bool = False
+    ) -> SingleArrayManager:
         # used in get_numeric_data / get_bool_data
+        # `copy` is ignored; accepted only for signature compat with the base class
         if predicate(self.array):
             return type(self)(self.arrays, self._axes, verify_integrity=False)
         else:

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -464,9 +464,9 @@ def is_view(self) -> bool:
 
         return False
 
-    def _get_data_subset(self: T, predicate: Callable) -> T:
+    def _get_data_subset(self: T, predicate: Callable, copy: bool = False) -> T:
         blocks = [blk for blk in self.blocks if predicate(blk.values)]
-        return self._combine(blocks, copy=False)
+        return self._combine(blocks, copy=copy)
 
     def get_bool_data(self: T, copy: bool = False) -> T:
         """

diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -456,3 +456,12 @@ def test_np_bool_ea_boolean_include_number(self):
         result = df.select_dtypes(include="number")
         expected = DataFrame({"a": [1, 2, 3]})
         tm.assert_frame_equal(result, expected)
+
+    def test_select_dtypes_no_view(self):
+        # https://github.com/pandas-dev/pandas/issues/48090
+        # the result must not be a view on the original DataFrame
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        df_orig = df.copy()
+        result = df.select_dtypes(include=["number"])
+        result.iloc[0, 0] = 0
+        tm.assert_frame_equal(df, df_orig)